generate-singlebyte.js
3.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
var fs = require("fs");
var Iconv = require("iconv").Iconv;
/**
 * Families of single-byte encodings for which tables are generated.
 *
 * Every family lists its members in `encodings`. A family may also provide
 * `convert(member)`, which returns `{ name, aliases }` for that member: the
 * name handed to iconv and the extra lookup keys registered for it. Families
 * without `convert` use each member string directly as the iconv name and
 * register no aliases.
 */
var encodingFamilies = [
    {
        // Windows code pages
        encodings: [1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258],
        convert: function(codePage) {
            var suffix = "" + codePage;
            return {
                name: "windows-" + suffix,
                aliases: ["win" + suffix, "cp" + suffix, suffix],
            };
        }
    },
    {
        // ISO-8859 code pages
        encodings: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16],
        convert: function(part) {
            var codePageNumber = 28590 + part;
            return {
                name: "iso-8859-" + part,
                // The second alias is deliberately left as a number, exactly
                // as before; it is stringified when used as an object key.
                aliases: ["cp" + codePageNumber, codePageNumber],
            };
        }
    },
    {
        // IBM/DOS code pages
        encodings: [437, 737, 775, 850, 852, 855, 857, 858, 860, 861, 862, 863, 864, 865, 866, 869],
        convert: function(codePage) {
            var suffix = "" + codePage;
            return {
                name: "CP" + suffix,
                aliases: ["ibm" + suffix, suffix],
            };
        }
    },
    {
        // Macintosh code pages
        encodings: [
            "macCroatian", "macCyrillic", "macGreek",
            "macIceland", "macRoman", "macRomania",
            "macThai", "macTurkish", "macUkraine"
        ],
    },
    {
        // KOI8 code pages
        encodings: ["KOI8-R", "KOI8-U"],
    },
];
/**
 * Table of single-byte encodings that is serialized to the output module.
 *
 * A string value marks the key as an alias of the encoding stored under
 * that string. The generated encodings themselves are added to this object
 * later by the script, keyed by their lowercased iconv name with "-" and
 * "_" removed (e.g. "iso-8859-1" becomes "iso88591"), so alias targets
 * here must use that same normalized spelling.
 *
 * NOTE(review): "ascii" is not generated by this script; presumably it is
 * defined by another encodings module - verify.
 */
var encodings = {
    // Aliases.
    "ascii8bit": "ascii",
    "usascii": "ascii",

    "latin1": "iso88591",
    "latin2": "iso88592",
    "latin3": "iso88593",
    "latin4": "iso88594",
    "latin6": "iso885910",
    "latin7": "iso885913",
    "latin8": "iso885914",
    "latin9": "iso885915",
    "latin10": "iso885916",

    // Fixed: previously pointed at the misspelled, non-existent "iso88951".
    "cp819": "iso88591",

    "arabic": "iso88596",
    "arabic8": "iso88596",
    "greek" : "iso88597",
    "greek8" : "iso88597",
    "hebrew": "iso88598",
    "hebrew8": "iso88598",
    "turkish": "iso88599",
    "turkish8": "iso88599",
    "thai": "iso885911",
    "thai8": "iso885911",
    "tis620": "iso885911",
    "windows874": "iso885911",
    "win874": "iso885911",
    "cp874": "iso885911",
    "874": "iso885911",
    "celtic": "iso885914",
    "celtic8": "iso885914",

    "cp20866": "koi8r",
    "20866": "koi8r",
    "ibm878": "koi8r",
    "cp21866": "koi8u",
    "21866": "koi8u",
    "ibm1168": "koi8u",
};
// Register every member of every family in `encodings`.
// For each member: resolve its iconv name (via the family's `convert` when
// present), derive the table key by stripping "-"/"_" and lowercasing,
// store the generated character table under that key, then point each
// alias at the key.
encodingFamilies.forEach(function(family) {
    family.encodings.forEach(function(member) {
        var descriptor = family.convert ? family.convert(member) : member;

        // Plain string members have no `name` property and are themselves
        // the iconv name.
        var iconvName = descriptor.name ? descriptor.name : descriptor;
        var key = iconvName.replace(/[-_]/g, "").toLowerCase();

        encodings[key] = {
            type: "singlebyte",
            chars: generateCharsString(iconvName)
        };

        if (!descriptor.aliases)
            return;

        descriptor.aliases.forEach(function(alias) {
            encodings[alias] = key;
        });
    });
});
// Serialize the finished table as a CommonJS module and write it to
// "encodings/singlebyte.js" (path as given to fs, i.e. relative to the
// current working directory).
var serializedEncodings = JSON.stringify(encodings, undefined, " ");
fs.writeFileSync("encodings/singlebyte.js", "module.exports = " + serializedEncodings + ";");
/**
 * Builds the decoding table for the upper half (bytes 0x80-0xFF) of a
 * single-byte encoding by converting each byte to UTF-8 with iconv.
 *
 * @param {string} encoding - Encoding name passed to the Iconv constructor.
 * @returns {string} A 128-character string; the character at index i is the
 *     decoded form of byte 0x80 + i, or U+FFFD when iconv rejects the byte
 *     with an "EILSEQ" error.
 * @throws {Error} If a byte converts to anything other than exactly one
 *     character (one UTF-16 code unit), or if iconv fails with an error
 *     whose code is not "EILSEQ" (that error is rethrown unchanged).
 */
function generateCharsString(encoding) {
    console.log("Generate encoding for " + encoding);

    var iconvToUtf8 = new Iconv(encoding, "UTF-8");
    var chars = "";

    for (var b = 0x80; b < 0x100; b++) {
        var convertedChar;
        try {
            // Buffer.from replaces the deprecated `new Buffer(array)` constructor.
            convertedChar = iconvToUtf8.convert(Buffer.from([b])).toString();
        } catch (exception) {
            // Only "EILSEQ" is treated as an unmapped byte; anything else is
            // a real failure.
            if (exception.code !== "EILSEQ")
                throw exception;
            convertedChar = "\ufffd";
        }

        if (convertedChar.length !== 1)
            throw new Error("Single-byte encoding error: Must return single char.");

        chars += convertedChar;
    }

    return chars;
}