intl/chardet/tools/GenCyrillicClass.cpp

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:14f09d0d63d0
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsICharsetConverterManager.h"
6 #include <iostream.h>
7 #include "nsISupports.h"
8 #include "nsIComponentManager.h"
9 #include "nsIServiceManager.h"
10 #include "nsIUnicodeDecoder.h"
11 #include "nsIUnicodeEncoder.h"
12 #include "nsCRT.h"
13 #include <stdio.h>
14 #include <stdlib.h>
15 #if defined(XP_WIN)
16 #include <io.h>
17 #endif
18 #ifdef XP_UNIX
19 #include <unistd.h>
20 #endif
21
22 //---------------------------------------------------------------------------
/**
 * Writes the opening of the generated header to stdout: the
 * nsCyrillicClass_h__ include guard followed by a "do not edit" notice.
 * footer() writes the #endif that closes the guard.
 */
void header()
{
  // Held as const data: binding a string literal to a non-const char* is
  // ill-formed since C++11.
  static const char kHeaderText[] =
    "#ifndef nsCyrillicClass_h__\n"
    "#define nsCyrillicClass_h__\n"
    "/* PLEASE DO NOT EDIT THIS FILE DIRECTLY. THIS FILE IS GENERATED BY \n"
    " GenCyrllicClass found in mozilla/intl/chardet/tools\n"
    " */\n";

  // fputs() instead of printf(text): the text is plain data and must never
  // be interpreted as a format string.
  fputs(kHeaderText, stdout);
}
33 //---------------------------------------------------------------------------
/**
 * Writes the closing "#endif" line of the generated header to stdout,
 * matching the include guard opened by header().
 */
void footer()
{
  fputs("#endif\n", stdout);
}
38 //---------------------------------------------------------------------------
/**
 * Writes the editor mode line and the MPL 2.0 license notice that start the
 * generated file to stdout.
 */
void npl()
{
  // Held as const data: binding a string literal to a non-const char* is
  // ill-formed since C++11.
  static const char kLicenseText[] =
    "/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n"
    "/* This Source Code Form is subject to the terms of the Mozilla Public\n"
    " * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
    " * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n";

  // fputs() instead of printf(text): the text is plain data and must never
  // be interpreted as a format string.
  fputs(kLicenseText, stdout);
}
48 //---------------------------------------------------------------------------
49 static nsIUnicodeEncoder* gKOI8REncoder = nullptr;
50 static nsICharsetConverterManager* gCCM = nullptr;
51
52 //---------------------------------------------------------------------------
53 uint8_t CyrillicClass(nsIUnicodeDecoder* decoder, uint8_t byte)
54 {
55 char16_t ubuf[2];
56 uint8_t bbuf[2];
57
58 int32_t blen = 1;
59 int32_t ulen = 1;
60 nsresult res = decoder->Convert((char*)&byte, &blen, ubuf, &ulen);
61 if(NS_SUCCEEDED(res) && (1 == ulen ))
62 {
63 ubuf[0] = nsCRT::ToUpper(ubuf[0]);
64 blen=1;
65 res = gKOI8REncoder->Convert(ubuf,&ulen,(char*)bbuf,&blen);
66 if(NS_SUCCEEDED(res) && (1 == blen))
67 {
68 if(0xe0 <= bbuf[0])
69 {
70 return bbuf[0] - (uint8_t)0xdf;
71 }
72 }
73 }
74 return 0;
75 }
76 //---------------------------------------------------------------------------
77 void genCyrillicClass(const char* name, const char* charset)
78 {
79 nsIUnicodeDecoder *decoder = nullptr;
80 nsresult res = NS_OK;
81 nsAutoString str(charset);
82 res = gCCM->GetUnicodeDecoder(&str, &decoder);
83 if(NS_FAILED(res))
84 {
85 printf("cannot locate %s Decoder\n", charset);
86 return;
87 }
88 printf("static const uint8_t %sMap [128] = {\n",name);
89 uint8_t i,j;
90 for(i=0x80;i!=0x00;i+=0x10)
91 {
92 for(j=0;j<=0x0f;j++)
93 {
94 uint8_t cls = CyrillicClass(decoder, i+j);
95 printf(" %2d, ",cls);
96 }
97 printf("\n");
98 }
99 printf("};\n");
100 NS_IF_RELEASE(decoder);
101 }
102 //---------------------------------------------------------------------------
103
104
105 int main(int argc, char** argv) {
106 nsresult res = nullptr;
107
108 nsCOMPtr<nsICharsetConverterManager> gCCM = do_GetService(kCharsetConverterManagerCID, &res);
109
110 if(NS_FAILED(res) && (nullptr != gCCM))
111 {
112 printf("cannot locate CharsetConverterManager\n");
113 return(-1);
114 }
115 nsAutoString koi8r("KOI8-R");
116 res = gCCM->GetUnicodeEncoder(&koi8r,&gKOI8REncoder);
117 if(NS_FAILED(res) && (nullptr != gKOI8REncoder))
118 {
119 printf("cannot locate KOI8-R Encoder\n");
120 return(-1);
121 }
122
123
124 npl();
125 header();
126
127 genCyrillicClass("KOI8", "KOI8-R");
128 genCyrillicClass("CP1251", "windows-1251");
129 genCyrillicClass("IBM866", "IBM866");
130 genCyrillicClass("ISO88595", "ISO-8859-5");
131 genCyrillicClass("MacCyrillic", "x-mac-cyrillic");
132 footer();
133 NS_IF_RELEASE(gKOI8REncoder);
134 return(0);
135 };

mercurial