intl/chardet/tools/GenCyrillicClass.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 #include "nsICharsetConverterManager.h"
michael@0 6 #include <iostream.h>
michael@0 7 #include "nsISupports.h"
michael@0 8 #include "nsIComponentManager.h"
michael@0 9 #include "nsIServiceManager.h"
michael@0 10 #include "nsIUnicodeDecoder.h"
michael@0 11 #include "nsIUnicodeEncoder.h"
michael@0 12 #include "nsCRT.h"
michael@0 13 #include <stdio.h>
michael@0 14 #include <stdlib.h>
michael@0 15 #if defined(XP_WIN)
michael@0 16 #include <io.h>
michael@0 17 #endif
michael@0 18 #ifdef XP_UNIX
michael@0 19 #include <unistd.h>
michael@0 20 #endif
michael@0 21
michael@0 22 //---------------------------------------------------------------------------
// Writes the opening of the generated header to stdout: the
// nsCyrillicClass_h__ include guard followed by a do-not-edit banner that
// names this generator and where it lives.
void header()
{
  // A const array rather than a mutable char* bound to a string literal,
  // which is ill-formed since C++11.
  static const char kGuardAndBanner[] =
    "#ifndef nsCyrillicClass_h__\n"
    "#define nsCyrillicClass_h__\n"
    "/* PLEASE DO NOT EDIT THIS FILE DIRECTLY. THIS FILE IS GENERATED BY \n"
    " GenCyrillicClass found in mozilla/intl/chardet/tools\n"
    " */\n";
  // fputs instead of printf: the text is data, not a format string, so it
  // must not be interpreted for conversion specifiers.
  fputs(kGuardAndBanner, stdout);
}
michael@0 33 //---------------------------------------------------------------------------
// Writes the closing "#endif" line of the generated output to stdout.
void footer()
{
  fputs("#endif\n", stdout);
}
michael@0 38 //---------------------------------------------------------------------------
// Writes the Emacs mode line and the MPL 2.0 license notice that open the
// generated output to stdout.
void npl()
{
  // A const array rather than a mutable char* bound to a string literal,
  // which is ill-formed since C++11.
  static const char kLicenseNotice[] =
    "/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n"
    "/* This Source Code Form is subject to the terms of the Mozilla Public\n"
    " * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
    " * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n";
  // fputs instead of printf: the text is data, not a format string, so it
  // must not be interpreted for conversion specifiers.
  fputs(kLicenseNotice, stdout);
}
michael@0 48 //---------------------------------------------------------------------------
michael@0 49 static nsIUnicodeEncoder* gKOI8REncoder = nullptr;
michael@0 50 static nsICharsetConverterManager* gCCM = nullptr;
michael@0 51
michael@0 52 //---------------------------------------------------------------------------
michael@0 53 uint8_t CyrillicClass(nsIUnicodeDecoder* decoder, uint8_t byte)
michael@0 54 {
michael@0 55 char16_t ubuf[2];
michael@0 56 uint8_t bbuf[2];
michael@0 57
michael@0 58 int32_t blen = 1;
michael@0 59 int32_t ulen = 1;
michael@0 60 nsresult res = decoder->Convert((char*)&byte, &blen, ubuf, &ulen);
michael@0 61 if(NS_SUCCEEDED(res) && (1 == ulen ))
michael@0 62 {
michael@0 63 ubuf[0] = nsCRT::ToUpper(ubuf[0]);
michael@0 64 blen=1;
michael@0 65 res = gKOI8REncoder->Convert(ubuf,&ulen,(char*)bbuf,&blen);
michael@0 66 if(NS_SUCCEEDED(res) && (1 == blen))
michael@0 67 {
michael@0 68 if(0xe0 <= bbuf[0])
michael@0 69 {
michael@0 70 return bbuf[0] - (uint8_t)0xdf;
michael@0 71 }
michael@0 72 }
michael@0 73 }
michael@0 74 return 0;
michael@0 75 }
michael@0 76 //---------------------------------------------------------------------------
michael@0 77 void genCyrillicClass(const char* name, const char* charset)
michael@0 78 {
michael@0 79 nsIUnicodeDecoder *decoder = nullptr;
michael@0 80 nsresult res = NS_OK;
michael@0 81 nsAutoString str(charset);
michael@0 82 res = gCCM->GetUnicodeDecoder(&str, &decoder);
michael@0 83 if(NS_FAILED(res))
michael@0 84 {
michael@0 85 printf("cannot locate %s Decoder\n", charset);
michael@0 86 return;
michael@0 87 }
michael@0 88 printf("static const uint8_t %sMap [128] = {\n",name);
michael@0 89 uint8_t i,j;
michael@0 90 for(i=0x80;i!=0x00;i+=0x10)
michael@0 91 {
michael@0 92 for(j=0;j<=0x0f;j++)
michael@0 93 {
michael@0 94 uint8_t cls = CyrillicClass(decoder, i+j);
michael@0 95 printf(" %2d, ",cls);
michael@0 96 }
michael@0 97 printf("\n");
michael@0 98 }
michael@0 99 printf("};\n");
michael@0 100 NS_IF_RELEASE(decoder);
michael@0 101 }
michael@0 102 //---------------------------------------------------------------------------
michael@0 103
michael@0 104
michael@0 105 int main(int argc, char** argv) {
michael@0 106 nsresult res = nullptr;
michael@0 107
michael@0 108 nsCOMPtr<nsICharsetConverterManager> gCCM = do_GetService(kCharsetConverterManagerCID, &res);
michael@0 109
michael@0 110 if(NS_FAILED(res) && (nullptr != gCCM))
michael@0 111 {
michael@0 112 printf("cannot locate CharsetConverterManager\n");
michael@0 113 return(-1);
michael@0 114 }
michael@0 115 nsAutoString koi8r("KOI8-R");
michael@0 116 res = gCCM->GetUnicodeEncoder(&koi8r,&gKOI8REncoder);
michael@0 117 if(NS_FAILED(res) && (nullptr != gKOI8REncoder))
michael@0 118 {
michael@0 119 printf("cannot locate KOI8-R Encoder\n");
michael@0 120 return(-1);
michael@0 121 }
michael@0 122
michael@0 123
michael@0 124 npl();
michael@0 125 header();
michael@0 126
michael@0 127 genCyrillicClass("KOI8", "KOI8-R");
michael@0 128 genCyrillicClass("CP1251", "windows-1251");
michael@0 129 genCyrillicClass("IBM866", "IBM866");
michael@0 130 genCyrillicClass("ISO88595", "ISO-8859-5");
michael@0 131 genCyrillicClass("MacCyrillic", "x-mac-cyrillic");
michael@0 132 footer();
michael@0 133 NS_IF_RELEASE(gKOI8REncoder);
michael@0 134 return(0);
michael@0 135 };

mercurial