intl/chardet/tools/GenCyrillicClass.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/chardet/tools/GenCyrillicClass.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,135 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +#include "nsICharsetConverterManager.h"
     1.9 +#include <iostream.h>
    1.10 +#include "nsISupports.h"
    1.11 +#include "nsIComponentManager.h"
    1.12 +#include "nsIServiceManager.h"
    1.13 +#include "nsIUnicodeDecoder.h"
    1.14 +#include "nsIUnicodeEncoder.h"
    1.15 +#include "nsCRT.h"
    1.16 +#include <stdio.h>
    1.17 +#include <stdlib.h>
    1.18 +#if defined(XP_WIN)
    1.19 +#include <io.h>
    1.20 +#endif
    1.21 +#ifdef XP_UNIX
    1.22 +#include <unistd.h>
    1.23 +#endif
    1.24 +
    1.25 +//---------------------------------------------------------------------------
    1.26 +void header()
    1.27 +{
    1.28 +char *header=
    1.29 +"#ifndef nsCyrillicClass_h__\n"
    1.30 +"#define nsCyrillicClass_h__\n"
    1.31 +"/* PLEASE DO NOT EDIT THIS FILE DIRECTLY. THIS FILE IS GENERATED BY \n"
    1.32 +"   GenCyrllicClass found in mozilla/intl/chardet/tools\n"
    1.33 +" */\n";
    1.34 +   printf(header);
    1.35 +}
    1.36 +//---------------------------------------------------------------------------
    1.37 +void footer()
    1.38 +{
    1.39 +   printf("#endif\n");
    1.40 +}
    1.41 +//---------------------------------------------------------------------------
    1.42 +void npl()
    1.43 +{
    1.44 +char *npl=
    1.45 +"/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n"
    1.46 +"/* This Source Code Form is subject to the terms of the Mozilla Public\n"
    1.47 +" * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
    1.48 +" * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n";
    1.49 +   printf(npl);
    1.50 +}
    1.51 +//---------------------------------------------------------------------------
    1.52 +static nsIUnicodeEncoder* gKOI8REncoder = nullptr;
    1.53 +static nsICharsetConverterManager* gCCM = nullptr;
    1.54 +
    1.55 +//---------------------------------------------------------------------------
    1.56 +uint8_t CyrillicClass(nsIUnicodeDecoder* decoder, uint8_t byte)
    1.57 +{
    1.58 +   char16_t ubuf[2];
    1.59 +   uint8_t bbuf[2];
    1.60 +
    1.61 +   int32_t blen = 1;
    1.62 +   int32_t ulen = 1;
    1.63 +   nsresult res = decoder->Convert((char*)&byte, &blen, ubuf, &ulen);
    1.64 +   if(NS_SUCCEEDED(res) && (1 == ulen ))
    1.65 +   {
    1.66 +     ubuf[0] = nsCRT::ToUpper(ubuf[0]);
    1.67 +     blen=1;
    1.68 +     res = gKOI8REncoder->Convert(ubuf,&ulen,(char*)bbuf,&blen);
    1.69 +     if(NS_SUCCEEDED(res) && (1 == blen))
    1.70 +     {
    1.71 +        if(0xe0 <= bbuf[0])
    1.72 +        {
    1.73 +              return bbuf[0] - (uint8_t)0xdf;
    1.74 +        }
    1.75 +     }
    1.76 +   }
    1.77 +   return 0;
    1.78 +}
    1.79 +//---------------------------------------------------------------------------
    1.80 +void genCyrillicClass(const char* name, const char* charset)
    1.81 +{
    1.82 +   nsIUnicodeDecoder *decoder = nullptr;
    1.83 +   nsresult res = NS_OK;
    1.84 +   nsAutoString str(charset);
    1.85 +   res = gCCM->GetUnicodeDecoder(&str, &decoder);
    1.86 +   if(NS_FAILED(res))
    1.87 +   {
    1.88 +      printf("cannot locate %s Decoder\n", charset);
    1.89 +      return;
    1.90 +   }
    1.91 +   printf("static const uint8_t %sMap [128] = {\n",name);
    1.92 +   uint8_t i,j;
    1.93 +   for(i=0x80;i!=0x00;i+=0x10)
    1.94 +   {
    1.95 +     for(j=0;j<=0x0f;j++)
    1.96 +     {
    1.97 +        uint8_t cls = CyrillicClass(decoder, i+j);
    1.98 +        printf(" %2d, ",cls);
    1.99 +     }
   1.100 +     printf("\n");
   1.101 +   }
   1.102 +   printf("};\n");
   1.103 +   NS_IF_RELEASE(decoder);
   1.104 +}
   1.105 +//---------------------------------------------------------------------------
   1.106 +
   1.107 +
   1.108 +int main(int argc, char** argv) {
   1.109 +  nsresult res = nullptr;
   1.110 +
   1.111 +  nsCOMPtr<nsICharsetConverterManager> gCCM = do_GetService(kCharsetConverterManagerCID, &res);
   1.112 +
   1.113 +  if(NS_FAILED(res) && (nullptr != gCCM))
   1.114 +   {
   1.115 +      printf("cannot locate CharsetConverterManager\n");
   1.116 +      return(-1);
   1.117 +   }
   1.118 +   nsAutoString koi8r("KOI8-R");
   1.119 +   res = gCCM->GetUnicodeEncoder(&koi8r,&gKOI8REncoder);
   1.120 +   if(NS_FAILED(res) && (nullptr != gKOI8REncoder))
   1.121 +   {
   1.122 +      printf("cannot locate KOI8-R Encoder\n");
   1.123 +      return(-1);
   1.124 +   }
   1.125 +
   1.126 +
   1.127 +   npl();
   1.128 +   header();
   1.129 +   
   1.130 +     genCyrillicClass("KOI8", "KOI8-R");
   1.131 +     genCyrillicClass("CP1251", "windows-1251");
   1.132 +     genCyrillicClass("IBM866", "IBM866");
   1.133 +     genCyrillicClass("ISO88595", "ISO-8859-5");
   1.134 +     genCyrillicClass("MacCyrillic", "x-mac-cyrillic");
   1.135 +   footer();
   1.136 +   NS_IF_RELEASE(gKOI8REncoder);
   1.137 +   return(0);
   1.138 +};

mercurial