intl/chardet/tools/GenCyrillicClass.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirement documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "nsICharsetConverterManager.h"
     6 #include <iostream.h>
     7 #include "nsISupports.h"
     8 #include "nsIComponentManager.h"
     9 #include "nsIServiceManager.h"
    10 #include "nsIUnicodeDecoder.h"
    11 #include "nsIUnicodeEncoder.h"
    12 #include "nsCRT.h"
    13 #include <stdio.h>
    14 #include <stdlib.h>
    15 #if defined(XP_WIN)
    16 #include <io.h>
    17 #endif
    18 #ifdef XP_UNIX
    19 #include <unistd.h>
    20 #endif
    22 //---------------------------------------------------------------------------
    // Writes the opening of the generated header to stdout: the
    // nsCyrillicClass_h__ include guard followed by a "do not edit, this file
    // is generated" banner comment. Takes no arguments and returns nothing.
    void header()
    {
      // Kept as a const array: binding a string literal to a non-const char*
      // is not valid C++11. The text (including the "GenCyrllicClass"
      // spelling) is emitted byte-for-byte so regenerated output is unchanged.
      static const char kGuardAndBanner[] =
        "#ifndef nsCyrillicClass_h__\n"
        "#define nsCyrillicClass_h__\n"
        "/* PLEASE DO NOT EDIT THIS FILE DIRECTLY. THIS FILE IS GENERATED BY \n"
        "   GenCyrllicClass found in mozilla/intl/chardet/tools\n"
        " */\n";

      // fputs() writes the text verbatim; the text must never be used as a
      // printf format string, where any '%' would be interpreted.
      fputs(kGuardAndBanner, stdout);
    }
    33 //---------------------------------------------------------------------------
    // Writes the closing "#endif" line of the generated header's include
    // guard to stdout.
    void footer()
    {
      static const char kGuardClose[] = "#endif\n";
      fputs(kGuardClose, stdout);
    }
    38 //---------------------------------------------------------------------------
    // Writes the Emacs mode line and the MPL 2.0 license comment that open the
    // generated file to stdout. Takes no arguments and returns nothing.
    void npl()
    {
      // Const array rather than a non-const char* pointing at a string literal
      // (the latter is not valid C++11). Text is emitted byte-for-byte.
      static const char kLicenseBanner[] =
        "/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */\n"
        "/* This Source Code Form is subject to the terms of the Mozilla Public\n"
        " * License, v. 2.0. If a copy of the MPL was not distributed with this\n"
        " * file, You can obtain one at http://mozilla.org/MPL/2.0/. */\n";

      // Written with fputs() so the text is never interpreted as a printf
      // format string.
      fputs(kLicenseBanner, stdout);
    }
    48 //---------------------------------------------------------------------------
    49 static nsIUnicodeEncoder* gKOI8REncoder = nullptr;
    50 static nsICharsetConverterManager* gCCM = nullptr;
    52 //---------------------------------------------------------------------------
    53 uint8_t CyrillicClass(nsIUnicodeDecoder* decoder, uint8_t byte)
    54 {
    55    char16_t ubuf[2];
    56    uint8_t bbuf[2];
    58    int32_t blen = 1;
    59    int32_t ulen = 1;
    60    nsresult res = decoder->Convert((char*)&byte, &blen, ubuf, &ulen);
    61    if(NS_SUCCEEDED(res) && (1 == ulen ))
    62    {
    63      ubuf[0] = nsCRT::ToUpper(ubuf[0]);
    64      blen=1;
    65      res = gKOI8REncoder->Convert(ubuf,&ulen,(char*)bbuf,&blen);
    66      if(NS_SUCCEEDED(res) && (1 == blen))
    67      {
    68         if(0xe0 <= bbuf[0])
    69         {
    70               return bbuf[0] - (uint8_t)0xdf;
    71         }
    72      }
    73    }
    74    return 0;
    75 }
    76 //---------------------------------------------------------------------------
    77 void genCyrillicClass(const char* name, const char* charset)
    78 {
    79    nsIUnicodeDecoder *decoder = nullptr;
    80    nsresult res = NS_OK;
    81    nsAutoString str(charset);
    82    res = gCCM->GetUnicodeDecoder(&str, &decoder);
    83    if(NS_FAILED(res))
    84    {
    85       printf("cannot locate %s Decoder\n", charset);
    86       return;
    87    }
    88    printf("static const uint8_t %sMap [128] = {\n",name);
    89    uint8_t i,j;
    90    for(i=0x80;i!=0x00;i+=0x10)
    91    {
    92      for(j=0;j<=0x0f;j++)
    93      {
    94         uint8_t cls = CyrillicClass(decoder, i+j);
    95         printf(" %2d, ",cls);
    96      }
    97      printf("\n");
    98    }
    99    printf("};\n");
   100    NS_IF_RELEASE(decoder);
   101 }
   102 //---------------------------------------------------------------------------
   105 int main(int argc, char** argv) {
   106   nsresult res = nullptr;
   108   nsCOMPtr<nsICharsetConverterManager> gCCM = do_GetService(kCharsetConverterManagerCID, &res);
   110   if(NS_FAILED(res) && (nullptr != gCCM))
   111    {
   112       printf("cannot locate CharsetConverterManager\n");
   113       return(-1);
   114    }
   115    nsAutoString koi8r("KOI8-R");
   116    res = gCCM->GetUnicodeEncoder(&koi8r,&gKOI8REncoder);
   117    if(NS_FAILED(res) && (nullptr != gKOI8REncoder))
   118    {
   119       printf("cannot locate KOI8-R Encoder\n");
   120       return(-1);
   121    }
   124    npl();
   125    header();
   127      genCyrillicClass("KOI8", "KOI8-R");
   128      genCyrillicClass("CP1251", "windows-1251");
   129      genCyrillicClass("IBM866", "IBM866");
   130      genCyrillicClass("ISO88595", "ISO-8859-5");
   131      genCyrillicClass("MacCyrillic", "x-mac-cyrillic");
   132    footer();
   133    NS_IF_RELEASE(gKOI8REncoder);
   134    return(0);
   135 };

mercurial