intl/chardet/src/nsCyrillicDetector.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 #include "nscore.h"
michael@0 6 #include "nsCyrillicProb.h"
michael@0 7 #include <stdio.h>
michael@0 8
michael@0 9 #include "nsCOMPtr.h"
michael@0 10 #include "nsISupports.h"
michael@0 11 #include "nsICharsetDetector.h"
michael@0 12 #include "nsICharsetDetectionObserver.h"
michael@0 13 #include "nsIStringCharsetDetector.h"
michael@0 14 #include "nsCyrillicDetector.h"
michael@0 15
michael@0 16 //----------------------------------------------------------------------
michael@0 17 // Interface nsISupports [implementation]
michael@0 18 NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector)
michael@0 19 NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)
michael@0 20
michael@0 21 void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen)
michael@0 22 {
michael@0 23 uint8_t cls;
michael@0 24 const char* b;
michael@0 25 uint32_t i;
michael@0 26 if(mDone)
michael@0 27 return;
michael@0 28 for(i=0, b=aBuf;i<aLen;i++,b++)
michael@0 29 {
michael@0 30 for(unsigned j=0;j<mItems;j++)
michael@0 31 {
michael@0 32 if( 0x80 & *b)
michael@0 33 cls = mCyrillicClass[j][(*b) & 0x7F];
michael@0 34 else
michael@0 35 cls = 0;
michael@0 36 NS_ASSERTION( cls <= 32 , "illegal character class");
michael@0 37 mProb[j] += gCyrillicProb[mLastCls[j]][cls];
michael@0 38 mLastCls[j] = cls;
michael@0 39 }
michael@0 40 }
michael@0 41 // We now only based on the first block we receive
michael@0 42 DataEnd();
michael@0 43 }
michael@0 44
michael@0 45 //---------------------------------------------------------------------
michael@0 46 #define THRESHOLD_RATIO 1.5f
michael@0 47 void nsCyrillicDetector::DataEnd()
michael@0 48 {
michael@0 49 uint32_t max=0;
michael@0 50 uint8_t maxIdx=0;
michael@0 51 uint8_t j;
michael@0 52 if(mDone)
michael@0 53 return;
michael@0 54 for(j=0;j<mItems;j++) {
michael@0 55 if(mProb[j] > max)
michael@0 56 {
michael@0 57 max = mProb[j];
michael@0 58 maxIdx= j;
michael@0 59 }
michael@0 60 }
michael@0 61
michael@0 62 if( 0 == max ) // if we didn't get any 8 bits data
michael@0 63 return;
michael@0 64
michael@0 65 #ifdef DEBUG
michael@0 66 for(j=0;j<mItems;j++)
michael@0 67 printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
michael@0 68 #endif
michael@0 69 this->Report(mCharsets[maxIdx]);
michael@0 70 mDone = true;
michael@0 71 }
michael@0 72
michael@0 73 //---------------------------------------------------------------------
michael@0 74 nsCyrXPCOMDetector:: nsCyrXPCOMDetector(uint8_t aItems,
michael@0 75 const uint8_t ** aCyrillicClass,
michael@0 76 const char **aCharsets)
michael@0 77 : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
michael@0 78 {
michael@0 79 mObserver = nullptr;
michael@0 80 }
michael@0 81
michael@0 82 //---------------------------------------------------------------------
michael@0 83 nsCyrXPCOMDetector::~nsCyrXPCOMDetector()
michael@0 84 {
michael@0 85 }
michael@0 86
michael@0 87 //---------------------------------------------------------------------
michael@0 88 NS_IMETHODIMP nsCyrXPCOMDetector::Init(
michael@0 89 nsICharsetDetectionObserver* aObserver)
michael@0 90 {
michael@0 91 NS_ASSERTION(mObserver == nullptr , "Init twice");
michael@0 92 if(nullptr == aObserver)
michael@0 93 return NS_ERROR_ILLEGAL_VALUE;
michael@0 94
michael@0 95 mObserver = aObserver;
michael@0 96 return NS_OK;
michael@0 97 }
michael@0 98
michael@0 99 //----------------------------------------------------------
michael@0 100 NS_IMETHODIMP nsCyrXPCOMDetector::DoIt(
michael@0 101 const char* aBuf, uint32_t aLen, bool* oDontFeedMe)
michael@0 102 {
michael@0 103 NS_ASSERTION(mObserver != nullptr , "have not init yet");
michael@0 104
michael@0 105 if((nullptr == aBuf) || (nullptr == oDontFeedMe))
michael@0 106 return NS_ERROR_ILLEGAL_VALUE;
michael@0 107
michael@0 108 this->HandleData(aBuf, aLen);
michael@0 109 *oDontFeedMe = false;
michael@0 110 return NS_OK;
michael@0 111 }
michael@0 112
michael@0 113 //----------------------------------------------------------
michael@0 114 NS_IMETHODIMP nsCyrXPCOMDetector::Done()
michael@0 115 {
michael@0 116 NS_ASSERTION(mObserver != nullptr , "have not init yet");
michael@0 117 this->DataEnd();
michael@0 118 return NS_OK;
michael@0 119 }
michael@0 120
michael@0 121 //----------------------------------------------------------
michael@0 122 void nsCyrXPCOMDetector::Report(const char* aCharset)
michael@0 123 {
michael@0 124 NS_ASSERTION(mObserver != nullptr , "have not init yet");
michael@0 125 mObserver->Notify(aCharset, eBestAnswer);
michael@0 126 }
michael@0 127
michael@0 128 //---------------------------------------------------------------------
michael@0 129 nsCyrXPCOMStringDetector:: nsCyrXPCOMStringDetector(uint8_t aItems,
michael@0 130 const uint8_t ** aCyrillicClass,
michael@0 131 const char **aCharsets)
michael@0 132 : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
michael@0 133 {
michael@0 134 }
michael@0 135
michael@0 136 //---------------------------------------------------------------------
michael@0 137 nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector()
michael@0 138 {
michael@0 139 }
michael@0 140
michael@0 141 //---------------------------------------------------------------------
michael@0 142 void nsCyrXPCOMStringDetector::Report(const char *aCharset)
michael@0 143 {
michael@0 144 mResult = aCharset;
michael@0 145 }
michael@0 146
michael@0 147 //---------------------------------------------------------------------
michael@0 148 NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen,
michael@0 149 const char** oCharset, nsDetectionConfident &oConf)
michael@0 150 {
michael@0 151 mResult = nullptr;
michael@0 152 mDone = false;
michael@0 153 this->HandleData(aBuf, aLen);
michael@0 154 this->DataEnd();
michael@0 155 *oCharset=mResult;
michael@0 156 oConf = eBestAnswer;
michael@0 157 return NS_OK;
michael@0 158 }
michael@0 159
michael@0 160

mercurial