intl/hyphenation/src/nsHyphenator.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsHyphenator.h"

#include <stdlib.h>

#include "nsIFile.h"
#include "nsUTF8Utils.h"
#include "nsUnicodeProperties.h"
#include "nsUnicharUtilCIID.h"
#include "nsIURI.h"

#include "hyphen.h"
    15 nsHyphenator::nsHyphenator(nsIURI *aURI)
    16   : mDict(nullptr)
    17 {
    18   nsCString uriSpec;
    19   nsresult rv = aURI->GetSpec(uriSpec);
    20   if (NS_FAILED(rv)) {
    21     return;
    22   }
    23   mDict = hnj_hyphen_load(uriSpec.get());
    24 #ifdef DEBUG
    25   if (mDict) {
    26     printf("loaded hyphenation patterns from %s\n", uriSpec.get());
    27   }
    28 #endif
    29 }
    31 nsHyphenator::~nsHyphenator()
    32 {
    33   if (mDict != nullptr) {
    34     hnj_hyphen_free((HyphenDict*)mDict);
    35     mDict = nullptr;
    36   }
    37 }
    39 bool
    40 nsHyphenator::IsValid()
    41 {
    42   return (mDict != nullptr);
    43 }
    45 nsresult
    46 nsHyphenator::Hyphenate(const nsAString& aString,
    47                         FallibleTArray<bool>& aHyphens)
    48 {
    49   if (!aHyphens.SetLength(aString.Length())) {
    50     return NS_ERROR_OUT_OF_MEMORY;
    51   }
    52   memset(aHyphens.Elements(), false, aHyphens.Length());
    54   bool inWord = false;
    55   uint32_t wordStart = 0, wordLimit = 0;
    56   uint32_t chLen;
    57   for (uint32_t i = 0; i < aString.Length(); i += chLen) {
    58     uint32_t ch = aString[i];
    59     chLen = 1;
    61     if (NS_IS_HIGH_SURROGATE(ch)) {
    62       if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
    63         ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
    64         chLen = 2;
    65       } else {
    66         NS_WARNING("unpaired surrogate found during hyphenation");
    67       }
    68     }
    70     nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch);
    71     if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
    72       if (!inWord) {
    73         inWord = true;
    74         wordStart = i;
    75       }
    76       wordLimit = i + chLen;
    77       if (i + chLen < aString.Length()) {
    78         continue;
    79       }
    80     }
    82     if (inWord) {
    83       const char16_t *begin = aString.BeginReading();
    84       NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
    85                                  wordLimit - wordStart);
    86       nsAutoTArray<char,200> utf8hyphens;
    87       utf8hyphens.SetLength(utf8.Length() + 5);
    88       char **rep = nullptr;
    89       int *pos = nullptr;
    90       int *cut = nullptr;
    91       int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
    92                                       utf8.BeginReading(), utf8.Length(),
    93                                       utf8hyphens.Elements(), nullptr,
    94                                       &rep, &pos, &cut);
    95       if (!err) {
    96         // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
    97         // from utf8 code unit indexing (which would match the utf8 input
    98         // string directly) to Unicode character indexing.
    99         // We then need to convert this to utf16 code unit offsets for Gecko.
   100         const char *hyphPtr = utf8hyphens.Elements();
   101         const char16_t *cur = begin + wordStart;
   102         const char16_t *end = begin + wordLimit;
   103         while (cur < end) {
   104           if (*hyphPtr & 0x01) {
   105             aHyphens[cur - begin] = true;
   106           }
   107           cur++;
   108           if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
   109               NS_IS_HIGH_SURROGATE(*(cur-1)))
   110           {
   111             cur++;
   112           }
   113           hyphPtr++;
   114         }
   115       }
   116     }
   118     inWord = false;
   119   }
   121   return NS_OK;
   122 }

mercurial