intl/hyphenation/src/nsHyphenator.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "nsHyphenator.h"
michael@0 7 #include "nsIFile.h"
michael@0 8 #include "nsUTF8Utils.h"
michael@0 9 #include "nsUnicodeProperties.h"
michael@0 10 #include "nsUnicharUtilCIID.h"
michael@0 11 #include "nsIURI.h"
michael@0 12
michael@0 13 #include "hyphen.h"
michael@0 14
michael@0 15 nsHyphenator::nsHyphenator(nsIURI *aURI)
michael@0 16 : mDict(nullptr)
michael@0 17 {
michael@0 18 nsCString uriSpec;
michael@0 19 nsresult rv = aURI->GetSpec(uriSpec);
michael@0 20 if (NS_FAILED(rv)) {
michael@0 21 return;
michael@0 22 }
michael@0 23 mDict = hnj_hyphen_load(uriSpec.get());
michael@0 24 #ifdef DEBUG
michael@0 25 if (mDict) {
michael@0 26 printf("loaded hyphenation patterns from %s\n", uriSpec.get());
michael@0 27 }
michael@0 28 #endif
michael@0 29 }
michael@0 30
michael@0 31 nsHyphenator::~nsHyphenator()
michael@0 32 {
michael@0 33 if (mDict != nullptr) {
michael@0 34 hnj_hyphen_free((HyphenDict*)mDict);
michael@0 35 mDict = nullptr;
michael@0 36 }
michael@0 37 }
michael@0 38
michael@0 39 bool
michael@0 40 nsHyphenator::IsValid()
michael@0 41 {
michael@0 42 return (mDict != nullptr);
michael@0 43 }
michael@0 44
michael@0 45 nsresult
michael@0 46 nsHyphenator::Hyphenate(const nsAString& aString,
michael@0 47 FallibleTArray<bool>& aHyphens)
michael@0 48 {
michael@0 49 if (!aHyphens.SetLength(aString.Length())) {
michael@0 50 return NS_ERROR_OUT_OF_MEMORY;
michael@0 51 }
michael@0 52 memset(aHyphens.Elements(), false, aHyphens.Length());
michael@0 53
michael@0 54 bool inWord = false;
michael@0 55 uint32_t wordStart = 0, wordLimit = 0;
michael@0 56 uint32_t chLen;
michael@0 57 for (uint32_t i = 0; i < aString.Length(); i += chLen) {
michael@0 58 uint32_t ch = aString[i];
michael@0 59 chLen = 1;
michael@0 60
michael@0 61 if (NS_IS_HIGH_SURROGATE(ch)) {
michael@0 62 if (i + 1 < aString.Length() && NS_IS_LOW_SURROGATE(aString[i+1])) {
michael@0 63 ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
michael@0 64 chLen = 2;
michael@0 65 } else {
michael@0 66 NS_WARNING("unpaired surrogate found during hyphenation");
michael@0 67 }
michael@0 68 }
michael@0 69
michael@0 70 nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch);
michael@0 71 if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
michael@0 72 if (!inWord) {
michael@0 73 inWord = true;
michael@0 74 wordStart = i;
michael@0 75 }
michael@0 76 wordLimit = i + chLen;
michael@0 77 if (i + chLen < aString.Length()) {
michael@0 78 continue;
michael@0 79 }
michael@0 80 }
michael@0 81
michael@0 82 if (inWord) {
michael@0 83 const char16_t *begin = aString.BeginReading();
michael@0 84 NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
michael@0 85 wordLimit - wordStart);
michael@0 86 nsAutoTArray<char,200> utf8hyphens;
michael@0 87 utf8hyphens.SetLength(utf8.Length() + 5);
michael@0 88 char **rep = nullptr;
michael@0 89 int *pos = nullptr;
michael@0 90 int *cut = nullptr;
michael@0 91 int err = hnj_hyphen_hyphenate2((HyphenDict*)mDict,
michael@0 92 utf8.BeginReading(), utf8.Length(),
michael@0 93 utf8hyphens.Elements(), nullptr,
michael@0 94 &rep, &pos, &cut);
michael@0 95 if (!err) {
michael@0 96 // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
michael@0 97 // from utf8 code unit indexing (which would match the utf8 input
michael@0 98 // string directly) to Unicode character indexing.
michael@0 99 // We then need to convert this to utf16 code unit offsets for Gecko.
michael@0 100 const char *hyphPtr = utf8hyphens.Elements();
michael@0 101 const char16_t *cur = begin + wordStart;
michael@0 102 const char16_t *end = begin + wordLimit;
michael@0 103 while (cur < end) {
michael@0 104 if (*hyphPtr & 0x01) {
michael@0 105 aHyphens[cur - begin] = true;
michael@0 106 }
michael@0 107 cur++;
michael@0 108 if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
michael@0 109 NS_IS_HIGH_SURROGATE(*(cur-1)))
michael@0 110 {
michael@0 111 cur++;
michael@0 112 }
michael@0 113 hyphPtr++;
michael@0 114 }
michael@0 115 }
michael@0 116 }
michael@0 117
michael@0 118 inWord = false;
michael@0 119 }
michael@0 120
michael@0 121 return NS_OK;
michael@0 122 }

mercurial