parser/htmlparser/src/nsHTMLEntities.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "mozilla/ArrayUtils.h"
michael@0 7
michael@0 8 #include "nsHTMLEntities.h"
michael@0 9
michael@0 10
michael@0 11
michael@0 12 #include "nsString.h"
michael@0 13 #include "nsCRT.h"
michael@0 14 #include "pldhash.h"
michael@0 15
michael@0 16 using namespace mozilla;
michael@0 17
// One row of the static entity list: an entity name paired with its code
// value. Kept as a plain aggregate so rows can be brace-initialized.
struct EntityNode
{
  const char* mStr;   // entity name; points at storage this struct never owns
  int32_t mUnicode;   // value associated with mStr
};
michael@0 22
michael@0 23 struct EntityNodeEntry : public PLDHashEntryHdr
michael@0 24 {
michael@0 25 const EntityNode* node;
michael@0 26 };
michael@0 27
michael@0 28 static bool
michael@0 29 matchNodeString(PLDHashTable*, const PLDHashEntryHdr* aHdr,
michael@0 30 const void* key)
michael@0 31 {
michael@0 32 const EntityNodeEntry* entry = static_cast<const EntityNodeEntry*>(aHdr);
michael@0 33 const char* str = static_cast<const char*>(key);
michael@0 34 return (nsCRT::strcmp(entry->node->mStr, str) == 0);
michael@0 35 }
michael@0 36
michael@0 37 static bool
michael@0 38 matchNodeUnicode(PLDHashTable*, const PLDHashEntryHdr* aHdr,
michael@0 39 const void* key)
michael@0 40 {
michael@0 41 const EntityNodeEntry* entry = static_cast<const EntityNodeEntry*>(aHdr);
michael@0 42 const int32_t ucode = NS_PTR_TO_INT32(key);
michael@0 43 return (entry->node->mUnicode == ucode);
michael@0 44 }
michael@0 45
michael@0 46 static PLDHashNumber
michael@0 47 hashUnicodeValue(PLDHashTable*, const void* key)
michael@0 48 {
michael@0 49 // key is actually the unicode value
michael@0 50 return PLDHashNumber(NS_PTR_TO_INT32(key));
michael@0 51 }
michael@0 52
michael@0 53
michael@0 54 static const PLDHashTableOps EntityToUnicodeOps = {
michael@0 55 PL_DHashAllocTable,
michael@0 56 PL_DHashFreeTable,
michael@0 57 PL_DHashStringKey,
michael@0 58 matchNodeString,
michael@0 59 PL_DHashMoveEntryStub,
michael@0 60 PL_DHashClearEntryStub,
michael@0 61 PL_DHashFinalizeStub,
michael@0 62 nullptr,
michael@0 63 };
michael@0 64
michael@0 65 static const PLDHashTableOps UnicodeToEntityOps = {
michael@0 66 PL_DHashAllocTable,
michael@0 67 PL_DHashFreeTable,
michael@0 68 hashUnicodeValue,
michael@0 69 matchNodeUnicode,
michael@0 70 PL_DHashMoveEntryStub,
michael@0 71 PL_DHashClearEntryStub,
michael@0 72 PL_DHashFinalizeStub,
michael@0 73 nullptr,
michael@0 74 };
michael@0 75
michael@0 76 static PLDHashTable gEntityToUnicode = { 0 };
michael@0 77 static PLDHashTable gUnicodeToEntity = { 0 };
michael@0 78 static nsrefcnt gTableRefCnt = 0;
michael@0 79
michael@0 80 #define HTML_ENTITY(_name, _value) { #_name, _value },
michael@0 81 static const EntityNode gEntityArray[] = {
michael@0 82 #include "nsHTMLEntityList.h"
michael@0 83 };
michael@0 84 #undef HTML_ENTITY
michael@0 85
michael@0 86 #define NS_HTML_ENTITY_COUNT ((int32_t)ArrayLength(gEntityArray))
michael@0 87
michael@0 88 nsresult
michael@0 89 nsHTMLEntities::AddRefTable(void)
michael@0 90 {
michael@0 91 if (!gTableRefCnt) {
michael@0 92 if (!PL_DHashTableInit(&gEntityToUnicode, &EntityToUnicodeOps,
michael@0 93 nullptr, sizeof(EntityNodeEntry),
michael@0 94 uint32_t(NS_HTML_ENTITY_COUNT / 0.75),
michael@0 95 fallible_t())) {
michael@0 96 gEntityToUnicode.ops = nullptr;
michael@0 97 return NS_ERROR_OUT_OF_MEMORY;
michael@0 98 }
michael@0 99 if (!PL_DHashTableInit(&gUnicodeToEntity, &UnicodeToEntityOps,
michael@0 100 nullptr, sizeof(EntityNodeEntry),
michael@0 101 uint32_t(NS_HTML_ENTITY_COUNT / 0.75),
michael@0 102 fallible_t())) {
michael@0 103 PL_DHashTableFinish(&gEntityToUnicode);
michael@0 104 gEntityToUnicode.ops = gUnicodeToEntity.ops = nullptr;
michael@0 105 return NS_ERROR_OUT_OF_MEMORY;
michael@0 106 }
michael@0 107 for (const EntityNode *node = gEntityArray,
michael@0 108 *node_end = ArrayEnd(gEntityArray);
michael@0 109 node < node_end; ++node) {
michael@0 110
michael@0 111 // add to Entity->Unicode table
michael@0 112 EntityNodeEntry* entry =
michael@0 113 static_cast<EntityNodeEntry*>
michael@0 114 (PL_DHashTableOperate(&gEntityToUnicode,
michael@0 115 node->mStr,
michael@0 116 PL_DHASH_ADD));
michael@0 117 NS_ASSERTION(entry, "Error adding an entry");
michael@0 118 // Prefer earlier entries when we have duplication.
michael@0 119 if (!entry->node)
michael@0 120 entry->node = node;
michael@0 121
michael@0 122 // add to Unicode->Entity table
michael@0 123 entry = static_cast<EntityNodeEntry*>
michael@0 124 (PL_DHashTableOperate(&gUnicodeToEntity,
michael@0 125 NS_INT32_TO_PTR(node->mUnicode),
michael@0 126 PL_DHASH_ADD));
michael@0 127 NS_ASSERTION(entry, "Error adding an entry");
michael@0 128 // Prefer earlier entries when we have duplication.
michael@0 129 if (!entry->node)
michael@0 130 entry->node = node;
michael@0 131 }
michael@0 132 #ifdef DEBUG
michael@0 133 PL_DHashMarkTableImmutable(&gUnicodeToEntity);
michael@0 134 PL_DHashMarkTableImmutable(&gEntityToUnicode);
michael@0 135 #endif
michael@0 136 }
michael@0 137 ++gTableRefCnt;
michael@0 138 return NS_OK;
michael@0 139 }
michael@0 140
michael@0 141 void
michael@0 142 nsHTMLEntities::ReleaseTable(void)
michael@0 143 {
michael@0 144 if (--gTableRefCnt != 0)
michael@0 145 return;
michael@0 146
michael@0 147 if (gEntityToUnicode.ops) {
michael@0 148 PL_DHashTableFinish(&gEntityToUnicode);
michael@0 149 gEntityToUnicode.ops = nullptr;
michael@0 150 }
michael@0 151 if (gUnicodeToEntity.ops) {
michael@0 152 PL_DHashTableFinish(&gUnicodeToEntity);
michael@0 153 gUnicodeToEntity.ops = nullptr;
michael@0 154 }
michael@0 155
michael@0 156 }
michael@0 157
michael@0 158 int32_t
michael@0 159 nsHTMLEntities::EntityToUnicode(const nsCString& aEntity)
michael@0 160 {
michael@0 161 NS_ASSERTION(gEntityToUnicode.ops, "no lookup table, needs addref");
michael@0 162 if (!gEntityToUnicode.ops)
michael@0 163 return -1;
michael@0 164
michael@0 165 //this little piece of code exists because entities may or may not have the terminating ';'.
michael@0 166 //if we see it, strip if off for this test...
michael@0 167
michael@0 168 if(';'==aEntity.Last()) {
michael@0 169 nsAutoCString temp(aEntity);
michael@0 170 temp.Truncate(aEntity.Length()-1);
michael@0 171 return EntityToUnicode(temp);
michael@0 172 }
michael@0 173
michael@0 174 EntityNodeEntry* entry =
michael@0 175 static_cast<EntityNodeEntry*>
michael@0 176 (PL_DHashTableOperate(&gEntityToUnicode, aEntity.get(), PL_DHASH_LOOKUP));
michael@0 177
michael@0 178 if (!entry || PL_DHASH_ENTRY_IS_FREE(entry))
michael@0 179 return -1;
michael@0 180
michael@0 181 return entry->node->mUnicode;
michael@0 182 }
michael@0 183
michael@0 184
michael@0 185 int32_t
michael@0 186 nsHTMLEntities::EntityToUnicode(const nsAString& aEntity) {
michael@0 187 nsAutoCString theEntity; theEntity.AssignWithConversion(aEntity);
michael@0 188 if(';'==theEntity.Last()) {
michael@0 189 theEntity.Truncate(theEntity.Length()-1);
michael@0 190 }
michael@0 191
michael@0 192 return EntityToUnicode(theEntity);
michael@0 193 }
michael@0 194
michael@0 195
michael@0 196 const char*
michael@0 197 nsHTMLEntities::UnicodeToEntity(int32_t aUnicode)
michael@0 198 {
michael@0 199 NS_ASSERTION(gUnicodeToEntity.ops, "no lookup table, needs addref");
michael@0 200 EntityNodeEntry* entry =
michael@0 201 static_cast<EntityNodeEntry*>
michael@0 202 (PL_DHashTableOperate(&gUnicodeToEntity, NS_INT32_TO_PTR(aUnicode), PL_DHASH_LOOKUP));
michael@0 203
michael@0 204 if (!entry || PL_DHASH_ENTRY_IS_FREE(entry))
michael@0 205 return nullptr;
michael@0 206
michael@0 207 return entry->node->mStr;
michael@0 208 }
michael@0 209
michael@0 210 #ifdef DEBUG
michael@0 211 #include <stdio.h>
michael@0 212
michael@0 213 class nsTestEntityTable {
michael@0 214 public:
michael@0 215 nsTestEntityTable() {
michael@0 216 int32_t value;
michael@0 217 nsHTMLEntities::AddRefTable();
michael@0 218
michael@0 219 // Make sure we can find everything we are supposed to
michael@0 220 for (int i = 0; i < NS_HTML_ENTITY_COUNT; ++i) {
michael@0 221 nsAutoString entity; entity.AssignWithConversion(gEntityArray[i].mStr);
michael@0 222
michael@0 223 value = nsHTMLEntities::EntityToUnicode(entity);
michael@0 224 NS_ASSERTION(value != -1, "can't find entity");
michael@0 225 NS_ASSERTION(value == gEntityArray[i].mUnicode, "bad unicode value");
michael@0 226
michael@0 227 entity.AssignWithConversion(nsHTMLEntities::UnicodeToEntity(value));
michael@0 228 NS_ASSERTION(entity.EqualsASCII(gEntityArray[i].mStr), "bad entity name");
michael@0 229 }
michael@0 230
michael@0 231 // Make sure we don't find things that aren't there
michael@0 232 value = nsHTMLEntities::EntityToUnicode(nsAutoCString("@"));
michael@0 233 NS_ASSERTION(value == -1, "found @");
michael@0 234 value = nsHTMLEntities::EntityToUnicode(nsAutoCString("zzzzz"));
michael@0 235 NS_ASSERTION(value == -1, "found zzzzz");
michael@0 236 nsHTMLEntities::ReleaseTable();
michael@0 237 }
michael@0 238 };
michael@0 239 //nsTestEntityTable validateEntityTable;
michael@0 240 #endif
michael@0 241

mercurial