1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/parser/htmlparser/src/nsHTMLEntities.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,241 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "mozilla/ArrayUtils.h" 1.10 + 1.11 +#include "nsHTMLEntities.h" 1.12 + 1.13 + 1.14 + 1.15 +#include "nsString.h" 1.16 +#include "nsCRT.h" 1.17 +#include "pldhash.h" 1.18 + 1.19 +using namespace mozilla; 1.20 + 1.21 +struct EntityNode { 1.22 + const char* mStr; // never owns buffer 1.23 + int32_t mUnicode; 1.24 +}; 1.25 + 1.26 +struct EntityNodeEntry : public PLDHashEntryHdr 1.27 +{ 1.28 + const EntityNode* node; 1.29 +}; 1.30 + 1.31 +static bool 1.32 + matchNodeString(PLDHashTable*, const PLDHashEntryHdr* aHdr, 1.33 + const void* key) 1.34 +{ 1.35 + const EntityNodeEntry* entry = static_cast<const EntityNodeEntry*>(aHdr); 1.36 + const char* str = static_cast<const char*>(key); 1.37 + return (nsCRT::strcmp(entry->node->mStr, str) == 0); 1.38 +} 1.39 + 1.40 +static bool 1.41 + matchNodeUnicode(PLDHashTable*, const PLDHashEntryHdr* aHdr, 1.42 + const void* key) 1.43 +{ 1.44 + const EntityNodeEntry* entry = static_cast<const EntityNodeEntry*>(aHdr); 1.45 + const int32_t ucode = NS_PTR_TO_INT32(key); 1.46 + return (entry->node->mUnicode == ucode); 1.47 +} 1.48 + 1.49 +static PLDHashNumber 1.50 + hashUnicodeValue(PLDHashTable*, const void* key) 1.51 +{ 1.52 + // key is actually the unicode value 1.53 + return PLDHashNumber(NS_PTR_TO_INT32(key)); 1.54 + } 1.55 + 1.56 + 1.57 +static const PLDHashTableOps EntityToUnicodeOps = { 1.58 + PL_DHashAllocTable, 1.59 + PL_DHashFreeTable, 1.60 + PL_DHashStringKey, 1.61 + matchNodeString, 1.62 + PL_DHashMoveEntryStub, 1.63 + PL_DHashClearEntryStub, 1.64 + PL_DHashFinalizeStub, 1.65 + nullptr, 1.66 +}; 1.67 + 1.68 +static const PLDHashTableOps UnicodeToEntityOps = { 1.69 + PL_DHashAllocTable, 1.70 + PL_DHashFreeTable, 1.71 + hashUnicodeValue, 1.72 + matchNodeUnicode, 1.73 + PL_DHashMoveEntryStub, 1.74 + PL_DHashClearEntryStub, 1.75 + PL_DHashFinalizeStub, 1.76 + nullptr, 1.77 +}; 1.78 + 1.79 +static PLDHashTable gEntityToUnicode = { 0 }; 1.80 +static PLDHashTable gUnicodeToEntity = { 0 }; 1.81 +static nsrefcnt gTableRefCnt = 0; 1.82 + 1.83 +#define HTML_ENTITY(_name, _value) { #_name, _value }, 1.84 +static const EntityNode gEntityArray[] = { 1.85 +#include "nsHTMLEntityList.h" 1.86 +}; 1.87 +#undef HTML_ENTITY 1.88 + 1.89 +#define NS_HTML_ENTITY_COUNT ((int32_t)ArrayLength(gEntityArray)) 1.90 + 1.91 +nsresult 1.92 +nsHTMLEntities::AddRefTable(void) 1.93 +{ 1.94 + if (!gTableRefCnt) { 1.95 + if (!PL_DHashTableInit(&gEntityToUnicode, &EntityToUnicodeOps, 1.96 + nullptr, sizeof(EntityNodeEntry), 1.97 + uint32_t(NS_HTML_ENTITY_COUNT / 0.75), 1.98 + fallible_t())) { 1.99 + gEntityToUnicode.ops = nullptr; 1.100 + return NS_ERROR_OUT_OF_MEMORY; 1.101 + } 1.102 + if (!PL_DHashTableInit(&gUnicodeToEntity, &UnicodeToEntityOps, 1.103 + nullptr, sizeof(EntityNodeEntry), 1.104 + uint32_t(NS_HTML_ENTITY_COUNT / 0.75), 1.105 + fallible_t())) { 1.106 + PL_DHashTableFinish(&gEntityToUnicode); 1.107 + gEntityToUnicode.ops = gUnicodeToEntity.ops = nullptr; 1.108 + return NS_ERROR_OUT_OF_MEMORY; 1.109 + } 1.110 + for (const EntityNode *node = gEntityArray, 1.111 + *node_end = ArrayEnd(gEntityArray); 1.112 + node < node_end; ++node) { 1.113 + 1.114 + // add to Entity->Unicode table 1.115 + EntityNodeEntry* entry = 1.116 + static_cast<EntityNodeEntry*> 1.117 + (PL_DHashTableOperate(&gEntityToUnicode, 1.118 + node->mStr, 1.119 + PL_DHASH_ADD)); 1.120 + NS_ASSERTION(entry, "Error adding an entry"); 1.121 + // Prefer earlier entries when we have duplication. 1.122 + if (!entry->node) 1.123 + entry->node = node; 1.124 + 1.125 + // add to Unicode->Entity table 1.126 + entry = static_cast<EntityNodeEntry*> 1.127 + (PL_DHashTableOperate(&gUnicodeToEntity, 1.128 + NS_INT32_TO_PTR(node->mUnicode), 1.129 + PL_DHASH_ADD)); 1.130 + NS_ASSERTION(entry, "Error adding an entry"); 1.131 + // Prefer earlier entries when we have duplication. 1.132 + if (!entry->node) 1.133 + entry->node = node; 1.134 + } 1.135 +#ifdef DEBUG 1.136 + PL_DHashMarkTableImmutable(&gUnicodeToEntity); 1.137 + PL_DHashMarkTableImmutable(&gEntityToUnicode); 1.138 +#endif 1.139 + } 1.140 + ++gTableRefCnt; 1.141 + return NS_OK; 1.142 +} 1.143 + 1.144 +void 1.145 +nsHTMLEntities::ReleaseTable(void) 1.146 +{ 1.147 + if (--gTableRefCnt != 0) 1.148 + return; 1.149 + 1.150 + if (gEntityToUnicode.ops) { 1.151 + PL_DHashTableFinish(&gEntityToUnicode); 1.152 + gEntityToUnicode.ops = nullptr; 1.153 + } 1.154 + if (gUnicodeToEntity.ops) { 1.155 + PL_DHashTableFinish(&gUnicodeToEntity); 1.156 + gUnicodeToEntity.ops = nullptr; 1.157 + } 1.158 + 1.159 +} 1.160 + 1.161 +int32_t 1.162 +nsHTMLEntities::EntityToUnicode(const nsCString& aEntity) 1.163 +{ 1.164 + NS_ASSERTION(gEntityToUnicode.ops, "no lookup table, needs addref"); 1.165 + if (!gEntityToUnicode.ops) 1.166 + return -1; 1.167 + 1.168 + //this little piece of code exists because entities may or may not have the terminating ';'. 1.169 + //if we see it, strip if off for this test... 1.170 + 1.171 + if(';'==aEntity.Last()) { 1.172 + nsAutoCString temp(aEntity); 1.173 + temp.Truncate(aEntity.Length()-1); 1.174 + return EntityToUnicode(temp); 1.175 + } 1.176 + 1.177 + EntityNodeEntry* entry = 1.178 + static_cast<EntityNodeEntry*> 1.179 + (PL_DHashTableOperate(&gEntityToUnicode, aEntity.get(), PL_DHASH_LOOKUP)); 1.180 + 1.181 + if (!entry || PL_DHASH_ENTRY_IS_FREE(entry)) 1.182 + return -1; 1.183 + 1.184 + return entry->node->mUnicode; 1.185 +} 1.186 + 1.187 + 1.188 +int32_t 1.189 +nsHTMLEntities::EntityToUnicode(const nsAString& aEntity) { 1.190 + nsAutoCString theEntity; theEntity.AssignWithConversion(aEntity); 1.191 + if(';'==theEntity.Last()) { 1.192 + theEntity.Truncate(theEntity.Length()-1); 1.193 + } 1.194 + 1.195 + return EntityToUnicode(theEntity); 1.196 +} 1.197 + 1.198 + 1.199 +const char* 1.200 +nsHTMLEntities::UnicodeToEntity(int32_t aUnicode) 1.201 +{ 1.202 + NS_ASSERTION(gUnicodeToEntity.ops, "no lookup table, needs addref"); 1.203 + EntityNodeEntry* entry = 1.204 + static_cast<EntityNodeEntry*> 1.205 + (PL_DHashTableOperate(&gUnicodeToEntity, NS_INT32_TO_PTR(aUnicode), PL_DHASH_LOOKUP)); 1.206 + 1.207 + if (!entry || PL_DHASH_ENTRY_IS_FREE(entry)) 1.208 + return nullptr; 1.209 + 1.210 + return entry->node->mStr; 1.211 +} 1.212 + 1.213 +#ifdef DEBUG 1.214 +#include <stdio.h> 1.215 + 1.216 +class nsTestEntityTable { 1.217 +public: 1.218 + nsTestEntityTable() { 1.219 + int32_t value; 1.220 + nsHTMLEntities::AddRefTable(); 1.221 + 1.222 + // Make sure we can find everything we are supposed to 1.223 + for (int i = 0; i < NS_HTML_ENTITY_COUNT; ++i) { 1.224 + nsAutoString entity; entity.AssignWithConversion(gEntityArray[i].mStr); 1.225 + 1.226 + value = nsHTMLEntities::EntityToUnicode(entity); 1.227 + NS_ASSERTION(value != -1, "can't find entity"); 1.228 + NS_ASSERTION(value == gEntityArray[i].mUnicode, "bad unicode value"); 1.229 + 1.230 + entity.AssignWithConversion(nsHTMLEntities::UnicodeToEntity(value)); 1.231 + NS_ASSERTION(entity.EqualsASCII(gEntityArray[i].mStr), "bad entity name"); 1.232 + } 1.233 + 1.234 + // Make sure we don't find things that aren't there 1.235 + value = nsHTMLEntities::EntityToUnicode(nsAutoCString("@")); 1.236 + NS_ASSERTION(value == -1, "found @"); 1.237 + value = nsHTMLEntities::EntityToUnicode(nsAutoCString("zzzzz")); 1.238 + NS_ASSERTION(value == -1, "found zzzzz"); 1.239 + nsHTMLEntities::ReleaseTable(); 1.240 + } 1.241 +}; 1.242 +//nsTestEntityTable validateEntityTable; 1.243 +#endif 1.244 +