parser/htmlparser/src/nsHTMLEntities.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/parser/htmlparser/src/nsHTMLEntities.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,241 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "mozilla/ArrayUtils.h"
    1.10 +
    1.11 +#include "nsHTMLEntities.h"
    1.12 +
    1.13 +
    1.14 +
    1.15 +#include "nsString.h"
    1.16 +#include "nsCRT.h"
    1.17 +#include "pldhash.h"
    1.18 +
    1.19 +using namespace mozilla;
    1.20 +
    /**
     * One row of the static entity list: an entity name paired with the
     * numeric value it maps to.
     */
    struct EntityNode {
      // Entity name. The struct only borrows this buffer; it never owns it.
      const char* mStr;
      // Numeric value associated with mStr.
      int32_t mUnicode;
    };
    1.25 +
    1.26 +struct EntityNodeEntry : public PLDHashEntryHdr
    1.27 +{
    1.28 +  const EntityNode* node;
    1.29 +}; 
    1.30 +
    1.31 +static bool
    1.32 +  matchNodeString(PLDHashTable*, const PLDHashEntryHdr* aHdr,
    1.33 +                  const void* key)
    1.34 +{
    1.35 +  const EntityNodeEntry* entry = static_cast<const EntityNodeEntry*>(aHdr);
    1.36 +  const char* str = static_cast<const char*>(key);
    1.37 +  return (nsCRT::strcmp(entry->node->mStr, str) == 0);
    1.38 +}
    1.39 +
    1.40 +static bool
    1.41 +  matchNodeUnicode(PLDHashTable*, const PLDHashEntryHdr* aHdr,
    1.42 +                   const void* key)
    1.43 +{
    1.44 +  const EntityNodeEntry* entry = static_cast<const EntityNodeEntry*>(aHdr);
    1.45 +  const int32_t ucode = NS_PTR_TO_INT32(key);
    1.46 +  return (entry->node->mUnicode == ucode);
    1.47 +}
    1.48 +
    1.49 +static PLDHashNumber
    1.50 +  hashUnicodeValue(PLDHashTable*, const void* key)
    1.51 +{
    1.52 +  // key is actually the unicode value
    1.53 +  return PLDHashNumber(NS_PTR_TO_INT32(key));
    1.54 +  }
    1.55 +
    1.56 +
    1.57 +static const PLDHashTableOps EntityToUnicodeOps = {
    1.58 +  PL_DHashAllocTable,
    1.59 +  PL_DHashFreeTable,
    1.60 +  PL_DHashStringKey,
    1.61 +  matchNodeString,
    1.62 +  PL_DHashMoveEntryStub,
    1.63 +  PL_DHashClearEntryStub,
    1.64 +  PL_DHashFinalizeStub,
    1.65 +  nullptr,
    1.66 +}; 
    1.67 +
    1.68 +static const PLDHashTableOps UnicodeToEntityOps = {
    1.69 +  PL_DHashAllocTable,
    1.70 +  PL_DHashFreeTable,
    1.71 +  hashUnicodeValue,
    1.72 +  matchNodeUnicode,
    1.73 +  PL_DHashMoveEntryStub,
    1.74 +  PL_DHashClearEntryStub,
    1.75 +  PL_DHashFinalizeStub,
    1.76 +  nullptr,
    1.77 +};
    1.78 +
    1.79 +static PLDHashTable gEntityToUnicode = { 0 };
    1.80 +static PLDHashTable gUnicodeToEntity = { 0 };
    1.81 +static nsrefcnt gTableRefCnt = 0;
    1.82 +
    1.83 +#define HTML_ENTITY(_name, _value) { #_name, _value },
    1.84 +static const EntityNode gEntityArray[] = {
    1.85 +#include "nsHTMLEntityList.h"
    1.86 +};
    1.87 +#undef HTML_ENTITY
    1.88 +
    1.89 +#define NS_HTML_ENTITY_COUNT ((int32_t)ArrayLength(gEntityArray))
    1.90 +
    1.91 +nsresult
    1.92 +nsHTMLEntities::AddRefTable(void) 
    1.93 +{
    1.94 +  if (!gTableRefCnt) {
    1.95 +    if (!PL_DHashTableInit(&gEntityToUnicode, &EntityToUnicodeOps,
    1.96 +                           nullptr, sizeof(EntityNodeEntry),
    1.97 +                           uint32_t(NS_HTML_ENTITY_COUNT / 0.75),
    1.98 +                           fallible_t())) {
    1.99 +      gEntityToUnicode.ops = nullptr;
   1.100 +      return NS_ERROR_OUT_OF_MEMORY;
   1.101 +    }
   1.102 +    if (!PL_DHashTableInit(&gUnicodeToEntity, &UnicodeToEntityOps,
   1.103 +                           nullptr, sizeof(EntityNodeEntry),
   1.104 +                           uint32_t(NS_HTML_ENTITY_COUNT / 0.75),
   1.105 +                           fallible_t())) {
   1.106 +      PL_DHashTableFinish(&gEntityToUnicode);
   1.107 +      gEntityToUnicode.ops = gUnicodeToEntity.ops = nullptr;
   1.108 +      return NS_ERROR_OUT_OF_MEMORY;
   1.109 +    }
   1.110 +    for (const EntityNode *node = gEntityArray,
   1.111 +                 *node_end = ArrayEnd(gEntityArray);
   1.112 +         node < node_end; ++node) {
   1.113 +
   1.114 +      // add to Entity->Unicode table
   1.115 +      EntityNodeEntry* entry =
   1.116 +        static_cast<EntityNodeEntry*>
   1.117 +                   (PL_DHashTableOperate(&gEntityToUnicode,
   1.118 +                                            node->mStr,
   1.119 +                                            PL_DHASH_ADD));
   1.120 +      NS_ASSERTION(entry, "Error adding an entry");
   1.121 +      // Prefer earlier entries when we have duplication.
   1.122 +      if (!entry->node)
   1.123 +        entry->node = node;
   1.124 +
   1.125 +      // add to Unicode->Entity table
   1.126 +      entry = static_cast<EntityNodeEntry*>
   1.127 +                         (PL_DHashTableOperate(&gUnicodeToEntity,
   1.128 +                                                  NS_INT32_TO_PTR(node->mUnicode),
   1.129 +                                                  PL_DHASH_ADD));
   1.130 +      NS_ASSERTION(entry, "Error adding an entry");
   1.131 +      // Prefer earlier entries when we have duplication.
   1.132 +      if (!entry->node)
   1.133 +        entry->node = node;
   1.134 +    }
   1.135 +#ifdef DEBUG
   1.136 +    PL_DHashMarkTableImmutable(&gUnicodeToEntity);
   1.137 +    PL_DHashMarkTableImmutable(&gEntityToUnicode);
   1.138 +#endif
   1.139 +  }
   1.140 +  ++gTableRefCnt;
   1.141 +  return NS_OK;
   1.142 +}
   1.143 +
   1.144 +void
   1.145 +nsHTMLEntities::ReleaseTable(void) 
   1.146 +{
   1.147 +  if (--gTableRefCnt != 0)
   1.148 +    return;
   1.149 +
   1.150 +  if (gEntityToUnicode.ops) {
   1.151 +    PL_DHashTableFinish(&gEntityToUnicode);
   1.152 +    gEntityToUnicode.ops = nullptr;
   1.153 +  }
   1.154 +  if (gUnicodeToEntity.ops) {
   1.155 +    PL_DHashTableFinish(&gUnicodeToEntity);
   1.156 +    gUnicodeToEntity.ops = nullptr;
   1.157 +  }
   1.158 +
   1.159 +}
   1.160 +
   1.161 +int32_t 
   1.162 +nsHTMLEntities::EntityToUnicode(const nsCString& aEntity)
   1.163 +{
   1.164 +  NS_ASSERTION(gEntityToUnicode.ops, "no lookup table, needs addref");
   1.165 +  if (!gEntityToUnicode.ops)
   1.166 +    return -1;
   1.167 +
   1.168 +    //this little piece of code exists because entities may or may not have the terminating ';'.
   1.169 +    //if we see it, strip if off for this test...
   1.170 +
   1.171 +    if(';'==aEntity.Last()) {
   1.172 +      nsAutoCString temp(aEntity);
   1.173 +      temp.Truncate(aEntity.Length()-1);
   1.174 +      return EntityToUnicode(temp);
   1.175 +    }
   1.176 +      
   1.177 +  EntityNodeEntry* entry = 
   1.178 +    static_cast<EntityNodeEntry*>
   1.179 +               (PL_DHashTableOperate(&gEntityToUnicode, aEntity.get(), PL_DHASH_LOOKUP));
   1.180 +
   1.181 +  if (!entry || PL_DHASH_ENTRY_IS_FREE(entry))
   1.182 +  return -1;
   1.183 +        
   1.184 +  return entry->node->mUnicode;
   1.185 +}
   1.186 +
   1.187 +
   1.188 +int32_t 
   1.189 +nsHTMLEntities::EntityToUnicode(const nsAString& aEntity) {
   1.190 +  nsAutoCString theEntity; theEntity.AssignWithConversion(aEntity);
   1.191 +  if(';'==theEntity.Last()) {
   1.192 +    theEntity.Truncate(theEntity.Length()-1);
   1.193 +  }
   1.194 +
   1.195 +  return EntityToUnicode(theEntity);
   1.196 +}
   1.197 +
   1.198 +
   1.199 +const char*
   1.200 +nsHTMLEntities::UnicodeToEntity(int32_t aUnicode)
   1.201 +{
   1.202 +  NS_ASSERTION(gUnicodeToEntity.ops, "no lookup table, needs addref");
   1.203 +  EntityNodeEntry* entry =
   1.204 +    static_cast<EntityNodeEntry*>
   1.205 +               (PL_DHashTableOperate(&gUnicodeToEntity, NS_INT32_TO_PTR(aUnicode), PL_DHASH_LOOKUP));
   1.206 +                   
   1.207 +  if (!entry || PL_DHASH_ENTRY_IS_FREE(entry))
   1.208 +  return nullptr;
   1.209 +    
   1.210 +  return entry->node->mStr;
   1.211 +}
   1.212 +
   1.213 +#ifdef DEBUG
   1.214 +#include <stdio.h>
   1.215 +
   1.216 +class nsTestEntityTable {
   1.217 +public:
   1.218 +   nsTestEntityTable() {
   1.219 +     int32_t value;
   1.220 +     nsHTMLEntities::AddRefTable();
   1.221 +
   1.222 +     // Make sure we can find everything we are supposed to
   1.223 +     for (int i = 0; i < NS_HTML_ENTITY_COUNT; ++i) {
   1.224 +       nsAutoString entity; entity.AssignWithConversion(gEntityArray[i].mStr);
   1.225 +
   1.226 +       value = nsHTMLEntities::EntityToUnicode(entity);
   1.227 +       NS_ASSERTION(value != -1, "can't find entity");
   1.228 +       NS_ASSERTION(value == gEntityArray[i].mUnicode, "bad unicode value");
   1.229 +
   1.230 +       entity.AssignWithConversion(nsHTMLEntities::UnicodeToEntity(value));
   1.231 +       NS_ASSERTION(entity.EqualsASCII(gEntityArray[i].mStr), "bad entity name");
   1.232 +     }
   1.233 +
   1.234 +     // Make sure we don't find things that aren't there
   1.235 +     value = nsHTMLEntities::EntityToUnicode(nsAutoCString("@"));
   1.236 +     NS_ASSERTION(value == -1, "found @");
   1.237 +     value = nsHTMLEntities::EntityToUnicode(nsAutoCString("zzzzz"));
   1.238 +     NS_ASSERTION(value == -1, "found zzzzz");
   1.239 +     nsHTMLEntities::ReleaseTable();
   1.240 +   }
   1.241 +};
   1.242 +//nsTestEntityTable validateEntityTable;
   1.243 +#endif
   1.244 +

mercurial