xpcom/tests/TestEncoding.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/xpcom/tests/TestEncoding.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,203 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "TestHarness.h"
    1.10 +
    1.11 +nsresult TestGoodSurrogatePair()
    1.12 +{
    1.13 +  // When this string is decoded, the surrogate pair is U+10302 and the rest of
    1.14 +  // the string is specified by indexes 2 onward.
    1.15 +  const char16_t goodPairData[] = {  0xD800, 0xDF02, 0x65, 0x78, 0x0 };
    1.16 +  nsDependentString goodPair16(goodPairData);
    1.17 +
    1.18 +  uint32_t byteCount = 0;
    1.19 +  char* goodPair8 = ToNewUTF8String(goodPair16, &byteCount);
    1.20 +  if (!goodPair8)
    1.21 +  {
    1.22 +    fail("out of memory creating goodPair8");
    1.23 +    return NS_ERROR_OUT_OF_MEMORY;
    1.24 +  }
    1.25 +
    1.26 +  if (byteCount != 6)
    1.27 +  {
    1.28 +    fail("wrong number of bytes; expected 6, got %lu", byteCount);
    1.29 +    return NS_ERROR_FAILURE;
    1.30 +  }
    1.31 +
    1.32 +  const unsigned char expected8[] =
    1.33 +    { 0xF0, 0x90, 0x8C, 0x82, 0x65, 0x78, 0x0 };
    1.34 +  if (0 != memcmp(expected8, goodPair8, sizeof(expected8)))
    1.35 +  {
    1.36 +    fail("wrong translation to UTF8");
    1.37 +    return NS_ERROR_FAILURE;
    1.38 +  }
    1.39 +
    1.40 +  // This takes a different code path from the above, so test it to make sure
    1.41 +  // the UTF-16 enumeration remains in sync with the UTF-8 enumeration.
    1.42 +  nsDependentCString expected((const char*)expected8);
    1.43 +  if (0 != CompareUTF8toUTF16(expected, goodPair16))
    1.44 +  {
    1.45 +    fail("bad comparison between UTF-8 and equivalent UTF-16");
    1.46 +    return NS_ERROR_FAILURE;
    1.47 +  }
    1.48 +
    1.49 +  NS_Free(goodPair8);
    1.50 +
    1.51 +  passed("TestGoodSurrogatePair");
    1.52 +  return NS_OK;
    1.53 +}
    1.54 +
    1.55 +nsresult TestBackwardsSurrogatePair()
    1.56 +{
    1.57 +  // When this string is decoded, the two surrogates are wrongly ordered and
    1.58 +  // must each be interpreted as U+FFFD.
    1.59 +  const char16_t backwardsPairData[] = { 0xDDDD, 0xD863, 0x65, 0x78, 0x0 };
    1.60 +  nsDependentString backwardsPair16(backwardsPairData);
    1.61 +
    1.62 +  uint32_t byteCount = 0;
    1.63 +  char* backwardsPair8 = ToNewUTF8String(backwardsPair16, &byteCount);
    1.64 +  if (!backwardsPair8)
    1.65 +  {
    1.66 +    fail("out of memory creating backwardsPair8");
    1.67 +    return NS_ERROR_OUT_OF_MEMORY;
    1.68 +  }
    1.69 +
    1.70 +  if (byteCount != 8)
    1.71 +  {
    1.72 +    fail("wrong number of bytes; expected 8, got %lu", byteCount);
    1.73 +    return NS_ERROR_FAILURE;
    1.74 +  }
    1.75 +
    1.76 +  const unsigned char expected8[] =
    1.77 +    { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0x65, 0x78, 0x0 };
    1.78 +  if (0 != memcmp(expected8, backwardsPair8, sizeof(expected8)))
    1.79 +  {
    1.80 +    fail("wrong translation to UTF8");
    1.81 +    return NS_ERROR_FAILURE;
    1.82 +  }
    1.83 +
    1.84 +  // This takes a different code path from the above, so test it to make sure
    1.85 +  // the UTF-16 enumeration remains in sync with the UTF-8 enumeration.
    1.86 +  nsDependentCString expected((const char*)expected8);
    1.87 +  if (0 != CompareUTF8toUTF16(expected, backwardsPair16))
    1.88 +  {
    1.89 +    fail("bad comparison between UTF-8 and malformed but equivalent UTF-16");
    1.90 +    return NS_ERROR_FAILURE;
    1.91 +  }
    1.92 +
    1.93 +  NS_Free(backwardsPair8);
    1.94 +
    1.95 +  passed("TestBackwardsSurrogatePair");
    1.96 +  return NS_OK;
    1.97 +}
    1.98 +
    1.99 +nsresult TestMalformedUTF16OrphanHighSurrogate()
   1.100 +{
   1.101 +  // When this string is decoded, the high surrogate should be replaced and the
   1.102 +  // rest of the string is specified by indexes 1 onward.
   1.103 +  const char16_t highSurrogateData[] = { 0xD863, 0x74, 0x65, 0x78, 0x74, 0x0 };
   1.104 +  nsDependentString highSurrogate16(highSurrogateData);
   1.105 +
   1.106 +  uint32_t byteCount = 0;
   1.107 +  char* highSurrogate8 = ToNewUTF8String(highSurrogate16, &byteCount);
   1.108 +  if (!highSurrogate8)
   1.109 +  {
   1.110 +    fail("out of memory creating highSurrogate8");
   1.111 +    return NS_ERROR_OUT_OF_MEMORY;
   1.112 +  }
   1.113 +
   1.114 +  if (byteCount != 7)
   1.115 +  {
   1.116 +    fail("wrong number of bytes; expected 7, got %lu", byteCount);
   1.117 +    return NS_ERROR_FAILURE;
   1.118 +  }
   1.119 +
   1.120 +  const unsigned char expected8[] =
   1.121 +    { 0xEF, 0xBF, 0xBD, 0x74, 0x65, 0x78, 0x74, 0x0 };
   1.122 +  if (0 != memcmp(expected8, highSurrogate8, sizeof(expected8)))
   1.123 +  {
   1.124 +    fail("wrong translation to UTF8");
   1.125 +    return NS_ERROR_FAILURE;
   1.126 +  }
   1.127 +
   1.128 +  // This takes a different code path from the above, so test it to make sure
   1.129 +  // the UTF-16 enumeration remains in sync with the UTF-8 enumeration.
   1.130 +  nsDependentCString expected((const char*)expected8);
   1.131 +  if (0 != CompareUTF8toUTF16(expected, highSurrogate16))
   1.132 +  {
   1.133 +    fail("bad comparison between UTF-8 and malformed but equivalent UTF-16");
   1.134 +    return NS_ERROR_FAILURE;
   1.135 +  }
   1.136 +
   1.137 +  NS_Free(highSurrogate8);
   1.138 +
   1.139 +  passed("TestMalformedUTF16OrphanHighSurrogate");
   1.140 +  return NS_OK;
   1.141 +}
   1.142 +
   1.143 +nsresult TestMalformedUTF16OrphanLowSurrogate()
   1.144 +{
   1.145 +  // When this string is decoded, the low surrogate should be replaced and the
   1.146 +  // rest of the string is specified by indexes 1 onward.
   1.147 +  const char16_t lowSurrogateData[] = { 0xDDDD, 0x74, 0x65, 0x78, 0x74, 0x0 };
   1.148 +  nsDependentString lowSurrogate16(lowSurrogateData);
   1.149 +
   1.150 +  uint32_t byteCount = 0;
   1.151 +  char* lowSurrogate8 = ToNewUTF8String(lowSurrogate16, &byteCount);
   1.152 +  if (!lowSurrogate8)
   1.153 +  {
   1.154 +    fail("out of memory creating lowSurrogate8");
   1.155 +    return NS_ERROR_OUT_OF_MEMORY;
   1.156 +  }
   1.157 +
   1.158 +  if (byteCount != 7)
   1.159 +  {
   1.160 +    fail("wrong number of bytes; expected 7, got %lu", byteCount);
   1.161 +    return NS_ERROR_FAILURE;
   1.162 +  }
   1.163 +
   1.164 +  const unsigned char expected8[] =
   1.165 +    { 0xEF, 0xBF, 0xBD, 0x74, 0x65, 0x78, 0x74, 0x0 };
   1.166 +  if (0 != memcmp(expected8, lowSurrogate8, sizeof(expected8)))
   1.167 +  {
   1.168 +    fail("wrong translation to UTF8");
   1.169 +    return NS_ERROR_FAILURE;
   1.170 +  }
   1.171 +
   1.172 +  // This takes a different code path from the above, so test it to make sure
   1.173 +  // the UTF-16 enumeration remains in sync with the UTF-8 enumeration.
   1.174 +  nsDependentCString expected((const char*)expected8);
   1.175 +  if (0 != CompareUTF8toUTF16(expected, lowSurrogate16))
   1.176 +  {
   1.177 +    fail("bad comparison between UTF-8 and malformed but equivalent UTF-16");
   1.178 +    return NS_ERROR_FAILURE;
   1.179 +  }
   1.180 +
   1.181 +  NS_Free(lowSurrogate8);
   1.182 +
   1.183 +  passed("TestMalformedUTF16OrphanLowSurrogate");
   1.184 +  return NS_OK;
   1.185 +}
   1.186 +
   1.187 +
   1.188 +int main(int argc, char** argv)
   1.189 +{
   1.190 +  ScopedXPCOM xpcom("TestEncoding");
   1.191 +  if (xpcom.failed())
   1.192 +    return 1;
   1.193 +
   1.194 +  int rv = 0;
   1.195 +
   1.196 +  if (NS_FAILED(TestGoodSurrogatePair()))
   1.197 +    rv = 1;
   1.198 +  if (NS_FAILED(TestBackwardsSurrogatePair()))
   1.199 +    rv = 1;
   1.200 +  if (NS_FAILED(TestMalformedUTF16OrphanHighSurrogate()))
   1.201 +    rv = 1;
   1.202 +  if (NS_FAILED(TestMalformedUTF16OrphanLowSurrogate()))
   1.203 +    rv = 1;
   1.204 +
   1.205 +  return rv;
   1.206 +}

mercurial