michael@0: /* michael@0: ******************************************************************************* michael@0: * michael@0: * Copyright (C) 2003-2013, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: * michael@0: ******************************************************************************* michael@0: * file name: gencnvex.c michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2003oct12 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/ustring.h" michael@0: #include "cstring.h" michael@0: #include "cmemory.h" michael@0: #include "ucnv_cnv.h" michael@0: #include "ucnvmbcs.h" michael@0: #include "toolutil.h" michael@0: #include "unewdata.h" michael@0: #include "ucm.h" michael@0: #include "makeconv.h" michael@0: #include "genmbcs.h" michael@0: michael@0: #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) michael@0: michael@0: michael@0: static void michael@0: CnvExtClose(NewConverter *cnvData); michael@0: michael@0: static UBool michael@0: CnvExtIsValid(NewConverter *cnvData, michael@0: const uint8_t *bytes, int32_t length); michael@0: michael@0: static UBool michael@0: CnvExtAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData); michael@0: michael@0: static uint32_t michael@0: CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData, michael@0: UNewDataMemory *pData, int32_t tableType); michael@0: michael@0: typedef struct CnvExtData { michael@0: NewConverter newConverter; michael@0: michael@0: UCMFile *ucm; michael@0: michael@0: /* toUnicode (state table in ucm->states) */ michael@0: UToolMemory *toUTable, *toUUChars; michael@0: michael@0: /* fromUnicode */ michael@0: UToolMemory *fromUTableUChars, *fromUTableValues, *fromUBytes; michael@0: michael@0: uint16_t stage1[MBCS_STAGE_1_SIZE]; michael@0: uint16_t stage2[MBCS_STAGE_2_SIZE]; michael@0: uint16_t stage3[0x10000< |2 mappings */ michael@0: uint16_t stage3Sub1Block; michael@0: michael@0: /* statistics */ michael@0: int32_t michael@0: maxInBytes, maxOutBytes, maxBytesPerUChar, michael@0: maxInUChars, maxOutUChars, maxUCharsPerByte; michael@0: } CnvExtData; michael@0: michael@0: NewConverter * michael@0: CnvExtOpen(UCMFile *ucm) { michael@0: CnvExtData *extData; michael@0: michael@0: extData=(CnvExtData *)uprv_malloc(sizeof(CnvExtData)); michael@0: if(extData==NULL) { michael@0: printf("out of memory\n"); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: uprv_memset(extData, 0, sizeof(CnvExtData)); michael@0: michael@0: extData->ucm=ucm; /* aliased, not owned */ michael@0: michael@0: extData->newConverter.close=CnvExtClose; michael@0: extData->newConverter.isValid=CnvExtIsValid; michael@0: extData->newConverter.addTable=CnvExtAddTable; michael@0: extData->newConverter.write=CnvExtWrite; michael@0: return &extData->newConverter; michael@0: } michael@0: michael@0: static void michael@0: CnvExtClose(NewConverter *cnvData) { michael@0: CnvExtData *extData=(CnvExtData *)cnvData; michael@0: if(extData!=NULL) { michael@0: utm_close(extData->toUTable); michael@0: utm_close(extData->toUUChars); michael@0: utm_close(extData->fromUTableUChars); michael@0: utm_close(extData->fromUTableValues); michael@0: utm_close(extData->fromUBytes); michael@0: uprv_free(extData); michael@0: } michael@0: } michael@0: michael@0: /* we do not expect this to be called */ michael@0: static UBool michael@0: CnvExtIsValid(NewConverter *cnvData, michael@0: const uint8_t *bytes, int32_t length) { michael@0: return FALSE; michael@0: } michael@0: michael@0: static uint32_t michael@0: CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData, michael@0: UNewDataMemory *pData, int32_t tableType) { michael@0: CnvExtData *extData=(CnvExtData *)cnvData; michael@0: int32_t length, top, headerSize; michael@0: michael@0: int32_t indexes[UCNV_EXT_INDEXES_MIN_LENGTH]={ 0 }; michael@0: michael@0: if(tableType&TABLE_BASE) { michael@0: headerSize=0; michael@0: } else { michael@0: _MBCSHeader header={ { 0, 0, 0, 0 }, 0, 0, 0, 0, 0, 0, 0 }; michael@0: michael@0: /* write the header and base table name for an extension-only table */ michael@0: length=(int32_t)uprv_strlen(extData->ucm->baseName)+1; michael@0: while(length&3) { michael@0: /* add padding */ michael@0: extData->ucm->baseName[length++]=0; michael@0: } michael@0: michael@0: headerSize=MBCS_HEADER_V4_LENGTH*4+length; michael@0: michael@0: /* fill the header */ michael@0: header.version[0]=4; michael@0: header.version[1]=2; michael@0: header.flags=(uint32_t)((headerSize<<8)|MBCS_OUTPUT_EXT_ONLY); michael@0: michael@0: /* write the header and the base table name */ michael@0: udata_writeBlock(pData, &header, MBCS_HEADER_V4_LENGTH*4); michael@0: udata_writeBlock(pData, extData->ucm->baseName, length); michael@0: } michael@0: michael@0: /* fill indexes[] - offsets/indexes are in units of the target array */ michael@0: top=0; michael@0: michael@0: indexes[UCNV_EXT_INDEXES_LENGTH]=length=UCNV_EXT_INDEXES_MIN_LENGTH; michael@0: top+=length*4; michael@0: michael@0: indexes[UCNV_EXT_TO_U_INDEX]=top; michael@0: indexes[UCNV_EXT_TO_U_LENGTH]=length=utm_countItems(extData->toUTable); michael@0: top+=length*4; michael@0: michael@0: indexes[UCNV_EXT_TO_U_UCHARS_INDEX]=top; michael@0: indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]=length=utm_countItems(extData->toUUChars); michael@0: top+=length*2; michael@0: michael@0: indexes[UCNV_EXT_FROM_U_UCHARS_INDEX]=top; michael@0: length=utm_countItems(extData->fromUTableUChars); michael@0: top+=length*2; michael@0: michael@0: if(top&3) { michael@0: /* add padding */ michael@0: *((UChar *)utm_alloc(extData->fromUTableUChars))=0; michael@0: *((uint32_t *)utm_alloc(extData->fromUTableValues))=0; michael@0: ++length; michael@0: top+=2; michael@0: } michael@0: indexes[UCNV_EXT_FROM_U_LENGTH]=length; michael@0: michael@0: indexes[UCNV_EXT_FROM_U_VALUES_INDEX]=top; michael@0: top+=length*4; michael@0: michael@0: indexes[UCNV_EXT_FROM_U_BYTES_INDEX]=top; michael@0: length=utm_countItems(extData->fromUBytes); michael@0: top+=length; michael@0: michael@0: if(top&1) { michael@0: /* add padding */ michael@0: *((uint8_t *)utm_alloc(extData->fromUBytes))=0; michael@0: ++length; michael@0: ++top; michael@0: } michael@0: indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]=length; michael@0: michael@0: indexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]=top; michael@0: indexes[UCNV_EXT_FROM_U_STAGE_1_LENGTH]=length=extData->stage1Top; michael@0: indexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]=length+=extData->stage2Top; michael@0: top+=length*2; michael@0: michael@0: indexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]=top; michael@0: length=extData->stage3Top; michael@0: top+=length*2; michael@0: michael@0: if(top&3) { michael@0: /* add padding */ michael@0: extData->stage3[extData->stage3Top++]=0; michael@0: ++length; michael@0: top+=2; michael@0: } michael@0: indexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]=length; michael@0: michael@0: indexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]=top; michael@0: indexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]=length=extData->stage3bTop; michael@0: top+=length*4; michael@0: michael@0: indexes[UCNV_EXT_SIZE]=top; michael@0: michael@0: /* statistics */ michael@0: indexes[UCNV_EXT_COUNT_BYTES]= michael@0: (extData->maxInBytes<<16)| michael@0: (extData->maxOutBytes<<8)| michael@0: extData->maxBytesPerUChar; michael@0: indexes[UCNV_EXT_COUNT_UCHARS]= michael@0: (extData->maxInUChars<<16)| michael@0: (extData->maxOutUChars<<8)| michael@0: extData->maxUCharsPerByte; michael@0: michael@0: indexes[UCNV_EXT_FLAGS]=extData->ucm->ext->unicodeMask; michael@0: michael@0: /* write the extension data */ michael@0: udata_writeBlock(pData, indexes, sizeof(indexes)); michael@0: udata_writeBlock(pData, utm_getStart(extData->toUTable), indexes[UCNV_EXT_TO_U_LENGTH]*4); michael@0: udata_writeBlock(pData, utm_getStart(extData->toUUChars), indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]*2); michael@0: michael@0: udata_writeBlock(pData, utm_getStart(extData->fromUTableUChars), indexes[UCNV_EXT_FROM_U_LENGTH]*2); michael@0: udata_writeBlock(pData, utm_getStart(extData->fromUTableValues), indexes[UCNV_EXT_FROM_U_LENGTH]*4); michael@0: udata_writeBlock(pData, utm_getStart(extData->fromUBytes), indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]); michael@0: michael@0: udata_writeBlock(pData, extData->stage1, extData->stage1Top*2); michael@0: udata_writeBlock(pData, extData->stage2, extData->stage2Top*2); michael@0: udata_writeBlock(pData, extData->stage3, extData->stage3Top*2); michael@0: udata_writeBlock(pData, extData->stage3b, extData->stage3bTop*4); michael@0: michael@0: #if 0 michael@0: { michael@0: int32_t i, j; michael@0: michael@0: length=extData->stage1Top; michael@0: printf("\nstage1[%x]:\n", length); michael@0: michael@0: for(i=0; istage1[i]!=length) { michael@0: printf("stage1[%04x]=%04x\n", i, extData->stage1[i]); michael@0: } michael@0: } michael@0: michael@0: j=length; michael@0: length=extData->stage2Top; michael@0: printf("\nstage2[%x]:\n", length); michael@0: michael@0: for(i=0; istage2[i]!=0) { michael@0: printf("stage12[%04x]=%04x\n", j, extData->stage2[i]); michael@0: } michael@0: } michael@0: michael@0: length=extData->stage3Top; michael@0: printf("\nstage3[%x]:\n", length); michael@0: michael@0: for(i=0; istage3[i]!=0) { michael@0: printf("stage3[%04x]=%04x\n", i, extData->stage3[i]); michael@0: } michael@0: } michael@0: michael@0: length=extData->stage3bTop; michael@0: printf("\nstage3b[%x]:\n", length); michael@0: michael@0: for(i=0; istage3b[i]!=0) { michael@0: printf("stage3b[%04x]=%08x\n", i, extData->stage3b[i]); michael@0: } michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: if(VERBOSE) { michael@0: printf("size of extension data: %ld\n", (long)top); michael@0: } michael@0: michael@0: /* return the number of bytes that should have been written */ michael@0: return (uint32_t)(headerSize+top); michael@0: } michael@0: michael@0: /* to Unicode --------------------------------------------------------------- */ michael@0: michael@0: /* michael@0: * Remove fromUnicode fallbacks and SUB mappings which are irrelevant for michael@0: * the toUnicode table. michael@0: * This includes mappings with MBCS_FROM_U_EXT_FLAG which were suitable michael@0: * for the base toUnicode table but not for the base fromUnicode table. michael@0: * The table must be sorted. michael@0: * Modifies previous data in the reverseMap. michael@0: */ michael@0: static int32_t michael@0: reduceToUMappings(UCMTable *table) { michael@0: UCMapping *mappings; michael@0: int32_t *map; michael@0: int32_t i, j, count; michael@0: int8_t flag; michael@0: michael@0: mappings=table->mappings; michael@0: map=table->reverseMap; michael@0: count=table->mappingsLength; michael@0: michael@0: /* leave the map alone for the initial mappings with desired flags */ michael@0: for(i=j=0; iuLen==1) { michael@0: u16Length=U16_LENGTH(m->u); michael@0: value=(uint32_t)(UCNV_EXT_TO_U_MIN_CODE_POINT+m->u); michael@0: } else { michael@0: /* the parser enforces m->uLen<=UCNV_EXT_MAX_UCHARS */ michael@0: michael@0: /* get the result code point string and its 16-bit string length */ michael@0: u32=UCM_GET_CODE_POINTS(table, m); michael@0: errorCode=U_ZERO_ERROR; michael@0: u_strFromUTF32(NULL, 0, &u16Length, u32, m->uLen, &errorCode); michael@0: if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) { michael@0: exit(errorCode); michael@0: } michael@0: michael@0: /* allocate it and put its length and index into the value */ michael@0: value= michael@0: (((uint32_t)u16Length+UCNV_EXT_TO_U_LENGTH_OFFSET)<toUUChars)); michael@0: u=utm_allocN(extData->toUUChars, u16Length); michael@0: michael@0: /* write the result 16-bit string */ michael@0: errorCode=U_ZERO_ERROR; michael@0: u_strFromUTF32(u, u16Length, NULL, u32, m->uLen, &errorCode); michael@0: if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) { michael@0: exit(errorCode); michael@0: } michael@0: } michael@0: if(m->f==0) { michael@0: value|=UCNV_EXT_TO_U_ROUNDTRIP_FLAG; michael@0: } michael@0: michael@0: /* update statistics */ michael@0: if(m->bLen>extData->maxInBytes) { michael@0: extData->maxInBytes=m->bLen; michael@0: } michael@0: if(u16Length>extData->maxOutUChars) { michael@0: extData->maxOutUChars=u16Length; michael@0: } michael@0: michael@0: ratio=(u16Length+(m->bLen-1))/m->bLen; michael@0: if(ratio>extData->maxUCharsPerByte) { michael@0: extData->maxUCharsPerByte=ratio; michael@0: } michael@0: michael@0: return value; michael@0: } michael@0: michael@0: /* michael@0: * Recursive toUTable generator core function. michael@0: * Preconditions: michael@0: * - start0: if there is one mapping with an input unit sequence of unitIndex+1 michael@0: * then defaultValue=compute the mapping result for this whole sequence michael@0: * else defaultValue=0 michael@0: * michael@0: * recurse into the subsection michael@0: */ michael@0: static UBool michael@0: generateToUTable(CnvExtData *extData, UCMTable *table, michael@0: int32_t start, int32_t limit, int32_t unitIndex, michael@0: uint32_t defaultValue) { michael@0: UCMapping *mappings, *m; michael@0: int32_t *map; michael@0: int32_t i, j, uniqueCount, count, subStart, subLimit; michael@0: michael@0: uint8_t *bytes; michael@0: int32_t low, high, prev; michael@0: michael@0: uint32_t *section; michael@0: michael@0: mappings=table->mappings; michael@0: map=table->reverseMap; michael@0: michael@0: /* step 1: examine the input units; set low, high, uniqueCount */ michael@0: m=mappings+map[start]; michael@0: bytes=UCM_GET_BYTES(table, m); michael@0: low=bytes[unitIndex]; michael@0: uniqueCount=1; michael@0: michael@0: prev=high=low; michael@0: for(i=start+1; i=(3*count)/4)) { michael@0: /* michael@0: * for the root table and for fairly full tables: michael@0: * allocate for direct, linear array access michael@0: * by keeping count, to write an entry for each unit value michael@0: * from low to high michael@0: * exception: use a compact table if count==0x100 because michael@0: * that cannot be encoded in the length byte michael@0: */ michael@0: } else { michael@0: count=uniqueCount; michael@0: } michael@0: michael@0: if(count>=0x100) { michael@0: fprintf(stderr, "error: toUnicode extension table section overflow: %ld section entries\n", (long)count); michael@0: return FALSE; michael@0: } michael@0: michael@0: /* allocate the section: 1 entry for the header + count for the items */ michael@0: section=(uint32_t *)utm_allocN(extData->toUTable, 1+count); michael@0: michael@0: /* write the section header */ michael@0: *section++=((uint32_t)count<uniqueCount) { michael@0: /* write empty subsections for unused units in a linear table */ michael@0: while(++prevbLen==unitIndex+1) { michael@0: /* do not include this in generateToUTable() */ michael@0: ++subStart; michael@0: michael@0: if(subStarttoUTable); michael@0: michael@0: /* recurse */ michael@0: if(!generateToUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) { michael@0: return FALSE; michael@0: } michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: michael@0: /* michael@0: * Generate the toUTable and toUUChars from the input table. michael@0: * The input table must be sorted, and all precision flags must be 0..3. michael@0: * This function will modify the table's reverseMap. michael@0: */ michael@0: static UBool michael@0: makeToUTable(CnvExtData *extData, UCMTable *table) { michael@0: int32_t toUCount; michael@0: michael@0: toUCount=reduceToUMappings(table); michael@0: michael@0: extData->toUTable=utm_open("cnv extension toUTable", 0x10000, UCNV_EXT_TO_U_MIN_CODE_POINT, 4); michael@0: extData->toUUChars=utm_open("cnv extension toUUChars", 0x10000, UCNV_EXT_TO_U_INDEX_MASK+1, 2); michael@0: michael@0: return generateToUTable(extData, table, 0, toUCount, 0, 0); michael@0: } michael@0: michael@0: /* from Unicode ------------------------------------------------------------- */ michael@0: michael@0: /* michael@0: * preprocessing: michael@0: * rebuild reverseMap with mapping indexes for mappings relevant for from Unicode michael@0: * change each Unicode string to encode all but the first code point in 16-bit form michael@0: * michael@0: * generation: michael@0: * for each unique code point michael@0: * write an entry in the 3-stage trie michael@0: * check that there is only one single-code point sequence michael@0: * start recursion for following 16-bit input units michael@0: */ michael@0: michael@0: /* michael@0: * Remove toUnicode fallbacks and non- SUB mappings michael@0: * which are irrelevant for the fromUnicode extension table. michael@0: * Remove MBCS_FROM_U_EXT_FLAG bits. michael@0: * Overwrite the reverseMap with an index array to the relevant mappings. michael@0: * Modify the code point sequences to a generator-friendly format where michael@0: * the first code points remains unchanged but the following are recoded michael@0: * into 16-bit Unicode string form. michael@0: * The table must be sorted. michael@0: * Destroys previous data in the reverseMap. michael@0: */ michael@0: static int32_t michael@0: prepareFromUMappings(UCMTable *table) { michael@0: UCMapping *mappings, *m; michael@0: int32_t *map; michael@0: int32_t i, j, count; michael@0: int8_t flag; michael@0: michael@0: mappings=table->mappings; michael@0: map=table->reverseMap; michael@0: count=table->mappingsLength; michael@0: michael@0: /* michael@0: * we do not go through the map on input because the mappings are michael@0: * sorted lexically michael@0: */ michael@0: m=mappings; michael@0: michael@0: for(i=j=0; if; michael@0: if(flag>=0) { michael@0: flag&=MBCS_FROM_U_EXT_MASK; michael@0: m->f=flag; michael@0: } michael@0: if(flag==0 || flag==1 || (flag==2 && m->bLen==1) || flag==4) { michael@0: map[j++]=i; michael@0: michael@0: if(m->uLen>1) { michael@0: /* recode all but the first code point to 16-bit Unicode */ michael@0: UChar32 *u32; michael@0: UChar *u; michael@0: UChar32 c; michael@0: int32_t q, r; michael@0: michael@0: u32=UCM_GET_CODE_POINTS(table, m); michael@0: u=(UChar *)u32; /* destructive in-place recoding */ michael@0: for(r=2, q=1; quLen; ++q) { michael@0: c=u32[q]; michael@0: U16_APPEND_UNSAFE(u, r, c); michael@0: } michael@0: michael@0: /* counts the first code point always at 2 - the first 16-bit unit is at 16-bit index 2 */ michael@0: m->uLen=(int8_t)r; michael@0: } michael@0: } michael@0: } michael@0: michael@0: return j; michael@0: } michael@0: michael@0: static uint32_t michael@0: getFromUBytesValue(CnvExtData *extData, UCMTable *table, UCMapping *m) { michael@0: uint8_t *bytes, *resultBytes; michael@0: uint32_t value; michael@0: int32_t u16Length, ratio; michael@0: michael@0: if(m->f==2) { michael@0: /* michael@0: * no mapping, preferred michael@0: * michael@0: * no need to count in statistics because the subchars are already michael@0: * counted for maxOutBytes and maxBytesPerUChar in UConverterStaticData, michael@0: * and this non-mapping does not count for maxInUChars which are always michael@0: * trivially at least two if counting unmappable supplementary code points michael@0: */ michael@0: return UCNV_EXT_FROM_U_SUBCHAR1; michael@0: } michael@0: michael@0: bytes=UCM_GET_BYTES(table, m); michael@0: value=0; michael@0: switch(m->bLen) { michael@0: /* 1..3: store the bytes in the value word */ michael@0: case 3: michael@0: value=((uint32_t)*bytes++)<<16; michael@0: case 2: michael@0: value|=((uint32_t)*bytes++)<<8; michael@0: case 1: michael@0: value|=*bytes; michael@0: break; michael@0: default: michael@0: /* the parser enforces m->bLen<=UCNV_EXT_MAX_BYTES */ michael@0: /* store the bytes in fromUBytes[] and the index in the value word */ michael@0: value=(uint32_t)utm_countItems(extData->fromUBytes); michael@0: resultBytes=utm_allocN(extData->fromUBytes, m->bLen); michael@0: uprv_memcpy(resultBytes, bytes, m->bLen); michael@0: break; michael@0: } michael@0: value|=(uint32_t)m->bLen<f==0) { michael@0: value|=UCNV_EXT_FROM_U_ROUNDTRIP_FLAG; michael@0: } else if(m->f==4) { michael@0: value|=UCNV_EXT_FROM_U_GOOD_ONE_WAY_FLAG; michael@0: } michael@0: michael@0: /* calculate the real UTF-16 length (see recoding in prepareFromUMappings()) */ michael@0: if(m->uLen==1) { michael@0: u16Length=U16_LENGTH(m->u); michael@0: } else { michael@0: u16Length=U16_LENGTH(UCM_GET_CODE_POINTS(table, m)[0])+(m->uLen-2); michael@0: } michael@0: michael@0: /* update statistics */ michael@0: if(u16Length>extData->maxInUChars) { michael@0: extData->maxInUChars=u16Length; michael@0: } michael@0: if(m->bLen>extData->maxOutBytes) { michael@0: extData->maxOutBytes=m->bLen; michael@0: } michael@0: michael@0: ratio=(m->bLen+(u16Length-1))/u16Length; michael@0: if(ratio>extData->maxBytesPerUChar) { michael@0: extData->maxBytesPerUChar=ratio; michael@0: } michael@0: michael@0: return value; michael@0: } michael@0: michael@0: /* michael@0: * works like generateToUTable(), except that the michael@0: * output section consists of two arrays, one for input UChars and one michael@0: * for result values michael@0: * michael@0: * also, fromUTable sections are always stored in a compact form for michael@0: * access via binary search michael@0: */ michael@0: static UBool michael@0: generateFromUTable(CnvExtData *extData, UCMTable *table, michael@0: int32_t start, int32_t limit, int32_t unitIndex, michael@0: uint32_t defaultValue) { michael@0: UCMapping *mappings, *m; michael@0: int32_t *map; michael@0: int32_t i, j, uniqueCount, count, subStart, subLimit; michael@0: michael@0: UChar *uchars; michael@0: UChar32 low, high, prev; michael@0: michael@0: UChar *sectionUChars; michael@0: uint32_t *sectionValues; michael@0: michael@0: mappings=table->mappings; michael@0: map=table->reverseMap; michael@0: michael@0: /* step 1: examine the input units; set low, high, uniqueCount */ michael@0: m=mappings+map[start]; michael@0: uchars=(UChar *)UCM_GET_CODE_POINTS(table, m); michael@0: low=uchars[unitIndex]; michael@0: uniqueCount=1; michael@0: michael@0: prev=high=low; michael@0: for(i=start+1; ifromUTableUChars, 1+count); michael@0: sectionValues=(uint32_t *)utm_allocN(extData->fromUTableValues, 1+count); michael@0: michael@0: /* write the section header */ michael@0: *sectionUChars++=(UChar)count; michael@0: *sectionValues++=defaultValue; michael@0: michael@0: /* step 3: write temporary section table with subsection starts */ michael@0: prev=low-1; /* just before low to prevent empty subsections before low */ michael@0: j=0; /* section table index */ michael@0: for(i=start; iuLen==unitIndex+1) { michael@0: /* do not include this in generateToUTable() */ michael@0: ++subStart; michael@0: michael@0: if(subStartfromUTableValues); michael@0: michael@0: /* recurse */ michael@0: if(!generateFromUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) { michael@0: return FALSE; michael@0: } michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: michael@0: /* michael@0: * add entries to the fromUnicode trie, michael@0: * assume to be called with code points in ascending order michael@0: * and use that to build the trie in precompacted form michael@0: */ michael@0: static void michael@0: addFromUTrieEntry(CnvExtData *extData, UChar32 c, uint32_t value) { michael@0: int32_t i1, i2, i3, i3b, nextOffset, min, newBlock; michael@0: michael@0: if(value==0) { michael@0: return; michael@0: } michael@0: michael@0: /* michael@0: * compute the index for each stage, michael@0: * allocate a stage block if necessary, michael@0: * and write the stage value michael@0: */ michael@0: i1=c>>10; michael@0: if(i1>=extData->stage1Top) { michael@0: extData->stage1Top=i1+1; michael@0: } michael@0: michael@0: nextOffset=(c>>4)&0x3f; michael@0: michael@0: if(extData->stage1[i1]==0) { michael@0: /* allocate another block in stage 2; overlap with the previous block */ michael@0: newBlock=extData->stage2Top; michael@0: min=newBlock-nextOffset; /* minimum block start with overlap */ michael@0: while(minstage2[newBlock-1]==0) { michael@0: --newBlock; michael@0: } michael@0: michael@0: extData->stage1[i1]=(uint16_t)newBlock; michael@0: extData->stage2Top=newBlock+MBCS_STAGE_2_BLOCK_SIZE; michael@0: if(extData->stage2Top>LENGTHOF(extData->stage2)) { michael@0: fprintf(stderr, "error: too many stage 2 entries at U+%04x\n", (int)c); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: } michael@0: michael@0: i2=extData->stage1[i1]+nextOffset; michael@0: nextOffset=c&0xf; michael@0: michael@0: if(extData->stage2[i2]==0) { michael@0: /* allocate another block in stage 3; overlap with the previous block */ michael@0: newBlock=extData->stage3Top; michael@0: min=newBlock-nextOffset; /* minimum block start with overlap */ michael@0: while(minstage3[newBlock-1]==0) { michael@0: --newBlock; michael@0: } michael@0: michael@0: /* round up to a multiple of stage 3 granularity >1 (similar to utrie.c) */ michael@0: newBlock=(newBlock+(UCNV_EXT_STAGE_3_GRANULARITY-1))&~(UCNV_EXT_STAGE_3_GRANULARITY-1); michael@0: extData->stage2[i2]=(uint16_t)(newBlock>>UCNV_EXT_STAGE_2_LEFT_SHIFT); michael@0: michael@0: extData->stage3Top=newBlock+MBCS_STAGE_3_BLOCK_SIZE; michael@0: if(extData->stage3Top>LENGTHOF(extData->stage3)) { michael@0: fprintf(stderr, "error: too many stage 3 entries at U+%04x\n", (int)c); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: } michael@0: michael@0: i3=((int32_t)extData->stage2[i2]<stage3[i3]==0 because we get michael@0: * code points in strictly ascending order michael@0: */ michael@0: michael@0: if(value==UCNV_EXT_FROM_U_SUBCHAR1) { michael@0: /* SUB mapping, see getFromUBytesValue() and prepareFromUMappings() */ michael@0: extData->stage3[i3]=1; michael@0: michael@0: /* michael@0: * precompaction is not optimal for |2 mappings because michael@0: * stage3 values for them are all the same, unlike for other mappings michael@0: * which all have unique values; michael@0: * use a simple compaction of reusing a whole block filled with these michael@0: * mappings michael@0: */ michael@0: michael@0: /* is the entire block filled with |2 mappings? */ michael@0: if(nextOffset==MBCS_STAGE_3_BLOCK_SIZE-1) { michael@0: for(min=i3-nextOffset; michael@0: minstage3[min]==1; michael@0: ++min) {} michael@0: michael@0: if(min==i3) { michael@0: /* the entire block is filled with these mappings */ michael@0: if(extData->stage3Sub1Block!=0) { michael@0: /* point to the previous such block and remove this block from stage3 */ michael@0: extData->stage2[i2]=extData->stage3Sub1Block; michael@0: extData->stage3Top-=MBCS_STAGE_3_BLOCK_SIZE; michael@0: uprv_memset(extData->stage3+extData->stage3Top, 0, MBCS_STAGE_3_BLOCK_SIZE*2); michael@0: } else { michael@0: /* remember this block's stage2 entry */ michael@0: extData->stage3Sub1Block=extData->stage2[i2]; michael@0: } michael@0: } michael@0: } michael@0: } else { michael@0: if((i3b=extData->stage3bTop++)>=LENGTHOF(extData->stage3b)) { michael@0: fprintf(stderr, "error: too many stage 3b entries at U+%04x\n", (int)c); michael@0: exit(U_MEMORY_ALLOCATION_ERROR); michael@0: } michael@0: michael@0: /* roundtrip or fallback mapping */ michael@0: extData->stage3[i3]=(uint16_t)i3b; michael@0: extData->stage3b[i3b]=value; michael@0: } michael@0: } michael@0: michael@0: static UBool michael@0: generateFromUTrie(CnvExtData *extData, UCMTable *table, int32_t mapLength) { michael@0: UCMapping *mappings, *m; michael@0: int32_t *map; michael@0: uint32_t value; michael@0: int32_t subStart, subLimit; michael@0: michael@0: UChar32 *codePoints; michael@0: UChar32 c, next; michael@0: michael@0: if(mapLength==0) { michael@0: return TRUE; michael@0: } michael@0: michael@0: mappings=table->mappings; michael@0: map=table->reverseMap; michael@0: michael@0: /* michael@0: * iterate over same-initial-code point mappings, michael@0: * enter the initial code point into the trie, michael@0: * and start a recursion on the corresponding mappings section michael@0: * with generateFromUTable() michael@0: */ michael@0: m=mappings+map[0]; michael@0: codePoints=UCM_GET_CODE_POINTS(table, m); michael@0: next=codePoints[0]; michael@0: subLimit=0; michael@0: while(subLimituLen==1) { michael@0: /* do not include this in generateFromUTable() */ michael@0: ++subStart; michael@0: michael@0: if(subStartfromUTableValues)); michael@0: michael@0: /* recurse, starting from 16-bit-unit index 2, the first 16-bit unit after c */ michael@0: if(!generateFromUTable(extData, table, subStart, subLimit, 2, value)) { michael@0: return FALSE; michael@0: } michael@0: } michael@0: } michael@0: return TRUE; michael@0: } michael@0: michael@0: /* michael@0: * Generate the fromU data structures from the input table. michael@0: * The input table must be sorted, and all precision flags must be 0..3. michael@0: * This function will modify the table's reverseMap. michael@0: */ michael@0: static UBool michael@0: makeFromUTable(CnvExtData *extData, UCMTable *table) { michael@0: uint16_t *stage1; michael@0: int32_t i, stage1Top, fromUCount; michael@0: michael@0: fromUCount=prepareFromUMappings(table); michael@0: michael@0: extData->fromUTableUChars=utm_open("cnv extension fromUTableUChars", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 2); michael@0: extData->fromUTableValues=utm_open("cnv extension fromUTableValues", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 4); michael@0: extData->fromUBytes=utm_open("cnv extension fromUBytes", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 1); michael@0: michael@0: /* allocate all-unassigned stage blocks */ michael@0: extData->stage2Top=MBCS_STAGE_2_FIRST_ASSIGNED; michael@0: extData->stage3Top=MBCS_STAGE_3_FIRST_ASSIGNED; michael@0: michael@0: /* michael@0: * stage 3b stores only unique values, and in michael@0: * index 0: 0 for "no mapping" michael@0: * index 1: "no mapping" with preference for rather than michael@0: */ michael@0: extData->stage3b[1]=UCNV_EXT_FROM_U_SUBCHAR1; michael@0: extData->stage3bTop=2; michael@0: michael@0: /* allocate the first entry in the fromUTable because index 0 means "no result" */ michael@0: utm_alloc(extData->fromUTableUChars); michael@0: utm_alloc(extData->fromUTableValues); michael@0: michael@0: if(!generateFromUTrie(extData, table, fromUCount)) { michael@0: return FALSE; michael@0: } michael@0: michael@0: /* michael@0: * offset the stage 1 trie entries by stage1Top because they will michael@0: * be stored in a single array michael@0: */ michael@0: stage1=extData->stage1; michael@0: stage1Top=extData->stage1Top; michael@0: for(i=0; iunicodeMask&UCNV_HAS_SURROGATES) { michael@0: fprintf(stderr, "error: contains mappings for surrogate code points\n"); michael@0: return FALSE; michael@0: } michael@0: michael@0: staticData->conversionType=UCNV_MBCS; michael@0: michael@0: extData=(CnvExtData *)cnvData; michael@0: michael@0: /* michael@0: * assume that the table is sorted michael@0: * michael@0: * call the functions in this order because michael@0: * makeToUTable() modifies the original reverseMap, michael@0: * makeFromUTable() writes a whole new mapping into reverseMap michael@0: */ michael@0: return michael@0: makeToUTable(extData, table) && michael@0: makeFromUTable(extData, table); michael@0: }