michael@0: /* 
michael@0: **********************************************************************
michael@0: *   Copyright (C) 2000-2012, International Business Machines
michael@0: *   Corporation and others.  All Rights Reserved.
michael@0: **********************************************************************
michael@0: *   file name:  ucnvlat1.cpp
michael@0: *   encoding:   US-ASCII
michael@0: *   tab size:   8 (not used)
michael@0: *   indentation:4
michael@0: *
michael@0: *   created on: 2000feb07
michael@0: *   created by: Markus W. Scherer
michael@0: */
michael@0: 
michael@0: #include "unicode/utypes.h"
michael@0: 
michael@0: #if !UCONFIG_NO_CONVERSION
michael@0: 
michael@0: #include "unicode/ucnv.h"
michael@0: #include "unicode/uset.h"
michael@0: #include "unicode/utf8.h"
michael@0: #include "ucnv_bld.h"
michael@0: #include "ucnv_cnv.h"
michael@0: 
michael@0: /* control optimizations according to the platform */
michael@0: #define LATIN1_UNROLL_FROM_UNICODE 1
michael@0: 
michael@0: /* ISO 8859-1 --------------------------------------------------------------- */
michael@0: 
michael@0: /* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
michael@0: static void
michael@0: _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
michael@0:                             UErrorCode *pErrorCode) {
michael@0:     const uint8_t *source;
michael@0:     UChar *target;
michael@0:     int32_t targetCapacity, length;
michael@0:     int32_t *offsets;
michael@0: 
michael@0:     int32_t sourceIndex;
michael@0: 
michael@0:     /* set up the local pointers */
michael@0:     source=(const uint8_t *)pArgs->source;
michael@0:     target=pArgs->target;
michael@0:     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
michael@0:     offsets=pArgs->offsets;
michael@0: 
michael@0:     sourceIndex=0;
michael@0: 
michael@0:     /*
michael@0:      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
michael@0:      * for the minimum of the sourceLength and targetCapacity
michael@0:      */
michael@0:     length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
michael@0:     if(length<=targetCapacity) {
michael@0:         targetCapacity=length;
michael@0:     } else {
michael@0:         /* target will be full */
michael@0:         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0:         length=targetCapacity;
michael@0:     }
michael@0: 
michael@0:     if(targetCapacity>=8) {
michael@0:         /* This loop is unrolled for speed and improved pipelining. */
michael@0:         int32_t count, loops;
michael@0: 
michael@0:         loops=count=targetCapacity>>3;
michael@0:         length=targetCapacity&=0x7;
michael@0:         do {
michael@0:             target[0]=source[0];
michael@0:             target[1]=source[1];
michael@0:             target[2]=source[2];
michael@0:             target[3]=source[3];
michael@0:             target[4]=source[4];
michael@0:             target[5]=source[5];
michael@0:             target[6]=source[6];
michael@0:             target[7]=source[7];
michael@0:             target+=8;
michael@0:             source+=8;
michael@0:         } while(--count>0);
michael@0: 
michael@0:         if(offsets!=NULL) {
michael@0:             do {
michael@0:                 offsets[0]=sourceIndex++;
michael@0:                 offsets[1]=sourceIndex++;
michael@0:                 offsets[2]=sourceIndex++;
michael@0:                 offsets[3]=sourceIndex++;
michael@0:                 offsets[4]=sourceIndex++;
michael@0:                 offsets[5]=sourceIndex++;
michael@0:                 offsets[6]=sourceIndex++;
michael@0:                 offsets[7]=sourceIndex++;
michael@0:                 offsets+=8;
michael@0:             } while(--loops>0);
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /* conversion loop */
michael@0:     while(targetCapacity>0) {
michael@0:         *target++=*source++;
michael@0:         --targetCapacity;
michael@0:     }
michael@0: 
michael@0:     /* write back the updated pointers */
michael@0:     pArgs->source=(const char *)source;
michael@0:     pArgs->target=target;
michael@0: 
michael@0:     /* set offsets */
michael@0:     if(offsets!=NULL) {
michael@0:         while(length>0) {
michael@0:             *offsets++=sourceIndex++;
michael@0:             --length;
michael@0:         }
michael@0:         pArgs->offsets=offsets;
michael@0:     }
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Table-less and callback-less version of ucnv_MBCSSingleGetNextUChar().
michael@0:  *
michael@0:  * Returns the next source byte as a code point and advances pArgs->source
michael@0:  * by one. With no input left, sets *pErrorCode to
michael@0:  * U_INDEX_OUTOFBOUNDS_ERROR and returns 0xffff.
michael@0:  */
michael@0: static UChar32
michael@0: _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
michael@0:                     UErrorCode *pErrorCode) {
michael@0:     const uint8_t *next=(const uint8_t *)pArgs->source;
michael@0: 
michael@0:     if(next>=(const uint8_t *)pArgs->sourceLimit) {
michael@0:         /* empty input: nothing to return */
michael@0:         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0:         return 0xffff;
michael@0:     }
michael@0: 
michael@0:     pArgs->source=(const char *)(next+1);
michael@0:     return *next;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * UTF-16 to ISO 8859-1 or US-ASCII, a table-less version of
michael@0:  * ucnv_MBCSSingleFromBMPWithOffsets().
michael@0:  *
michael@0:  * Code units up to the charset ceiling (0xff when the converter's shared
michael@0:  * data is _Latin1Data, 0x7f otherwise) are narrowed to one byte each.
michael@0:  * The first code unit above the ceiling stops the conversion: its code
michael@0:  * point (the supplementary code point if a surrogate pair was read) is
michael@0:  * stored in cnv->fromUChar32 and *pErrorCode becomes U_INVALID_CHAR_FOUND,
michael@0:  * or U_ILLEGAL_CHAR_FOUND for an unpaired surrogate.
michael@0:  * A lead surrogate that is the last code unit of the input is stored in
michael@0:  * cnv->fromUChar32 without an error so that a later call can pair it.
michael@0:  * U_BUFFER_OVERFLOW_ERROR is set when input remains but the target is full.
michael@0:  *
michael@0:  * pArgs->source, pArgs->target and pArgs->offsets are updated on return.
michael@0:  */
michael@0: static void
michael@0: _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
michael@0:                               UErrorCode *pErrorCode) {
michael@0:     UConverter *cnv=pArgs->converter;
michael@0:     const UChar *src=pArgs->source;
michael@0:     const UChar *srcLimit=pArgs->sourceLimit;
michael@0:     uint8_t *dst=(uint8_t *)pArgs->target;
michael@0:     uint8_t *offsetBase=dst; /* first output byte that has no offset yet */
michael@0:     int32_t *offs=pArgs->offsets;
michael@0:     int32_t room=(int32_t)(pArgs->targetLimit-pArgs->target);
michael@0:     int32_t avail=(int32_t)(srcLimit-src);
michael@0:     UChar32 cp=cnv->fromUChar32; /* pending code point from the converter state */
michael@0:     UChar ceiling;
michael@0:     int32_t srcIndex;
michael@0:     UBool resumed, stopped, wantTrail;
michael@0: 
michael@0:     /* Latin-1 accepts up to U+00FF, US-ASCII up to U+007F */
michael@0:     ceiling=(UChar)(cnv->sharedData==&_Latin1Data ? 0xff : 0x7f);
michael@0: 
michael@0:     /* -1 marks a character that began in the previous buffer */
michael@0:     srcIndex= cp==0 ? 0 : -1;
michael@0: 
michael@0:     /* 1:1 mapping: one counter for min(source length, target capacity) */
michael@0:     if(avail<room) {
michael@0:         room=avail;
michael@0:     }
michael@0: 
michael@0:     /*
michael@0:      * With a pending code point and room to work, skip the copy loops and
michael@0:      * go straight to looking for the trail surrogate.
michael@0:      */
michael@0:     resumed=(UBool)(cp!=0 && room>0);
michael@0:     stopped=resumed;
michael@0:     wantTrail=resumed;
michael@0: 
michael@0:     if(!resumed) {
michael@0:         UChar unit=0;
michael@0: 
michael@0: #if LATIN1_UNROLL_FROM_UNICODE
michael@0:         /* most common case, unrolled: 16 units that all fit the charset */
michael@0:         if(room>=16) {
michael@0:             const int32_t groups=room>>4;
michael@0:             int32_t done=0;
michael@0: 
michael@0:             while(done<groups) {
michael@0:                 UChar u, seen=0;
michael@0: 
michael@0:                 u=src[0];  dst[0]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[1];  dst[1]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[2];  dst[2]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[3];  dst[3]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[4];  dst[4]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[5];  dst[5]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[6];  dst[6]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[7];  dst[7]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[8];  dst[8]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[9];  dst[9]=(uint8_t)u;  seen|=u;
michael@0:                 u=src[10]; dst[10]=(uint8_t)u; seen|=u;
michael@0:                 u=src[11]; dst[11]=(uint8_t)u; seen|=u;
michael@0:                 u=src[12]; dst[12]=(uint8_t)u; seen|=u;
michael@0:                 u=src[13]; dst[13]=(uint8_t)u; seen|=u;
michael@0:                 u=src[14]; dst[14]=(uint8_t)u; seen|=u;
michael@0:                 u=src[15]; dst[15]=(uint8_t)u; seen|=u;
michael@0: 
michael@0:                 /*
michael@0:                  * The OR of all 16 units exceeds the ceiling if any single
michael@0:                  * unit does; in that case leave the pointers at the start
michael@0:                  * of this group and let the loop below redo it unit by unit.
michael@0:                  */
michael@0:                 if(seen>ceiling) {
michael@0:                     break;
michael@0:                 }
michael@0:                 src+=16;
michael@0:                 dst+=16;
michael@0:                 ++done;
michael@0:             }
michael@0:             room-=16*done;
michael@0: 
michael@0:             if(offs!=NULL) {
michael@0:                 /* these bytes get their offsets here, not at the end */
michael@0:                 offsetBase+=16*done;
michael@0:                 for(; done>0; --done) {
michael@0:                     *offs++=srcIndex++; *offs++=srcIndex++;
michael@0:                     *offs++=srcIndex++; *offs++=srcIndex++;
michael@0:                     *offs++=srcIndex++; *offs++=srcIndex++;
michael@0:                     *offs++=srcIndex++; *offs++=srcIndex++;
michael@0:                     *offs++=srcIndex++; *offs++=srcIndex++;
michael@0:                     *offs++=srcIndex++; *offs++=srcIndex++;
michael@0:                     *offs++=srcIndex++; *offs++=srcIndex++;
michael@0:                     *offs++=srcIndex++; *offs++=srcIndex++;
michael@0:                 }
michael@0:             }
michael@0:         }
michael@0: #endif
michael@0: 
michael@0:         /* unit-by-unit loop; a unit above the ceiling is consumed, then stops it */
michael@0:         while(room>0) {
michael@0:             unit=*src++;
michael@0:             if(unit>ceiling) {
michael@0:                 break;
michael@0:             }
michael@0:             *dst++=(uint8_t)unit;
michael@0:             --room;
michael@0:         }
michael@0: 
michael@0:         if(unit>ceiling) {
michael@0:             cp=unit;
michael@0:             stopped=TRUE;
michael@0:             /* only a lead surrogate can still be completed by a trail */
michael@0:             wantTrail=(UBool)(U_IS_SURROGATE(cp) && U_IS_SURROGATE_LEAD(cp));
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     if(stopped) {
michael@0:         UBool starved=FALSE; /* TRUE: the input ended right after a lead surrogate */
michael@0: 
michael@0:         if(wantTrail) {
michael@0:             if(src<srcLimit) {
michael@0:                 /* test the following code unit */
michael@0:                 UChar trail=*src;
michael@0:                 if(U16_IS_TRAIL(trail)) {
michael@0:                     /*
michael@0:                      * complete pair: this codepage does not map
michael@0:                      * supplementary code points, so it is reported as
michael@0:                      * unassigned below
michael@0:                      */
michael@0:                     ++src;
michael@0:                     cp=U16_GET_SUPPLEMENTARY(cp, trail);
michael@0:                 }
michael@0:                 /* otherwise: unmatched lead surrogate, reported as illegal */
michael@0:             } else {
michael@0:                 /* no more input: keep the lead for the next call */
michael@0:                 starved=TRUE;
michael@0:             }
michael@0:         }
michael@0:         /* a non-surrogate is unassigned, an unmatched trail is illegal */
michael@0: 
michael@0:         cnv->fromUChar32=cp;
michael@0:         if(!starved) {
michael@0:             *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /* offsets for every byte written since offsetBase */
michael@0:     if(offs!=NULL) {
michael@0:         size_t pending=(size_t)(dst-offsetBase);
michael@0:         for(; pending>0; --pending) {
michael@0:             *offs++=srcIndex++;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     if(U_SUCCESS(*pErrorCode) && src<srcLimit && dst>=(uint8_t *)pArgs->targetLimit) {
michael@0:         /* input remains but the target is full */
michael@0:         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0:     }
michael@0: 
michael@0:     /* publish the new buffer positions */
michael@0:     pArgs->source=src;
michael@0:     pArgs->target=(char *)dst;
michael@0:     pArgs->offsets=offs;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Convert UTF-8 directly to Latin-1. Adapted from ucnv_SBCSFromUTF8().
michael@0:  *
michael@0:  * Only ASCII bytes and the two-byte sequences with lead byte 0xc2 or 0xc3
michael@0:  * (U+0080..U+00FF) are handled here. Anything else sets *pErrorCode to
michael@0:  * U_USING_DEFAULT_WARNING with the source left at the start of the
michael@0:  * problematic sequence, so that the pivoting implementation can take over.
michael@0:  * A lead byte left over at the very end of the source is stored in the
michael@0:  * UTF-8 converter's state (toUnicodeStatus, toUBytes, toULength, mode).
michael@0:  * U_BUFFER_OVERFLOW_ERROR is set when the target fills up first.
michael@0:  */
michael@0: static void
michael@0: ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
michael@0:                     UConverterToUnicodeArgs *pToUArgs,
michael@0:                     UErrorCode *pErrorCode) {
michael@0:     UConverter *utf8Cnv=pToUArgs->converter;
michael@0:     const uint8_t *src=(const uint8_t *)pToUArgs->source;
michael@0:     const uint8_t *srcLimit=(const uint8_t *)pToUArgs->sourceLimit;
michael@0:     uint8_t *dst=(uint8_t *)pFromUArgs->target;
michael@0:     int32_t room=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
michael@0:     /* lead byte saved by a previous call, or 0 */
michael@0:     UChar32 pendingLead=(UChar32)utf8Cnv->toUnicodeStatus;
michael@0:     uint8_t lead, trailBits;
michael@0: 
michael@0:     /* first finish a sequence that was split across buffers */
michael@0:     if(pendingLead!=0 && src<srcLimit) {
michael@0:         if(room==0) {
michael@0:             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0:             return;
michael@0:         }
michael@0:         if(pendingLead<0xc2 || pendingLead>0xc3 ||
michael@0:            (trailBits=(uint8_t)(*src-0x80)) > 0x3f
michael@0:         ) {
michael@0:             /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
michael@0:             *pErrorCode=U_USING_DEFAULT_WARNING;
michael@0:             return;
michael@0:         }
michael@0: 
michael@0:         ++src;
michael@0:         *dst++=(uint8_t)(((pendingLead&3)<<6)|trailBits);
michael@0:         --room;
michael@0: 
michael@0:         /* the saved sequence is complete: clear the UTF-8 converter state */
michael@0:         utf8Cnv->toUnicodeStatus=0;
michael@0:         utf8Cnv->toULength=0;
michael@0:     }
michael@0: 
michael@0:     /*
michael@0:      * If the source ends with a lead byte, exclude it from the loop so that
michael@0:      * the loop may read one trail byte without a further limit check.
michael@0:      * One byte is enough because the loop handles at most 2-byte sequences.
michael@0:      */
michael@0:     if(src<srcLimit && U8_IS_LEAD(srcLimit[-1])) {
michael@0:         --srcLimit;
michael@0:     }
michael@0: 
michael@0:     while(src<srcLimit) {
michael@0:         if(room<=0) {
michael@0:             /* target is full */
michael@0:             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0:             break;
michael@0:         }
michael@0: 
michael@0:         lead=*src++;
michael@0:         if((int8_t)lead>=0) {
michael@0:             /* ASCII is copied unchanged */
michael@0:             *dst++=(uint8_t)lead;
michael@0:             --room;
michael@0:             continue;
michael@0:         }
michael@0:         if(lead>=0xc2 && lead<=0xc3 &&
michael@0:            (trailBits=(uint8_t)(*src-0x80)) <= 0x3f
michael@0:         ) {
michael@0:             /* U+0080..U+00FF: two low lead bits plus six trail bits */
michael@0:             ++src;
michael@0:             *dst++=(uint8_t)(((lead&3)<<6)|trailBits);
michael@0:             --room;
michael@0:             continue;
michael@0:         }
michael@0: 
michael@0:         /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
michael@0:         pToUArgs->source=(char *)(src-1);
michael@0:         pFromUArgs->target=(char *)dst;
michael@0:         *pErrorCode=U_USING_DEFAULT_WARNING;
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     /*
michael@0:      * If the limit was pulled back above and the loop reached it without an
michael@0:      * error, collect the single truncated lead byte into the converter state.
michael@0:      */
michael@0:     if(U_SUCCESS(*pErrorCode)) {
michael@0:         srcLimit=(const uint8_t *)pToUArgs->sourceLimit;
michael@0:         if(src<srcLimit) {
michael@0:             lead=*src++;
michael@0:             utf8Cnv->toUBytes[0]=lead;
michael@0:             utf8Cnv->toUnicodeStatus=utf8Cnv->toUBytes[0];
michael@0:             utf8Cnv->toULength=1;
michael@0:             utf8Cnv->mode=U8_COUNT_TRAIL_BYTES(lead)+1;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /* publish the new buffer positions */
michael@0:     pToUArgs->source=(char *)src;
michael@0:     pFromUArgs->target=(char *)dst;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Adds the code point range U+0000..U+00FF to the set behind sa.
michael@0:  * cnv, which and pErrorCode are not used.
michael@0:  */
michael@0: static void
michael@0: _Latin1GetUnicodeSet(const UConverter *cnv,
michael@0:                      const USetAdder *sa,
michael@0:                      UConverterUnicodeSet which,
michael@0:                      UErrorCode *pErrorCode) {
michael@0:     const UChar32 first=0;
michael@0:     const UChar32 last=0xff;
michael@0: 
michael@0:     (void)cnv;
michael@0:     (void)which;
michael@0:     (void)pErrorCode;
michael@0: 
michael@0:     sa->addRange(sa->set, first, last);
michael@0: }
michael@0: 
michael@0: static const UConverterImpl _Latin1Impl={ /* function table of the ISO 8859-1 converter */
michael@0:     UCNV_LATIN_1, /* converter type */
michael@0: /* NOTE(review): names given for the NULL slots are presumed from slot order; confirm against the UConverterImpl declaration */
michael@0:     NULL, /* presumably load: not provided */
michael@0:     NULL, /* presumably unload: not provided */
michael@0: /* presumably open, close, reset: not provided */
michael@0:     NULL,
michael@0:     NULL,
michael@0:     NULL,
michael@0: /* conversion functions; the same function fills each pair of adjacent slots */
michael@0:     _Latin1ToUnicodeWithOffsets,
michael@0:     _Latin1ToUnicodeWithOffsets,
michael@0:     _Latin1FromUnicodeWithOffsets,
michael@0:     _Latin1FromUnicodeWithOffsets,
michael@0:     _Latin1GetNextUChar,
michael@0: /* four optional slots left empty (presumably getStarters, getName, writeSub, safeClone), then the set getter */
michael@0:     NULL,
michael@0:     NULL,
michael@0:     NULL,
michael@0:     NULL,
michael@0:     _Latin1GetUnicodeSet,
michael@0: /* direct UTF-8 shortcuts: none towards UTF-8 (presumably the toUTF8 slot), one from UTF-8 */
michael@0:     NULL,
michael@0:     ucnv_Latin1FromUTF8
michael@0: };
michael@0: 
michael@0: static const UConverterStaticData _Latin1StaticData={ /* static description of ISO 8859-1; NOTE(review): field names in the comments below are presumed, confirm against UConverterStaticData */
michael@0:     sizeof(UConverterStaticData), /* size of this structure */
michael@0:     "ISO-8859-1", /* converter name */
michael@0:     819, UCNV_IBM, UCNV_LATIN_1, 1, 1, /* presumably codepage number, platform, conversion type, min and max bytes per character */
michael@0:     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, /* presumably substitution bytes (0x1a) and their length, then the two fallback flags */
michael@0:     0, /* presumably unicodeMask */
michael@0:     0, /* presumably subChar1 */
michael@0:     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
michael@0: };
michael@0: 
michael@0: const UConverterSharedData _Latin1Data={ /* shared data of the ISO 8859-1 converter; _Latin1FromUnicodeWithOffsets compares cnv->sharedData with this object to select the 0xff limit */
michael@0:     sizeof(UConverterSharedData), ~((uint32_t) 0), /* structure size; all-ones value, presumably a reference counter marking static data that is never freed (confirm) */
michael@0:     NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl, /* ties together the static description and the function table; the NULL/FALSE slots are presumably dataMemory, table and sharedDataCached (confirm) */
michael@0:     0 /* presumably toUnicodeStatus */
michael@0: };
michael@0: 
michael@0: /* US-ASCII ----------------------------------------------------------------- */
michael@0: 
michael@0: /* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
michael@0: static void
michael@0: _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
michael@0:                            UErrorCode *pErrorCode) {
michael@0:     const uint8_t *source, *sourceLimit;
michael@0:     UChar *target, *oldTarget;
michael@0:     int32_t targetCapacity, length;
michael@0:     int32_t *offsets;
michael@0: 
michael@0:     int32_t sourceIndex;
michael@0: 
michael@0:     uint8_t c;
michael@0: 
michael@0:     /* set up the local pointers */
michael@0:     source=(const uint8_t *)pArgs->source;
michael@0:     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
michael@0:     target=oldTarget=pArgs->target;
michael@0:     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
michael@0:     offsets=pArgs->offsets;
michael@0: 
michael@0:     /* sourceIndex=-1 if the current character began in the previous buffer */
michael@0:     sourceIndex=0;
michael@0: 
michael@0:     /*
michael@0:      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
michael@0:      * for the minimum of the sourceLength and targetCapacity
michael@0:      */
michael@0:     length=(int32_t)(sourceLimit-source);
michael@0:     if(length<targetCapacity) {
michael@0:         targetCapacity=length;
michael@0:     }
michael@0: 
michael@0:     if(targetCapacity>=8) {
michael@0:         /* This loop is unrolled for speed and improved pipelining. */
michael@0:         int32_t count, loops;
michael@0:         UChar oredChars;
michael@0: 
michael@0:         loops=count=targetCapacity>>3;
michael@0:         do {
michael@0:             oredChars=target[0]=source[0];
michael@0:             oredChars|=target[1]=source[1];
michael@0:             oredChars|=target[2]=source[2];
michael@0:             oredChars|=target[3]=source[3];
michael@0:             oredChars|=target[4]=source[4];
michael@0:             oredChars|=target[5]=source[5];
michael@0:             oredChars|=target[6]=source[6];
michael@0:             oredChars|=target[7]=source[7];
michael@0: 
michael@0:             /* were all 16 entries really valid? */
michael@0:             if(oredChars>0x7f) {
michael@0:                 /* no, return to the first of these 16 */
michael@0:                 break;
michael@0:             }
michael@0:             source+=8;
michael@0:             target+=8;
michael@0:         } while(--count>0);
michael@0:         count=loops-count;
michael@0:         targetCapacity-=count*8;
michael@0: 
michael@0:         if(offsets!=NULL) {
michael@0:             oldTarget+=count*8;
michael@0:             while(count>0) {
michael@0:                 offsets[0]=sourceIndex++;
michael@0:                 offsets[1]=sourceIndex++;
michael@0:                 offsets[2]=sourceIndex++;
michael@0:                 offsets[3]=sourceIndex++;
michael@0:                 offsets[4]=sourceIndex++;
michael@0:                 offsets[5]=sourceIndex++;
michael@0:                 offsets[6]=sourceIndex++;
michael@0:                 offsets[7]=sourceIndex++;
michael@0:                 offsets+=8;
michael@0:                 --count;
michael@0:             }
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /* conversion loop */
michael@0:     c=0;
michael@0:     while(targetCapacity>0 && (c=*source++)<=0x7f) {
michael@0:         *target++=c;
michael@0:         --targetCapacity;
michael@0:     }
michael@0: 
michael@0:     if(c>0x7f) {
michael@0:         /* callback(illegal); copy the current bytes to toUBytes[] */
michael@0:         UConverter *cnv=pArgs->converter;
michael@0:         cnv->toUBytes[0]=c;
michael@0:         cnv->toULength=1;
michael@0:         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0:     } else if(source<sourceLimit && target>=pArgs->targetLimit) {
michael@0:         /* target is full */
michael@0:         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0:     }
michael@0: 
michael@0:     /* set offsets since the start */
michael@0:     if(offsets!=NULL) {
michael@0:         size_t count=target-oldTarget;
michael@0:         while(count>0) {
michael@0:             *offsets++=sourceIndex++;
michael@0:             --count;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     /* write back the updated pointers */
michael@0:     pArgs->source=(const char *)source;
michael@0:     pArgs->target=target;
michael@0:     pArgs->offsets=offsets;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Table-less version of ucnv_MBCSSingleGetNextUChar() for US-ASCII.
michael@0:  *
michael@0:  * Consumes one source byte. Returns it if it is at most 0x7f; otherwise
michael@0:  * stores it in cnv->toUBytes[0] (toULength=1), sets *pErrorCode to
michael@0:  * U_ILLEGAL_CHAR_FOUND and returns 0xffff. With no input left, sets
michael@0:  * *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns 0xffff.
michael@0:  */
michael@0: static UChar32
michael@0: _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
michael@0:                    UErrorCode *pErrorCode) {
michael@0:     const uint8_t *next=(const uint8_t *)pArgs->source;
michael@0:     uint8_t byte;
michael@0: 
michael@0:     if(next>=(const uint8_t *)pArgs->sourceLimit) {
michael@0:         /* empty input: nothing to return */
michael@0:         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
michael@0:         return 0xffff;
michael@0:     }
michael@0: 
michael@0:     /* the byte counts as consumed whether or not it is valid */
michael@0:     byte=*next;
michael@0:     pArgs->source=(const char *)(next+1);
michael@0: 
michael@0:     if(byte>0x7f) {
michael@0:         UConverter *cnv=pArgs->converter;
michael@0:         cnv->toUBytes[0]=byte;
michael@0:         cnv->toULength=1;
michael@0:         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
michael@0:         return 0xffff;
michael@0:     }
michael@0:     return byte;
michael@0: }
michael@0: 
michael@0: /* "Convert" UTF-8 to US-ASCII: Validate and copy. */
michael@0: static void
michael@0: ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
michael@0:                    UConverterToUnicodeArgs *pToUArgs,
michael@0:                    UErrorCode *pErrorCode) {
michael@0:     const uint8_t *source, *sourceLimit;
michael@0:     uint8_t *target;
michael@0:     int32_t targetCapacity, length;
michael@0: 
michael@0:     uint8_t c;
michael@0: 
michael@0:     if(pToUArgs->converter->toUnicodeStatus!=0) {
michael@0:         /* no handling of partial UTF-8 characters here, fall back to pivoting */
michael@0:         *pErrorCode=U_USING_DEFAULT_WARNING;
michael@0:         return;
michael@0:     }
michael@0: 
michael@0:     /* set up the local pointers */
michael@0:     source=(const uint8_t *)pToUArgs->source;
michael@0:     sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
michael@0:     target=(uint8_t *)pFromUArgs->target;
michael@0:     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
michael@0: 
michael@0:     /*
michael@0:      * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
michael@0:      * for the minimum of the sourceLength and targetCapacity
michael@0:      */
michael@0:     length=(int32_t)(sourceLimit-source);
michael@0:     if(length<targetCapacity) {
michael@0:         targetCapacity=length;
michael@0:     }
michael@0: 
michael@0:     /* unroll the loop with the most common case */
michael@0:     if(targetCapacity>=16) {
michael@0:         int32_t count, loops;
michael@0:         uint8_t oredChars;
michael@0: 
michael@0:         loops=count=targetCapacity>>4;
michael@0:         do {
michael@0:             oredChars=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0:             oredChars|=*target++=*source++;
michael@0: 
michael@0:             /* were all 16 entries really valid? */
michael@0:             if(oredChars>0x7f) {
michael@0:                 /* no, return to the first of these 16 */
michael@0:                 source-=16;
michael@0:                 target-=16;
michael@0:                 break;
michael@0:             }
michael@0:         } while(--count>0);
michael@0:         count=loops-count;
michael@0:         targetCapacity-=16*count;
michael@0:     }
michael@0: 
michael@0:     /* conversion loop */
michael@0:     c=0;
michael@0:     while(targetCapacity>0 && (c=*source)<=0x7f) {
michael@0:         ++source;
michael@0:         *target++=c;
michael@0:         --targetCapacity;
michael@0:     }
michael@0: 
michael@0:     if(c>0x7f) {
michael@0:         /* non-ASCII character, handle in standard converter */
michael@0:         *pErrorCode=U_USING_DEFAULT_WARNING;
michael@0:     } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
michael@0:         /* target is full */
michael@0:         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
michael@0:     }
michael@0: 
michael@0:     /* write back the updated pointers */
michael@0:     pToUArgs->source=(const char *)source;
michael@0:     pFromUArgs->target=(char *)target;
michael@0: }
michael@0: 
michael@0: /*
michael@0:  * Adds the code point range U+0000..U+007F to the set behind sa.
michael@0:  * cnv, which and pErrorCode are not used.
michael@0:  */
michael@0: static void
michael@0: _ASCIIGetUnicodeSet(const UConverter *cnv,
michael@0:                     const USetAdder *sa,
michael@0:                     UConverterUnicodeSet which,
michael@0:                     UErrorCode *pErrorCode) {
michael@0:     const UChar32 first=0;
michael@0:     const UChar32 last=0x7f;
michael@0: 
michael@0:     (void)cnv;
michael@0:     (void)which;
michael@0:     (void)pErrorCode;
michael@0: 
michael@0:     sa->addRange(sa->set, first, last);
michael@0: }
michael@0: 
michael@0: static const UConverterImpl _ASCIIImpl={ /* function table of the US-ASCII converter */
michael@0:     UCNV_US_ASCII, /* converter type */
michael@0: /* NOTE(review): names given for the NULL slots are presumed from slot order; confirm against the UConverterImpl declaration */
michael@0:     NULL, /* presumably load: not provided */
michael@0:     NULL, /* presumably unload: not provided */
michael@0: /* presumably open, close, reset: not provided */
michael@0:     NULL,
michael@0:     NULL,
michael@0:     NULL,
michael@0: /* conversion functions; from-Unicode reuses the Latin-1 function, which applies the 0x7f limit when sharedData is not _Latin1Data */
michael@0:     _ASCIIToUnicodeWithOffsets,
michael@0:     _ASCIIToUnicodeWithOffsets,
michael@0:     _Latin1FromUnicodeWithOffsets,
michael@0:     _Latin1FromUnicodeWithOffsets,
michael@0:     _ASCIIGetNextUChar,
michael@0: /* four optional slots left empty (presumably getStarters, getName, writeSub, safeClone), then the set getter */
michael@0:     NULL,
michael@0:     NULL,
michael@0:     NULL,
michael@0:     NULL,
michael@0:     _ASCIIGetUnicodeSet,
michael@0: /* direct UTF-8 shortcuts: none towards UTF-8 (presumably the toUTF8 slot), one from UTF-8 */
michael@0:     NULL,
michael@0:     ucnv_ASCIIFromUTF8
michael@0: };
michael@0: 
michael@0: static const UConverterStaticData _ASCIIStaticData={ /* static description of US-ASCII; NOTE(review): field names in the comments below are presumed, confirm against UConverterStaticData */
michael@0:     sizeof(UConverterStaticData), /* size of this structure */
michael@0:     "US-ASCII", /* converter name */
michael@0:     367, UCNV_IBM, UCNV_US_ASCII, 1, 1, /* presumably codepage number, platform, conversion type, min and max bytes per character */
michael@0:     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, /* presumably substitution bytes (0x1a) and their length, then the two fallback flags */
michael@0:     0, /* presumably unicodeMask */
michael@0:     0, /* presumably subChar1 */
michael@0:     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
michael@0: };
michael@0: 
michael@0: const UConverterSharedData _ASCIIData={ /* shared data of the US-ASCII converter */
michael@0:     sizeof(UConverterSharedData), ~((uint32_t) 0), /* structure size; all-ones value, presumably a reference counter marking static data that is never freed (confirm) */
michael@0:     NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl, /* ties together the static description and the function table; the NULL/FALSE slots are presumably dataMemory, table and sharedDataCached (confirm) */
michael@0:     0 /* presumably toUnicodeStatus */
michael@0: };
michael@0: 
michael@0: #endif