1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ushape.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1706 @@ 1.4 +/* 1.5 + ****************************************************************************** 1.6 + * 1.7 + * Copyright (C) 2000-2013, International Business Machines 1.8 + * Corporation and others. All Rights Reserved. 1.9 + * 1.10 + ****************************************************************************** 1.11 + * file name: ushape.cpp 1.12 + * encoding: US-ASCII 1.13 + * tab size: 8 (not used) 1.14 + * indentation:4 1.15 + * 1.16 + * created on: 2000jun29 1.17 + * created by: Markus W. Scherer 1.18 + * 1.19 + * Arabic letter shaping implemented by Ayman Roshdy 1.20 + */ 1.21 + 1.22 +#include "unicode/utypes.h" 1.23 +#include "unicode/uchar.h" 1.24 +#include "unicode/ustring.h" 1.25 +#include "unicode/ushape.h" 1.26 +#include "cmemory.h" 1.27 +#include "putilimp.h" 1.28 +#include "ustr_imp.h" 1.29 +#include "ubidi_props.h" 1.30 +#include "uassert.h" 1.31 + 1.32 +#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 1.33 + 1.34 +/* 1.35 + * This implementation is designed for 16-bit Unicode strings. 1.36 + * The main assumption is that the Arabic characters and their 1.37 + * presentation forms each fit into a single UChar. 1.38 + * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII 1.39 + * characters. 1.40 + */ 1.41 + 1.42 +/* 1.43 + * ### TODO in general for letter shaping: 1.44 + * - the letter shaping code is UTF-16-unaware; needs update 1.45 + * + especially invertBuffer()?! 
* - needs to handle the "Arabic Tail" that is used in some legacy codepages
 *   as a glyph fragment of wide-glyph letters
 *   + IBM Unicode conversion tables map it to U+200B (ZWSP)
 *   + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
 *   + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT
 */

/* definitions for Arabic letter shaping ------------------------------------ */

/*
 * Flag values that are summed into the entries of the link tables below
 * (araLink, presALink, presBLink) and returned by getLink().
 */
#define IRRELEVANT 4   /* carried by the tashkeel entries of araLink; getLink() also returns it for U+206D..U+206F */
#define LAMTYPE    16  /* carried by the araLink entry for U+0644 (LAM_CHAR) */
#define ALEFTYPE   32  /* carried by the araLink entries for the Alef letters (U+0622, U+0623, U+0625, U+0627, ...) */
#define LINKR      1   /* presumably "joins on the right side" - TODO confirm against the shaping code */
#define LINKL      2   /* presumably "joins on the left side" - TODO confirm against the shaping code */
#define APRESENT   8   /* carried by the araLink entries whose letters also appear in convertFBto06 */
#define SHADDA     64  /* carried by the araLink entry for U+0651 */
#define CSHADDA    128 /* carried by the araLink entries for U+064C..U+0650 */
#define COMBINE    (SHADDA+CSHADDA)

/* Individual code points referenced by the helpers in this file. */
#define HAMZAFE_CHAR       0xfe80
#define HAMZA06_CHAR       0x0621
#define YEH_HAMZA_CHAR     0x0626
#define YEH_HAMZAFE_CHAR   0xFE89
#define LAMALEF_SPACE_SUB  0xFFFF /* placeholder for a space generated by Lam+Alef shaping; resolved in handleGeneratedSpaces() */
#define TASHKEEL_SPACE_SUB 0xFFFE /* placeholder for a space generated from Tashkeel; resolved in handleGeneratedSpaces() */
#define NEW_TAIL_CHAR      0xFE73 /* U+FE73 ARABIC TAIL FRAGMENT (see the TODO note above) */
#define OLD_TAIL_CHAR      0x200B /* U+200B (ZWSP), the legacy tail mapping (see the TODO note above) */
#define LAM_CHAR           0x0644
#define SPACE_CHAR         0x0020
#define SHADDA_CHAR        0xFE7C
#define TATWEEL_CHAR       0x0640
#define SHADDA_TATWEEL_CHAR 0xFE7D
#define SHADDA06_CHAR      0x0651

#define SHAPE_MODE   0
#define DESHAPE_MODE 1

/*
 * Per-call settings handed by value to the space-handling helpers.
 */
struct uShapeVariables {
     UChar tailChar;                    /* written next to Seen-family letters by expandCompositCharAtNear() */
     uint32_t uShapeLamalefBegin;       /* compared against (options & U_SHAPE_LAMALEF_MASK) for the "begin" pass */
     uint32_t uShapeLamalefEnd;         /* compared against (options & U_SHAPE_LAMALEF_MASK) for the "end" pass */
     uint32_t uShapeTashkeelBegin;      /* compared against (options & U_SHAPE_TASHKEEL_MASK) for the "begin" pass */
     uint32_t uShapeTashkeelEnd;        /* compared against (options & U_SHAPE_TASHKEEL_MASK) for the "end" pass */
     int spacesRelativeToTextBeginEnd;  /* tested against 1 / 0 to decide where LAMALEF_AUTO spaces go */
};

/* Indexed by (ch - 0xFEB1) in isSeenTailFamilyChar(): 1 marks the flagged forms. */
static const uint8_t tailFamilyIsolatedFinal[] = {
    /* FEB1 */ 1,
    /* FEB2 */ 1,
    /* FEB3 */ 0,
    /* FEB4 */ 0,
    /* FEB5 */ 1,
    /* FEB6 */ 1,
    /* FEB7 */ 0,
    /* FEB8 */ 0,
    /* FEB9 */ 1,
    /* FEBA */ 1,
    /* FEBB */ 0,
    /* FEBC */ 0,
    /* FEBD */ 1,
    /* FEBE */ 1
};

/* Indexed by (ch - 0xFE70) in the Tashkeel predicates: 1 marks the on-Tatweel (medial) forms. */
static const uint8_t tashkeelMedial[] = {
    /* FE70 */ 0,
    /* FE71 */ 1,
    /* FE72 */ 0,
    /* FE73 */ 0,
    /* FE74 */ 0,
    /* FE75 */ 0,
    /* FE76 */ 0,
    /* FE77 */ 1,
    /* FE78 */ 0,
    /* FE79 */ 1,
    /* FE7A */ 0,
    /* FE7B */ 1,
    /* FE7C */ 0,
    /* FE7D */ 1,
    /* FE7E */ 0,
    /* FE7F */ 1
};

/* Indexed by (ch - YEH_HAMZAFE_CHAR) in expandCompositCharAtNear(). */
static const UChar yehHamzaToYeh[] =
{
/* isolated*/ 0xFEEF,
/* final   */ 0xFEF0
};

/* Not referenced by the code visible in this part of the file. */
static const uint8_t IrrelevantPos[] = {
    0x0, 0x2, 0x4, 0x6,
    0x8, 0xA, 0xC, 0xE
};


/* Indexed by (ch - 0xFEF5): the Alef letter contained in each LamAlef presentation form. */
static const UChar convertLamAlef[] =
{
/*FEF5*/ 0x0622,
/*FEF6*/ 0x0622,
/*FEF7*/ 0x0623,
/*FEF8*/ 0x0623,
/*FEF9*/ 0x0625,
/*FEFA*/ 0x0625,
/*FEFB*/ 0x0627,
/*FEFC*/ 0x0627
};

/*
 * Link values for U+0622..U+06D3, indexed by (ch - 0x0622) in getLink().
 * Each entry is a sum of the flag values defined above plus, for many
 * letters, 256 * <value>; the high byte is presumably an offset used to
 * locate the presentation form - TODO confirm against the shaping code.
 */
static const UChar araLink[178]=
{
  1 + 32 + 256 * 0x11,/*0x0622*/
  1 + 32 + 256 * 0x13,/*0x0623*/
  1 + 256 * 0x15,/*0x0624*/
  1 + 32 + 256 * 0x17,/*0x0625*/
  1 + 2 + 256 * 0x19,/*0x0626*/
  1 + 32 + 256 * 0x1D,/*0x0627*/
  1 + 2 + 256 * 0x1F,/*0x0628*/
  1 + 256 * 0x23,/*0x0629*/
  1 + 2 + 256 * 0x25,/*0x062A*/
  1 + 2 + 256 * 0x29,/*0x062B*/
  1 + 2 + 256 * 0x2D,/*0x062C*/
  1 + 2 + 256 * 0x31,/*0x062D*/
  1 + 2 + 256 * 0x35,/*0x062E*/
  1 + 256 * 0x39,/*0x062F*/
  1 + 256 * 0x3B,/*0x0630*/
  1 + 256 * 0x3D,/*0x0631*/
  1 + 256 * 0x3F,/*0x0632*/
  1 + 2 + 256 * 0x41,/*0x0633*/
  1 + 2 + 256 * 0x45,/*0x0634*/
  1 + 2 + 256 * 0x49,/*0x0635*/
  1 + 2 + 256 * 0x4D,/*0x0636*/
  1 + 2 + 256 * 0x51,/*0x0637*/
  1 + 2 + 256 * 0x55,/*0x0638*/
  1 + 2 + 256 * 0x59,/*0x0639*/
  1 + 2 + 256 * 0x5D,/*0x063A*/
  0, 0, 0, 0, 0, /*0x063B-0x063F*/
  1 + 2, /*0x0640*/
  1 + 2 + 256 * 0x61,/*0x0641*/
  1 + 2 + 256 * 0x65,/*0x0642*/
  1 + 2 + 256 * 0x69,/*0x0643*/
  1 + 2 + 16 + 256 * 0x6D,/*0x0644*/
  1 + 2 + 256 * 0x71,/*0x0645*/
  1 + 2 + 256 * 0x75,/*0x0646*/
  1 + 2 + 256 * 0x79,/*0x0647*/
  1 + 256 * 0x7D,/*0x0648*/
  1 + 256 * 0x7F,/*0x0649*/
  1 + 2 + 256 * 0x81,/*0x064A*/
  4 + 256 * 1, /*0x064B*/
  4 + 128 + 256 * 1, /*0x064C*/
  4 + 128 + 256 * 1, /*0x064D*/
  4 + 128 + 256 * 1, /*0x064E*/
  4 + 128 + 256 * 1, /*0x064F*/
  4 + 128 + 256 * 1, /*0x0650*/
  4 + 64 + 256 * 3, /*0x0651*/
  4 + 256 * 1, /*0x0652*/
  4 + 256 * 7, /*0x0653*/
  4 + 256 * 8, /*0x0654*/
  4 + 256 * 8, /*0x0655*/
  4 + 256 * 1, /*0x0656*/
  0, 0, 0, 0, 0, /*0x0657-0x065B*/
  1 + 256 * 0x85,/*0x065C*/
  1 + 256 * 0x87,/*0x065D*/
  1 + 256 * 0x89,/*0x065E*/
  1 + 256 * 0x8B,/*0x065F*/
  0, 0, 0, 0, 0, /*0x0660-0x0664*/
  0, 0, 0, 0, 0, /*0x0665-0x0669*/
  0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/
  4 + 256 * 6, /*0x0670*/
  1 + 8 + 256 * 0x00,/*0x0671*/
  1 + 32, /*0x0672*/
  1 + 32, /*0x0673*/
  0, /*0x0674*/
  1 + 32, /*0x0675*/
  1, 1, /*0x0676-0x0677*/
  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x0678-0x067D*/
  1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/
  1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x0688-0x0691*/
  1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/
  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/
  1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/
  1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/
  1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/
  1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/
  1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/
  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B8-0x06BF*/
  1+2, 1+2, /*0x06B8-0x06BF*/
  1, /*0x06C0*/
  1+2, /*0x06C1*/
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x06C2-0x06CB*/
  1+2+8+256 * 0xAC, /*0x06CC*/
  1, /*0x06CD*/
  1+2, 1+2, 1+2, 1+2, /*0x06CE-0x06D1*/
  1, 1 /*0x06D2-0x06D3*/
};

/* Link values for U+FB50..U+FC62, indexed by (ch - 0xFB50) in getLink(). */
static const uint8_t presALink[] = {
/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
/*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0,
/*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0,
/*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
/*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2,
/*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4,
/*FC6*/ 4, 4, 4
};

/* Link values for U+FE70..U+FEFC, indexed by (ch - 0xFE70) in getLink(). */
static const uint8_t presBLink[]=
{
/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
/*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,
/*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0,
/*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
/*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0,
/*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
/*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
/*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,
/*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0,
/*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0
};

/*
 * Row labels suggest this is indexed by (ch - 0xFB50) and yields the 06xx
 * letter for a Presentation Forms-A code point (0 = no mapping); it is not
 * referenced by the code visible in this part of the file - TODO confirm.
 */
static const UChar convertFBto06[] =
{
/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
/*FB5*/ 0x671, 0x671, 0, 0, 0, 0, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0, 0,
/*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0,
/*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x698, 0x698, 0, 0, 0x6A9, 0x6A9,
/*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC
};

/*
 * Row labels suggest this is indexed by (ch - 0xFE70) and yields the 06xx
 * character for a Presentation Forms-B code point; it is not referenced by
 * the code visible in this part of the file - TODO confirm.
 */
static const UChar convertFEto06[] =
{
/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
};

/* 4x4x4 lookup table; not referenced by the code visible in this part of the file. */
static const uint8_t shapeTable[4][4][4]=
{
  { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
  { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
  { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
  { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
};

/*
 * This function shapes European digits to Arabic-Indic digits
 * in-place, writing over the input characters.
 * Since we know that we are only looking for BMP code points,
 * we can safely just work with code units (again, at least UTF-16).
1.319 + */ 1.320 +static void 1.321 +_shapeToArabicDigitsWithContext(UChar *s, int32_t length, 1.322 + UChar digitBase, 1.323 + UBool isLogical, UBool lastStrongWasAL) { 1.324 + const UBiDiProps *bdp; 1.325 + int32_t i; 1.326 + UChar c; 1.327 + 1.328 + bdp=ubidi_getSingleton(); 1.329 + digitBase-=0x30; 1.330 + 1.331 + /* the iteration direction depends on the type of input */ 1.332 + if(isLogical) { 1.333 + for(i=0; i<length; ++i) { 1.334 + c=s[i]; 1.335 + switch(ubidi_getClass(bdp, c)) { 1.336 + case U_LEFT_TO_RIGHT: /* L */ 1.337 + case U_RIGHT_TO_LEFT: /* R */ 1.338 + lastStrongWasAL=FALSE; 1.339 + break; 1.340 + case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 1.341 + lastStrongWasAL=TRUE; 1.342 + break; 1.343 + case U_EUROPEAN_NUMBER: /* EN */ 1.344 + if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 1.345 + s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 1.346 + } 1.347 + break; 1.348 + default : 1.349 + break; 1.350 + } 1.351 + } 1.352 + } else { 1.353 + for(i=length; i>0; /* pre-decrement in the body */) { 1.354 + c=s[--i]; 1.355 + switch(ubidi_getClass(bdp, c)) { 1.356 + case U_LEFT_TO_RIGHT: /* L */ 1.357 + case U_RIGHT_TO_LEFT: /* R */ 1.358 + lastStrongWasAL=FALSE; 1.359 + break; 1.360 + case U_RIGHT_TO_LEFT_ARABIC: /* AL */ 1.361 + lastStrongWasAL=TRUE; 1.362 + break; 1.363 + case U_EUROPEAN_NUMBER: /* EN */ 1.364 + if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { 1.365 + s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ 1.366 + } 1.367 + break; 1.368 + default : 1.369 + break; 1.370 + } 1.371 + } 1.372 + } 1.373 +} 1.374 + 1.375 +/* 1.376 + *Name : invertBuffer 1.377 + *Function : This function inverts the buffer, it's used 1.378 + * in case the user specifies the buffer to be 1.379 + * U_SHAPE_TEXT_DIRECTION_LOGICAL 1.380 + */ 1.381 +static void 1.382 +invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) { 1.383 + UChar temp; 1.384 + int32_t i=0,j=0; 
1.385 + for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) { 1.386 + temp = buffer[i]; 1.387 + buffer[i] = buffer[j]; 1.388 + buffer[j] = temp; 1.389 + } 1.390 +} 1.391 + 1.392 +/* 1.393 + *Name : changeLamAlef 1.394 + *Function : Converts the Alef characters into an equivalent 1.395 + * LamAlef location in the 0x06xx Range, this is an 1.396 + * intermediate stage in the operation of the program 1.397 + * later it'll be converted into the 0xFExx LamAlefs 1.398 + * in the shaping function. 1.399 + */ 1.400 +static inline UChar 1.401 +changeLamAlef(UChar ch) { 1.402 + switch(ch) { 1.403 + case 0x0622 : 1.404 + return 0x065C; 1.405 + case 0x0623 : 1.406 + return 0x065D; 1.407 + case 0x0625 : 1.408 + return 0x065E; 1.409 + case 0x0627 : 1.410 + return 0x065F; 1.411 + } 1.412 + return 0; 1.413 +} 1.414 + 1.415 +/* 1.416 + *Name : getLink 1.417 + *Function : Resolves the link between the characters as 1.418 + * Arabic characters have four forms : 1.419 + * Isolated, Initial, Middle and Final Form 1.420 + */ 1.421 +static UChar 1.422 +getLink(UChar ch) { 1.423 + if(ch >= 0x0622 && ch <= 0x06D3) { 1.424 + return(araLink[ch-0x0622]); 1.425 + } else if(ch == 0x200D) { 1.426 + return(3); 1.427 + } else if(ch >= 0x206D && ch <= 0x206F) { 1.428 + return(4); 1.429 + }else if(ch >= 0xFB50 && ch <= 0xFC62) { 1.430 + return(presALink[ch-0xFB50]); 1.431 + } else if(ch >= 0xFE70 && ch <= 0xFEFC) { 1.432 + return(presBLink[ch-0xFE70]); 1.433 + }else { 1.434 + return(0); 1.435 + } 1.436 +} 1.437 + 1.438 +/* 1.439 + *Name : countSpaces 1.440 + *Function : Counts the number of spaces 1.441 + * at each end of the logical buffer 1.442 + */ 1.443 +static void 1.444 +countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) { 1.445 + int32_t i = 0; 1.446 + int32_t countl = 0,countr = 0; 1.447 + while((dest[i] == SPACE_CHAR) && (countl < size)) { 1.448 + countl++; 1.449 + i++; 1.450 + } 1.451 + if (countl < size) { /* the entire buffer is not all 
space */ 1.452 + while(dest[size-1] == SPACE_CHAR) { 1.453 + countr++; 1.454 + size--; 1.455 + } 1.456 + } 1.457 + *spacesCountl = countl; 1.458 + *spacesCountr = countr; 1.459 +} 1.460 + 1.461 +/* 1.462 + *Name : isTashkeelChar 1.463 + *Function : Returns 1 for Tashkeel characters in 06 range else return 0 1.464 + */ 1.465 +static inline int32_t 1.466 +isTashkeelChar(UChar ch) { 1.467 + return (int32_t)( ch>=0x064B && ch<= 0x0652 ); 1.468 +} 1.469 + 1.470 +/* 1.471 + *Name : isTashkeelCharFE 1.472 + *Function : Returns 1 for Tashkeel characters in FE range else return 0 1.473 + */ 1.474 +static inline int32_t 1.475 +isTashkeelCharFE(UChar ch) { 1.476 + return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F ); 1.477 +} 1.478 + 1.479 +/* 1.480 + *Name : isAlefChar 1.481 + *Function : Returns 1 for Alef characters else return 0 1.482 + */ 1.483 +static inline int32_t 1.484 +isAlefChar(UChar ch) { 1.485 + return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) ); 1.486 +} 1.487 + 1.488 +/* 1.489 + *Name : isLamAlefChar 1.490 + *Function : Returns 1 for LamAlef characters else return 0 1.491 + */ 1.492 +static inline int32_t 1.493 +isLamAlefChar(UChar ch) { 1.494 + return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) ); 1.495 +} 1.496 + 1.497 +/*BIDI 1.498 + *Name : isTailChar 1.499 + *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0 1.500 + */ 1.501 + 1.502 +static inline int32_t 1.503 +isTailChar(UChar ch) { 1.504 + if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){ 1.505 + return 1; 1.506 + }else{ 1.507 + return 0; 1.508 + } 1.509 +} 1.510 + 1.511 +/*BIDI 1.512 + *Name : isSeenTailFamilyChar 1.513 + *Function : returns 1 if the character is a seen family isolated character 1.514 + * in the FE range otherwise returns 0 1.515 + */ 1.516 + 1.517 +static inline int32_t 1.518 +isSeenTailFamilyChar(UChar ch) { 1.519 + if(ch >= 0xfeb1 && ch < 0xfebf){ 1.520 + return tailFamilyIsolatedFinal [ch - 0xFEB1]; 1.521 + 
}else{ 1.522 + return 0; 1.523 + } 1.524 +} 1.525 + 1.526 + /* Name : isSeenFamilyChar 1.527 + * Function : returns 1 if the character is a seen family character in the Unicode 1.528 + * 06 range otherwise returns 0 1.529 + */ 1.530 + 1.531 +static inline int32_t 1.532 +isSeenFamilyChar(UChar ch){ 1.533 + if(ch >= 0x633 && ch <= 0x636){ 1.534 + return 1; 1.535 + }else { 1.536 + return 0; 1.537 + } 1.538 +} 1.539 + 1.540 +/*Start of BIDI*/ 1.541 +/* 1.542 + *Name : isAlefMaksouraChar 1.543 + *Function : returns 1 if the character is a Alef Maksoura Final or isolated 1.544 + * otherwise returns 0 1.545 + */ 1.546 +static inline int32_t 1.547 +isAlefMaksouraChar(UChar ch) { 1.548 + return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649)); 1.549 +} 1.550 + 1.551 +/* 1.552 + * Name : isYehHamzaChar 1.553 + * Function : returns 1 if the character is a yehHamza isolated or yehhamza 1.554 + * final is found otherwise returns 0 1.555 + */ 1.556 +static inline int32_t 1.557 +isYehHamzaChar(UChar ch) { 1.558 + if((ch==0xFE89)||(ch==0xFE8A)){ 1.559 + return 1; 1.560 + }else{ 1.561 + return 0; 1.562 + } 1.563 +} 1.564 + 1.565 + /* 1.566 + * Name: isTashkeelOnTatweelChar 1.567 + * Function: Checks if the Tashkeel Character is on Tatweel or not,if the 1.568 + * Tashkeel on tatweel (FE range), it returns 1 else if the 1.569 + * Tashkeel with shadda on tatweel (FC range)return 2 otherwise 1.570 + * returns 0 1.571 + */ 1.572 +static inline int32_t 1.573 +isTashkeelOnTatweelChar(UChar ch){ 1.574 + if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR) 1.575 + { 1.576 + return tashkeelMedial [ch - 0xFE70]; 1.577 + }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) { 1.578 + return 2; 1.579 + }else{ 1.580 + return 0; 1.581 + } 1.582 +} 1.583 + 1.584 +/* 1.585 + * Name: isIsolatedTashkeelChar 1.586 + * Function: Checks if the Tashkeel Character is in the isolated form 1.587 + * (i.e. 
Unicode FE range) returns 1 else if the Tashkeel 1.588 + * with shadda is in the isolated form (i.e. Unicode FC range) 1.589 + * returns 2 otherwise returns 0 1.590 + */ 1.591 +static inline int32_t 1.592 +isIsolatedTashkeelChar(UChar ch){ 1.593 + if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){ 1.594 + return (1 - tashkeelMedial [ch - 0xFE70]); 1.595 + }else if(ch >= 0xfc5e && ch <= 0xfc63){ 1.596 + return 1; 1.597 + }else{ 1.598 + return 0; 1.599 + } 1.600 +} 1.601 + 1.602 + 1.603 + 1.604 + 1.605 +/* 1.606 + *Name : calculateSize 1.607 + *Function : This function calculates the destSize to be used in preflighting 1.608 + * when the destSize is equal to 0 1.609 + * It is used also to calculate the new destsize in case the 1.610 + * destination buffer will be resized. 1.611 + */ 1.612 + 1.613 +static int32_t 1.614 +calculateSize(const UChar *source, int32_t sourceLength, 1.615 +int32_t destSize,uint32_t options) { 1.616 + int32_t i = 0; 1.617 + 1.618 + int lamAlefOption = 0; 1.619 + int tashkeelOption = 0; 1.620 + 1.621 + destSize = sourceLength; 1.622 + 1.623 + if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE || 1.624 + ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) && 1.625 + ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){ 1.626 + lamAlefOption = 1; 1.627 + } 1.628 + if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE && 1.629 + ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){ 1.630 + tashkeelOption = 1; 1.631 + } 1.632 + 1.633 + if(lamAlefOption || tashkeelOption){ 1.634 + if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { 1.635 + for(i=0;i<sourceLength;i++) { 1.636 + if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) { 1.637 + destSize--; 1.638 + } 1.639 + } 1.640 + }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) { 1.641 + 
for(i=0;i<sourceLength;i++) { 1.642 + if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) { 1.643 + destSize--; 1.644 + } 1.645 + } 1.646 + } 1.647 + } 1.648 + 1.649 + if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){ 1.650 + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ 1.651 + for(i=0;i<sourceLength;i++) { 1.652 + if(isLamAlefChar(source[i])) 1.653 + destSize++; 1.654 + } 1.655 + } 1.656 + } 1.657 + 1.658 + return destSize; 1.659 +} 1.660 + 1.661 +/* 1.662 + *Name : handleTashkeelWithTatweel 1.663 + *Function : Replaces Tashkeel as following: 1.664 + * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel. 1.665 + * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace 1.666 + * it with Shadda on Tatweel. 1.667 + * Case 3: if the Tashkeel is isolated replace it with Space. 1.668 + * 1.669 + */ 1.670 +static int32_t 1.671 +handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength, 1.672 + int32_t /*destSize*/, uint32_t /*options*/, 1.673 + UErrorCode * /*pErrorCode*/) { 1.674 + int i; 1.675 + for(i = 0; i < sourceLength; i++){ 1.676 + if((isTashkeelOnTatweelChar(dest[i]) == 1)){ 1.677 + dest[i] = TATWEEL_CHAR; 1.678 + }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){ 1.679 + dest[i] = SHADDA_TATWEEL_CHAR; 1.680 + }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){ 1.681 + dest[i] = SPACE_CHAR; 1.682 + } 1.683 + } 1.684 + return sourceLength; 1.685 +} 1.686 + 1.687 + 1.688 + 1.689 +/* 1.690 + *Name : handleGeneratedSpaces 1.691 + *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space, 1.692 + * and Tashkeel to space. 1.693 + * handleGeneratedSpaces function puts these generated spaces 1.694 + * according to the options the user specifies. LamAlef and Tashkeel 1.695 + * spaces can be replaced at begin, at end, at near or decrease the 1.696 + * buffer size. 
1.697 + * 1.698 + * There is also Auto option for LamAlef and tashkeel, which will put 1.699 + * the spaces at end of the buffer (or end of text if the user used 1.700 + * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END). 1.701 + * 1.702 + * If the text type was visual_LTR and the option 1.703 + * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END 1.704 + * option will place the space at the beginning of the buffer and 1.705 + * BEGIN will place the space at the end of the buffer. 1.706 + */ 1.707 + 1.708 +static int32_t 1.709 +handleGeneratedSpaces(UChar *dest, int32_t sourceLength, 1.710 + int32_t destSize, 1.711 + uint32_t options, 1.712 + UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) { 1.713 + 1.714 + int32_t i = 0, j = 0; 1.715 + int32_t count = 0; 1.716 + UChar *tempbuffer=NULL; 1.717 + 1.718 + int lamAlefOption = 0; 1.719 + int tashkeelOption = 0; 1.720 + int shapingMode = SHAPE_MODE; 1.721 + 1.722 + if (shapingMode == 0){ 1.723 + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){ 1.724 + lamAlefOption = 1; 1.725 + } 1.726 + if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){ 1.727 + tashkeelOption = 1; 1.728 + } 1.729 + } 1.730 + 1.731 + tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 1.732 + /* Test for NULL */ 1.733 + if(tempbuffer == NULL) { 1.734 + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1.735 + return 0; 1.736 + } 1.737 + 1.738 + 1.739 + if (lamAlefOption || tashkeelOption){ 1.740 + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 1.741 + 1.742 + i = j = 0; count = 0; 1.743 + while(i < sourceLength) { 1.744 + if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 1.745 + (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 1.746 + j--; 1.747 + count++; 1.748 + } else { 1.749 + tempbuffer[j] = dest[i]; 1.750 + } 1.751 + i++; 1.752 + j++; 1.753 + } 1.754 + 1.755 + while(count >= 0) { 1.756 + tempbuffer[i] = 0x0000; 1.757 + i--; 1.758 + count--; 1.759 + } 1.760 + 
1.761 + uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 1.762 + destSize = u_strlen(dest); 1.763 + } 1.764 + 1.765 + lamAlefOption = 0; 1.766 + 1.767 + if (shapingMode == 0){ 1.768 + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR ){ 1.769 + lamAlefOption = 1; 1.770 + } 1.771 + } 1.772 + 1.773 + if (lamAlefOption){ 1.774 + /* Lam+Alef is already shaped into LamAlef + FFFF */ 1.775 + i = 0; 1.776 + while(i < sourceLength) { 1.777 + if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){ 1.778 + dest[i] = SPACE_CHAR; 1.779 + } 1.780 + i++; 1.781 + } 1.782 + destSize = sourceLength; 1.783 + } 1.784 + lamAlefOption = 0; 1.785 + tashkeelOption = 0; 1.786 + 1.787 + if (shapingMode == 0) { 1.788 + if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) || 1.789 + (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) 1.790 + && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) { 1.791 + lamAlefOption = 1; 1.792 + } 1.793 + if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) { 1.794 + tashkeelOption = 1; 1.795 + } 1.796 + } 1.797 + 1.798 + if(lamAlefOption || tashkeelOption){ 1.799 + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 1.800 + 1.801 + i = j = sourceLength; count = 0; 1.802 + 1.803 + while(i >= 0) { 1.804 + if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 1.805 + (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 1.806 + j++; 1.807 + count++; 1.808 + }else { 1.809 + tempbuffer[j] = dest[i]; 1.810 + } 1.811 + i--; 1.812 + j--; 1.813 + } 1.814 + 1.815 + for(i=0 ;i < count; i++){ 1.816 + tempbuffer[i] = SPACE_CHAR; 1.817 + } 1.818 + 1.819 + uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 1.820 + destSize = sourceLength; 1.821 + } 1.822 + 1.823 + 1.824 + 1.825 + lamAlefOption = 0; 1.826 + tashkeelOption = 0; 1.827 + 1.828 + if (shapingMode == 0) { 1.829 + if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) || 1.830 + (((options&U_SHAPE_LAMALEF_MASK) == 
U_SHAPE_LAMALEF_AUTO ) 1.831 + && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) { 1.832 + lamAlefOption = 1; 1.833 + } 1.834 + if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){ 1.835 + tashkeelOption = 1; 1.836 + } 1.837 + } 1.838 + 1.839 + if(lamAlefOption || tashkeelOption){ 1.840 + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 1.841 + 1.842 + i = j = 0; count = 0; 1.843 + while(i < sourceLength) { 1.844 + if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || 1.845 + (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ 1.846 + j--; 1.847 + count++; 1.848 + }else { 1.849 + tempbuffer[j] = dest[i]; 1.850 + } 1.851 + i++; 1.852 + j++; 1.853 + } 1.854 + 1.855 + while(count >= 0) { 1.856 + tempbuffer[i] = SPACE_CHAR; 1.857 + i--; 1.858 + count--; 1.859 + } 1.860 + 1.861 + uprv_memcpy(dest,tempbuffer, sourceLength*U_SIZEOF_UCHAR); 1.862 + destSize = sourceLength; 1.863 + } 1.864 + 1.865 + 1.866 + if(tempbuffer){ 1.867 + uprv_free(tempbuffer); 1.868 + } 1.869 + 1.870 + return destSize; 1.871 +} 1.872 + 1.873 +/* 1.874 + *Name :expandCompositCharAtBegin 1.875 + *Function :Expands the LamAlef character to Lam and Alef consuming the required 1.876 + * space from beginning of the buffer. If the text type was visual_LTR 1.877 + * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected 1.878 + * the spaces will be located at end of buffer. 
1.879 + * If there are no spaces to expand the LamAlef, an error 1.880 + * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 1.881 + */ 1.882 + 1.883 +static int32_t 1.884 +expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { 1.885 + int32_t i = 0,j = 0; 1.886 + int32_t countl = 0; 1.887 + UChar *tempbuffer=NULL; 1.888 + 1.889 + tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 1.890 + 1.891 + /* Test for NULL */ 1.892 + if(tempbuffer == NULL) { 1.893 + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1.894 + return 0; 1.895 + } 1.896 + 1.897 + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 1.898 + 1.899 + i = 0; 1.900 + while(dest[i] == SPACE_CHAR) { 1.901 + countl++; 1.902 + i++; 1.903 + } 1.904 + 1.905 + i = j = sourceLength-1; 1.906 + 1.907 + while(i >= 0 && j >= 0) { 1.908 + if( countl>0 && isLamAlefChar(dest[i])) { 1.909 + tempbuffer[j] = LAM_CHAR; 1.910 + /* to ensure the array index is within the range */ 1.911 + U_ASSERT(dest[i] >= 0xFEF5u 1.912 + && dest[i]-0xFEF5u < sizeof(convertLamAlef)/sizeof(convertLamAlef[0])); 1.913 + tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; 1.914 + j--; 1.915 + countl--; 1.916 + }else { 1.917 + if( countl == 0 && isLamAlefChar(dest[i]) ) { 1.918 + *pErrorCode=U_NO_SPACE_AVAILABLE; 1.919 + } 1.920 + tempbuffer[j] = dest[i]; 1.921 + } 1.922 + i--; 1.923 + j--; 1.924 + } 1.925 + uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 1.926 + 1.927 + uprv_free(tempbuffer); 1.928 + 1.929 + destSize = sourceLength; 1.930 + return destSize; 1.931 +} 1.932 + 1.933 +/* 1.934 + *Name : expandCompositCharAtEnd 1.935 + *Function : Expands the LamAlef character to Lam and Alef consuming the 1.936 + * required space from end of the buffer. If the text type was 1.937 + * Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 1.938 + * was used, the spaces will be consumed from begin of buffer. 
If 1.939 + * there are no spaces to expand the LamAlef, an error 1.940 + * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h 1.941 + */ 1.942 + 1.943 +static int32_t 1.944 +expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { 1.945 + int32_t i = 0,j = 0; 1.946 + 1.947 + int32_t countr = 0; 1.948 + int32_t inpsize = sourceLength; 1.949 + 1.950 + UChar *tempbuffer=NULL; 1.951 + tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); 1.952 + 1.953 + /* Test for NULL */ 1.954 + if(tempbuffer == NULL) { 1.955 + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 1.956 + return 0; 1.957 + } 1.958 + 1.959 + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); 1.960 + 1.961 + while(dest[inpsize-1] == SPACE_CHAR) { 1.962 + countr++; 1.963 + inpsize--; 1.964 + } 1.965 + 1.966 + i = sourceLength - countr - 1; 1.967 + j = sourceLength - 1; 1.968 + 1.969 + while(i >= 0 && j >= 0) { 1.970 + if( countr>0 && isLamAlefChar(dest[i]) ) { 1.971 + tempbuffer[j] = LAM_CHAR; 1.972 + tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; 1.973 + j--; 1.974 + countr--; 1.975 + }else { 1.976 + if ((countr == 0) && isLamAlefChar(dest[i]) ) { 1.977 + *pErrorCode=U_NO_SPACE_AVAILABLE; 1.978 + } 1.979 + tempbuffer[j] = dest[i]; 1.980 + } 1.981 + i--; 1.982 + j--; 1.983 + } 1.984 + 1.985 + if(countr > 0) { 1.986 + uprv_memmove(tempbuffer, tempbuffer+countr, sourceLength*U_SIZEOF_UCHAR); 1.987 + if(u_strlen(tempbuffer) < sourceLength) { 1.988 + for(i=sourceLength-1;i>=sourceLength-countr;i--) { 1.989 + tempbuffer[i] = SPACE_CHAR; 1.990 + } 1.991 + } 1.992 + } 1.993 + uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR); 1.994 + 1.995 + uprv_free(tempbuffer); 1.996 + 1.997 + destSize = sourceLength; 1.998 + return destSize; 1.999 +} 1.1000 + 1.1001 +/* 1.1002 + *Name : expandCompositCharAtNear 1.1003 + *Function : Expands the LamAlef character into Lam + Alef, YehHamza character 1.1004 + * into Yeh + Hamza, SeenFamily 
character into SeenFamily character
* + Tail, while consuming the space next to the character.
* If there are no spaces next to the character, an error
* will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
*/

/*
 * dest           : text buffer of sourceLength UChars, modified in place.
 * sourceLength   : number of UChars in dest; this is also the value returned.
 * destSize       : ignored on input (overwritten with sourceLength).
 * pErrorCode     : set to U_NO_SPACE_AVAILABLE whenever an enabled character
 *                  is found without a space in the neighbouring slot.
 * yehHamzaOption : non-zero to expand YehHamza presentation forms.
 * seenTailOption : non-zero to add a tail before Seen-family characters.
 * lamAlefOption  : non-zero to expand LamAlef ligatures.
 * shapeVars      : supplies tailChar, the tail code point to insert.
 *
 * The three cases are tested in the order seen-tail, YehHamza, LamAlef and
 * only the first matching case is applied for a given index.
 */
static int32_t
expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode,
                         int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) {
    int32_t i = 0;


    UChar lamalefChar, yehhamzaChar;

    for(i = 0 ;i<=sourceLength-1;i++) {
        if (seenTailOption && isSeenTailFamilyChar(dest[i])) {
            /* the tail goes into the space just before the character */
            if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
                dest[i-1] = shapeVars.tailChar;
            }else {
                *pErrorCode=U_NO_SPACE_AVAILABLE;
            }
        }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) {
            /* Yeh stays at i, the Hamza goes into the space just before it */
            if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
                yehhamzaChar = dest[i];
                dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR];
                dest[i-1] = HAMZAFE_CHAR;
            }else {

                *pErrorCode=U_NO_SPACE_AVAILABLE;
            }
        }else if(lamAlefOption && (i < sourceLength-1) && isLamAlefChar(dest[i+1])) {
            /*
             * Look one element ahead; the bound keeps the last iteration
             * from reading dest[sourceLength], which is outside the text.
             */
            if(dest[i] == SPACE_CHAR){
                lamalefChar = dest[i+1];
                dest[i+1] = LAM_CHAR;
                dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ];
            }else {
                *pErrorCode=U_NO_SPACE_AVAILABLE;
            }
        }
    }
    destSize = sourceLength;
    return destSize;
}
 /*
 * Name : expandCompositChar
 * Function : LamAlef, need special handling, since it expands from one
 * character into two characters while shaping or deshaping.
 * In order to expand it, near or far spaces according to the
 * options user specifies. Also buffer size can be increased.
*
* For SeenFamily characters and YehHamza only the near option is
* supported, while for LamAlef we can take spaces from begin, end,
* near or even increase the buffer size.
* There is also the Auto option for LamAlef only, which will first
* search for a space at end, begin then near, respectively.
* If there are no spaces to expand these characters, an error will be set to
* U_NO_SPACE_AVAILABLE as defined in utypes.h
*/

/*
 * dest         : text buffer, modified in place.
 * sourceLength : number of UChars of text in dest.
 * destSize     : passed through to the helpers; the return value is the
 *                size reported by the last helper that ran.
 * options      : u_shapeArabic() option bits.
 * pErrorCode   : receives U_MEMORY_ALLOCATION_ERROR / U_NO_SPACE_AVAILABLE.
 * shapingMode  : compared against 0 and 1 below, i.e. SHAPE_MODE and
 *                DESHAPE_MODE. LamAlef handling only runs for 1, YehHamza and
 *                Seen handling only for 0.
 * shapeVars    : begin/end option values (possibly swapped by the caller)
 *                and the tail character.
 */
static int32_t
expandCompositChar(UChar *dest, int32_t sourceLength,
                   int32_t destSize,uint32_t options,
                   UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) {

    int32_t i = 0,j = 0;

    UChar *tempbuffer=NULL;
    int yehHamzaOption = 0;
    int seenTailOption = 0;
    int lamAlefOption = 0;

    /*
     * LamAlef AUTO: try one end of the buffer, then the other, then the
     * neighbouring space. U_NO_SPACE_AVAILABLE is cleared before each retry.
     */
    if (shapingMode == 1){
        if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){

            if(shapeVars.spacesRelativeToTextBeginEnd == 0) {
                destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);

                if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
                    *pErrorCode = U_ZERO_ERROR;
                    destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
                }
            }else {
                destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);

                if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
                    *pErrorCode = U_ZERO_ERROR;
                    destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
                }
            }

            if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
                *pErrorCode = U_ZERO_ERROR;
                destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
                                                    seenTailOption, 1,shapeVars);
            }
        }
    }

    /* LamAlef END (as resolved through shapeVars) */
    if (shapingMode == 1){
        if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){
            destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
        }
    }

    /* LamAlef BEGIN (as resolved through shapeVars) */
    if (shapingMode == 1){
        if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){
            destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
        }
    }

    /* collect the "near" options; they are all served by one helper call */
    if (shapingMode == 0){
        if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){
            yehHamzaOption = 1;
        }
        if ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){
            seenTailOption = 1;
        }
    }
    if (shapingMode == 1) {
        if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) {
            lamAlefOption = 1;
        }
    }


    if (yehHamzaOption || seenTailOption || lamAlefOption){
        destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
                                            seenTailOption,lamAlefOption,shapeVars);
    }


    /*
     * LamAlef RESIZE: rebuild the text in a scratch buffer of the size
     * reported by calculateSize(), writing two UChars for every LamAlef.
     * NOTE(review): the loop reads dest[i] up to destSize-1, which can exceed
     * sourceLength-1; this relies on the caller's buffer being at least
     * destSize long - confirm against callers.
     */
    if (shapingMode == 1){
        if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){
            destSize = calculateSize(dest,sourceLength,destSize,options);
            tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR);

            /* Test for NULL */
            if(tempbuffer == NULL) {
                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }

            uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR);

            i = j = 0;
            while(i < destSize && j < destSize) {
                if(isLamAlefChar(dest[i]) ) {
                    tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ];
                    tempbuffer[j+1] = LAM_CHAR;
                    j++;
                }else {
                    tempbuffer[j] = dest[i];
                }
                i++;
                j++;
            }

            uprv_memcpy(dest, tempbuffer, destSize*U_SIZEOF_UCHAR);
        }
    }

    if(tempbuffer) {
        uprv_free(tempbuffer);
    }
    return destSize;
}

/*
 *Name : shapeUnicode
 *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
 * arabic Unicode buffer in FExx Range
 *
 * dest         : text buffer of sourceLength UChars, shaped in place.
 * tashkeelFlag : 0, 1 or 2 as passed by u_shapeArabic(); with 2 every
 *                tashkeel except U+0651 is replaced by TASHKEEL_SPACE_SUB,
 *                with 1 tashkeel between two linking letters may get its
 *                second (Shape == 1) form, otherwise Shape is 0 for tashkeel.
 * Returns sourceLength, or the size reported by handleGeneratedSpaces() /
 * expandCompositChar() when those post-processing steps run.
 */
static int32_t
shapeUnicode(UChar *dest, int32_t sourceLength,
             int32_t destSize,uint32_t options,
             UErrorCode *pErrorCode,
             int tashkeelFlag, struct uShapeVariables shapeVars) {

    int32_t i, iend;
    int32_t step;
    int32_t lastPos,Nx, Nw;
    unsigned int Shape;
    int32_t lamalef_found = 0;
    int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0;
    UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0;
    UChar wLamalef;

    /*
     * Converts the input buffer from FExx Range into 06xx Range
     * to make sure that all characters are in the 06xx range
     * even the lamalef is converted to the special region in
     * the 06xx range
     */
    if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) {
        for (i = 0; i < sourceLength; i++) {
            UChar inputChar = dest[i];
            if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) {
                /* a zero table entry means "no mapping": keep the character */
                UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
                if (c != 0)
                    dest[i] = c;
            } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
                dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
            } else {
                dest[i] = inputChar ;
            }
        }
    }


    /* sets the index to the end of the buffer, together with the step point to -1 */
    i = sourceLength - 1;
    iend = -1;
    step = -1;

    /*
     * This function resolves the link between the characters .
     * Arabic characters have four forms :
     * Isolated Form, Initial Form, Middle Form and Final Form
     */
    /* NOTE(review): dest[i] is read here without a check; assumes sourceLength > 0 - confirm against callers */
    currLink = getLink(dest[i]);

    lastPos = i;
    /* Nx < 0 means "next relevant character not looked up yet" */
    Nx = -2, Nw = 0;

    while (i != iend) {
        /* If high byte of currLink > 0 then more than one shape */
        if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) {
            /* find the next character (towards index 0) whose link is not IRRELEVANT */
            Nw = i + step;
            while (Nx < 0) { /* we need to know about next char */
                if(Nw == iend) {
                    nextLink = 0;
                    /*
                     * NOTE(review): 3000 is used as a non-negative "no next
                     * character" marker; it looks like it could collide with
                     * a real index in texts longer than 3000 - confirm.
                     */
                    Nx = 3000;
                } else {
                    nextLink = getLink(dest[Nw]);
                    if((nextLink & IRRELEVANT) == 0) {
                        Nx = Nw;
                    } else {
                        Nw = Nw + step;
                    }
                }
            }

            if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) {
                lamalef_found = 1;
                wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
                if ( wLamalef != 0) {
                    dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */
                    dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */
                    i=lastPos; /* unicode private use area, this is done to make */
                } /* sure that removeLamAlefSpaces() handles only the */
                lastLink = prevLink; /* spaces generated during lamalef generation. */
                currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */
            } /* in removeLamAlefSpaces() */

            /*
             * Remember whether a Seen-family or YehHamza character sits at
             * index 0 or right after a space; only then is
             * expandCompositChar() called at the end.
             */
            if ((i > 0) && (dest[i-1] == SPACE_CHAR)){
                if ( isSeenFamilyChar(dest[i])) {
                    seenfamFound = 1;
                } else if (dest[i] == YEH_HAMZA_CHAR) {
                    yehhamzaFound = 1;
                }
            }
            else if(i==0){
                if ( isSeenFamilyChar(dest[i])){
                    seenfamFound = 1;
                } else if (dest[i] == YEH_HAMZA_CHAR) {
                    yehhamzaFound = 1;
                }
            }

            /*
             * get the proper shape according to link ability of neighbors
             * and of character; depends on the order of the shapes
             * (isolated, initial, middle, final) in the compatibility area
             */
            Shape = shapeTable[nextLink & (LINKR + LINKL)]
                              [lastLink & (LINKR + LINKL)]
                              [currLink & (LINKR + LINKL)];

            if ((currLink & (LINKR+LINKL)) == 1) {
                /* only LINKR is set: keep just the low bit of the shape */
                Shape &= 1;
            } else if(isTashkeelChar(dest[i])) {
                if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
                     dest[i] != 0x064C && dest[i] != 0x064D )
                {
                    Shape = 1;
                    if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) {
                        Shape = 0;
                    }
                } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
                    Shape = 1;
                } else {
                    Shape = 0;
                }
            }
            /* true only for code units 0x0600..0x06FF */
            if ((dest[i] ^ 0x0600) < 0x100) {
                if ( isTashkeelChar(dest[i]) ){
                    if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){
                        dest[i] = TASHKEEL_SPACE_SUB;
                        tashkeelFound = 1;
                    } else {
                        /* to ensure the array index is within the range */
                        U_ASSERT(dest[i] >= 0x064Bu
                            && dest[i]-0x064Bu < sizeof(IrrelevantPos)/sizeof(IrrelevantPos[0]));
                        dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
                    }
                }else if ((currLink & APRESENT) > 0) {
                    /* high byte of the link is an offset from 0xFB50 */
                    dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape);
                }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) {
                    /* high byte of the link is an offset from 0xFE70 */
                    dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape);
                }
            }
        }

        /* move one notch forward */
        if ((currLink & IRRELEVANT) == 0) {
            prevLink = lastLink;
            lastLink = currLink;
            lastPos = i;
        }

        i = i + step;
        if (i == Nx) {
            /* reuse the link already fetched by the look-ahead */
            currLink = nextLink;
            Nx = -2;
        } else if(i != iend) {
            currLink = getLink(dest[i]);
        }
    }
    destSize = sourceLength;
    if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){
        destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars);
    }

    if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) {
        destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars);
    }
    return destSize;
}

/*
 *Name : deShapeUnicode
 *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped
 * arabic Unicode buffer in 06xx Range
 *
 * dest is converted in place. Returns sourceLength, or the size reported
 * by expandCompositChar() when a LamAlef was found and had to be expanded.
 */
static int32_t
deShapeUnicode(UChar *dest, int32_t sourceLength,
               int32_t destSize,uint32_t options,
               UErrorCode *pErrorCode, struct uShapeVariables shapeVars) {
    int32_t i = 0;
    int32_t lamalef_found = 0;
    int32_t yehHamzaComposeEnabled = 0;
    int32_t seenComposeEnabled = 0;

    yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0;
    seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 1 : 0;

    /*
     *This for loop changes the buffer from the Unicode FE range to
     *the Unicode 06 range
     */

    for(i = 0; i < sourceLength; i++) {
        UChar inputChar = dest[i];
        if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */
            /* a zero table entry means "no mapping": keep the character */
            UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
            if (c != 0)
                dest[i] = c;
        } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR))
                && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) {
            /* Hamza followed by Alef Maksoura: recompose into space + YehHamza */
            dest[i] = SPACE_CHAR;
            dest[i+1] = YEH_HAMZA_CHAR;
        } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1))
                && (isSeenTailFamilyChar(dest[i+1])) ) {
            /* tail in front of a Seen-family character: turn it back into a space */
            dest[i] = SPACE_CHAR;
        } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */
            dest[i] = convertFEto06 [ (inputChar - 0xFE70) ];
        } else {
            dest[i] = inputChar ;
        }

        if( isLamAlefChar(dest[i]) )
            lamalef_found = 1;
    }

    destSize = sourceLength;
    if (lamalef_found != 0){
        destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars);
    }
    return destSize;
}

/*
 ****************************************
 * u_shapeArabic
 ****************************************
 *
 * Validates the arguments and option bits, optionally aggregates tashkeel
 * into a temporary copy of the source, performs letter shaping or deshaping
 * in a temporary buffer, copies the result to dest and finally applies digit
 * shaping on dest in place.
 *
 * Returns 0 on argument or allocation errors, the required length together
 * with U_BUFFER_OVERFLOW_ERROR when dest is too small, and otherwise the
 * value of u_terminateUChars() for the produced length.
 */

U_CAPI int32_t U_EXPORT2
u_shapeArabic(const UChar *source, int32_t sourceLength,
              UChar *dest, int32_t destCapacity,
              uint32_t options,
              UErrorCode *pErrorCode) {

    int32_t destLength;
    /* defaults: old tail character, begin/end options not swapped */
    struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0};

    /* usual error checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
    if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 ||
                (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
                 ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) ||
                (((options&U_SHAPE_TASHKEEL_MASK) > 0) &&
                 ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) ||
                (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED ||
                (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED ||
                ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE &&
                (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) ||
                ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL &&
                (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)
    )
    {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    /* Validate lamalef options */
    if(((options&U_SHAPE_LAMALEF_MASK) > 0)&&
              !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) ||
                ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) ||
                ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )||
                 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) ||
                 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR)))
    {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    /* Validate Tashkeel options */
    if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&&
                   !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) ||
                     ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END )
                    ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )||
                    ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)))
    {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    /* determine the source length */
    if(sourceLength==-1) {
        sourceLength=u_strlen(source);
    }
    if(sourceLength<=0) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    /* check that source and destination do not overlap */
    if( dest!=NULL &&
        ((source<=dest && dest<source+sourceLength) ||
         (dest<=source && source<dest+destCapacity))) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* Does Options contain the new Seen Tail Unicode code point option */
    if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){
        shapeVars.tailChar = NEW_TAIL_CHAR;
    }else {
        shapeVars.tailChar = OLD_TAIL_CHAR;
    }

    if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
        /* stack buffer used whenever the working size fits, heap otherwise */
        UChar buffer[300];
        UChar *tempbuffer, *tempsource = NULL;
        int32_t outputSize, spacesCountl=0, spacesCountr=0;

        /*
         * Optional pre-pass: copy source into tempsource, walking it in
         * logical order (forwards, filling from index 0) or otherwise
         * backwards (filling downwards from index 2*sourceLength-1). When two
         * consecutive characters' links together carry both COMBINE bits,
         * the character already stored at j is replaced by a single code
         * unit from the 0xFC5E range instead of storing the second one.
         */
        if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) {
            int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL;
            int32_t aggregate_tashkeel =
                        (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) ==
                        (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED);
            int step=logical_order?1:-1;
            int j=logical_order?-1:2*sourceLength;
            int i=logical_order?-1:sourceLength;
            int end=logical_order?sourceLength:-1;
            int aggregation_possible = 1;
            UChar prev = 0;
            UChar prevLink, currLink = 0;
            int newSourceLength = 0;
            tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR);
            if(tempsource == NULL) {
                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }

            while ((i+=step) != end) {
                prevLink = currLink;
                currLink = getLink(source[i]);
                if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) {
                    /* never aggregate twice in a row */
                    aggregation_possible = 0;
                    tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E;
                    currLink = getLink(tempsource[j]);
                } else {
                    aggregation_possible = 1;
                    tempsource[j+=step] = source[i];
                    prev = source[i];
                    newSourceLength++;
                }
            }
            /* from here on, source refers to the aggregated copy */
            source = tempsource+(logical_order?0:j);
            sourceLength = newSourceLength;
        }

        /* calculate destination size */
        /* TODO: do we ever need to do this pure preflighting? */
        if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) ||
           ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) {
            outputSize=calculateSize(source,sourceLength,destCapacity,options);
        } else {
            outputSize=sourceLength;
        }

        if(outputSize>destCapacity) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            if (tempsource != NULL) uprv_free(tempsource);
            return outputSize;
        }

        /*
         * need a temporary buffer of size max(outputSize, sourceLength)
         * because at first we copy source->temp
         */
        if(sourceLength>outputSize) {
            outputSize=sourceLength;
        }

        /* Start of Arabic letter shaping part */
        if(outputSize<=LENGTHOF(buffer)) {
            /* use the whole stack buffer; the unused part is zeroed below */
            outputSize=LENGTHOF(buffer);
            tempbuffer=buffer;
        } else {
            tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR);

            /*Test for NULL*/
            if(tempbuffer == NULL) {
                *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
                if (tempsource != NULL) uprv_free(tempsource);
                return 0;
            }
        }
        uprv_memcpy(tempbuffer, source, sourceLength*U_SIZEOF_UCHAR);
        /* the aggregated copy is no longer needed once it is in tempbuffer */
        if (tempsource != NULL){
            uprv_free(tempsource);
        }

        if(sourceLength<outputSize) {
            uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR);
        }

        /* logical-order text is inverted before shaping and inverted back afterwards */
        if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
            countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr);
            invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr);
        }

        /* visual LTR with spaces relative to the text: swap the meaning of begin and end */
        if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
            if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) {
                shapeVars.spacesRelativeToTextBeginEnd = 1;
                shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END;
                shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN;
                shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END;
                shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN;
            }
        }

        switch(options&U_SHAPE_LETTERS_MASK) {
        case U_SHAPE_LETTERS_SHAPE :
             if( (options&U_SHAPE_TASHKEEL_MASK)> 0
                 && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) {
                /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
                destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars);
             }else {
                /* default Call the shaping function with tashkeel flag == 1 */
                destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars);

                /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
                if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){
                  destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode);
                }
            }
            break;
        case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
            /* Call the shaping function with tashkeel flag == 0 */
            destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars);
            break;

        case U_SHAPE_LETTERS_UNSHAPE :
            /* Call the deshaping function */
            destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars);
            break;
        default :
            /* will never occur because of validity checks above */
            destLength = 0;
            break;
        }

        /*
         * TODO: (markus 2002aug01)
         * For as long as we always preflight the outputSize above
         * we should U_ASSERT(outputSize==destLength)
         * except for the adjustment above before the tempbuffer allocation
         */

        if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
            countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr);
            invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr);
        }
        /* copy no more than dest can hold; overflow is reported just below */
        uprv_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)*U_SIZEOF_UCHAR);

        if(tempbuffer!=buffer) {
            uprv_free(tempbuffer);
        }

        if(destLength>destCapacity) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return destLength;
        }

        /* End of Arabic letter shaping part */
    } else {
        /*
         * No letter shaping:
         * just make sure the destination is large enough and copy the string.
         */
        if(destCapacity<sourceLength) {
            /* this catches preflighting, too */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            return sourceLength;
        }
        uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR);
        destLength=sourceLength;
    }

    /*
     * Perform number shaping.
     * With UTF-16 or UTF-32, the length of the string is constant.
     * The easiest way to do this is to operate on the destination and
     * "shape" the digits in-place.
     */
    if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) {
        UChar digitBase;
        int32_t i;

        /* select the requested digit group */
        switch(options&U_SHAPE_DIGIT_TYPE_MASK) {
        case U_SHAPE_DIGIT_TYPE_AN:
            digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
            break;
        case U_SHAPE_DIGIT_TYPE_AN_EXTENDED:
            digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
            break;
        default:
            /* will never occur because of validity checks above */
            digitBase=0;
            break;
        }

        /* perform the requested operation */
        switch(options&U_SHAPE_DIGITS_MASK) {
        case U_SHAPE_DIGITS_EN2AN:
            /* add (digitBase-'0') to each European (ASCII) digit code point */
            digitBase-=0x30;
            for(i=0; i<destLength; ++i) {
                /* unsigned subtraction: one compare covers '0'..'9' */
                if(((uint32_t)dest[i]-0x30)<10) {
                    dest[i]+=digitBase;
                }
            }
            break;
        case U_SHAPE_DIGITS_AN2EN:
            /* subtract (digitBase-'0') from each Arabic digit code point */
            for(i=0; i<destLength; ++i) {
                if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
                    dest[i]-=digitBase-0x30;
                }
            }
            break;
        case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
            _shapeToArabicDigitsWithContext(dest, destLength,
                                            digitBase,
                                            (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
                                            FALSE);
            break;
        case U_SHAPE_DIGITS_ALEN2AN_INIT_AL:
            _shapeToArabicDigitsWithContext(dest, destLength,
                                            digitBase,
                                            (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
                                            TRUE);
            break;
        default:
            /* will never occur because of validity checks above */
            break;
        }
    }

    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}