diff options
Diffstat (limited to 'intl/uconv/util/ugen.c')
-rw-r--r-- | intl/uconv/util/ugen.c | 712 |
1 files changed, 712 insertions, 0 deletions
diff --git a/intl/uconv/util/ugen.c b/intl/uconv/util/ugen.c new file mode 100644 index 0000000000..9a11b9f397 --- /dev/null +++ b/intl/uconv/util/ugen.c @@ -0,0 +1,712 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include "unicpriv.h" +/*================================================================================= + +=================================================================================*/ +typedef int (*uSubGeneratorFunc) (uint16_t in, unsigned char* out); +/*================================================================================= + +=================================================================================*/ + +typedef int (*uGeneratorFunc) ( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uGenerate( + uScanClassID scanClass, + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +#define uSubGenerator(sub,in,out) (* m_subgenerator[sub])((in),(out)) + +int uCheckAndGenAlways1Byte( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCheckAndGenAlways2Byte( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCheckAndGenAlways2ByteShiftGR( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uGenerateShift( + uShiftOutTable *shift, + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCheckAndGen2ByteGRPrefix8F( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCheckAndGen2ByteGRPrefix8EA2( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA3( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA4( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA5( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA6( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGen2ByteGRPrefix8EA7( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); +int uCnGAlways8BytesDecomposedHangul( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGenJohabHangul( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uCheckAndGenJohabSymbol( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + + +int uCheckAndGen4BytesGB18030( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ); + +int uGenAlways2Byte( + uint16_t in, + unsigned char* out + ); +int uGenAlways2ByteShiftGR( + uint16_t in, + unsigned char* out + ); +int uGenAlways1Byte( + uint16_t in, + unsigned char* out + ); +int uGenAlways1BytePrefix8E( + uint16_t in, + unsigned char* out + ); +/*================================================================================= + +=================================================================================*/ +const uGeneratorFunc m_generator[uNumOfCharsetType] = +{ + uCheckAndGenAlways1Byte, + uCheckAndGenAlways2Byte, + uCheckAndGenAlways2ByteShiftGR, + uCheckAndGen2ByteGRPrefix8F, + uCheckAndGen2ByteGRPrefix8EA2, + uCheckAndGen2ByteGRPrefix8EA3, + uCheckAndGen2ByteGRPrefix8EA4, + uCheckAndGen2ByteGRPrefix8EA5, + uCheckAndGen2ByteGRPrefix8EA6, + uCheckAndGen2ByteGRPrefix8EA7, + uCnGAlways8BytesDecomposedHangul, + uCheckAndGenJohabHangul, + uCheckAndGenJohabSymbol, + uCheckAndGen4BytesGB18030, + uCheckAndGenAlways2Byte /* place-holder for GR128 */ +}; + +/*================================================================================= + +=================================================================================*/ + +const uSubGeneratorFunc m_subgenerator[uNumOfCharType] = +{ + uGenAlways1Byte, + uGenAlways2Byte, + uGenAlways2ByteShiftGR, + uGenAlways1BytePrefix8E +}; +/*================================================================================= + +=================================================================================*/ +int uGenerate( + uScanClassID scanClass, + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + return (* m_generator[scanClass]) (state,in,out,outbuflen,outlen); +} +/*================================================================================= + +=================================================================================*/ +int uGenAlways1Byte( + uint16_t in, + unsigned char* out + ) +{ + out[0] = (unsigned char)in; + return 1; +} + +/*================================================================================= + +=================================================================================*/ +int uGenAlways2Byte( + uint16_t in, + unsigned char* out + ) +{ + out[0] = (unsigned char)((in >> 8) & 0xff); + out[1] = (unsigned char)(in & 0xff); + return 1; +} +/*================================================================================= + +=================================================================================*/ +int uGenAlways2ByteShiftGR( + uint16_t in, + unsigned char* out + ) +{ + out[0] = (unsigned char)(((in >> 8) & 0xff) | 0x80); + out[1] = (unsigned char)((in & 0xff) | 0x80); + return 1; +} +/*================================================================================= + +=================================================================================*/ +int uGenAlways1BytePrefix8E( + uint16_t in, + unsigned char* out + ) +{ + out[0] = 0x8E; + out[1] = (unsigned char)(in & 0xff); + return 1; +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndGenAlways1Byte( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + /* Don't check inlen. The caller should ensure it is larger than 0 */ + /* Oops, I don't agree. Code changed to check every time. [CATA] */ + if(outbuflen < 1) + return 0; + else + { + *outlen = 1; + out[0] = in & 0xff; + return 1; + } +} + +/*================================================================================= + +=================================================================================*/ +int uCheckAndGenAlways2Byte( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 2) + return 0; + else + { + *outlen = 2; + out[0] = ((in >> 8 ) & 0xff); + out[1] = in & 0xff; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndGenAlways2ByteShiftGR( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 2) + return 0; + else + { + *outlen = 2; + out[0] = ((in >> 8 ) & 0xff) | 0x80; + out[1] = (in & 0xff) | 0x80; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uGenerateShift( + uShiftOutTable *shift, + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + int16_t i; + const uShiftOutCell* cell = &(shift->shiftcell[0]); + int16_t itemnum = shift->numOfItem; + unsigned char inH, inL; + inH = (in >> 8) & 0xff; + inL = (in & 0xff ); + for(i=0;i<itemnum;i++) + { + if( ( inL >= cell[i].shiftout_MinLB) && + ( inL <= cell[i].shiftout_MaxLB) && + ( inH >= cell[i].shiftout_MinHB) && + ( inH <= cell[i].shiftout_MaxHB) ) + { + if(outbuflen < cell[i].reserveLen) + { + return 0; + } + else + { + *outlen = cell[i].reserveLen; + return (uSubGenerator(cell[i].classID,in,out)); + } + } + } + return 0; +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndGen2ByteGRPrefix8F(int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 3) + return 0; + else + { + *outlen = 3; + out[0] = 0x8F; + out[1] = ((in >> 8 ) & 0xff) | 0x80; + out[2] = (in & 0xff) | 0x80; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndGen2ByteGRPrefix8EA2(int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 4) + return 0; + else + { + *outlen = 4; + out[0] = 0x8E; + out[1] = 0xA2; + out[2] = ((in >> 8 ) & 0xff) | 0x80; + out[3] = (in & 0xff) | 0x80; + return 1; + } +} + + +/*================================================================================= + +=================================================================================*/ +int uCheckAndGen2ByteGRPrefix8EA3(int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 4) + return 0; + else + { + *outlen = 4; + out[0] = 0x8E; + out[1] = 0xA3; + out[2] = ((in >> 8 ) & 0xff) | 0x80; + out[3] = (in & 0xff) | 0x80; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndGen2ByteGRPrefix8EA4(int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 4) + return 0; + else + { + *outlen = 4; + out[0] = 0x8E; + out[1] = 0xA4; + out[2] = ((in >> 8 ) & 0xff) | 0x80; + out[3] = (in & 0xff) | 0x80; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndGen2ByteGRPrefix8EA5(int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 4) + return 0; + else + { + *outlen = 4; + out[0] = 0x8E; + out[1] = 0xA5; + out[2] = ((in >> 8 ) & 0xff) | 0x80; + out[3] = (in & 0xff) | 0x80; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndGen2ByteGRPrefix8EA6(int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 4) + return 0; + else + { + *outlen = 4; + out[0] = 0x8E; + out[1] = 0xA6; + out[2] = ((in >> 8 ) & 0xff) | 0x80; + out[3] = (in & 0xff) | 0x80; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +int uCheckAndGen2ByteGRPrefix8EA7(int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 4) + return 0; + else + { + *outlen = 4; + out[0] = 0x8E; + out[1] = 0xA7; + out[2] = ((in >> 8 ) & 0xff) | 0x80; + out[3] = (in & 0xff) | 0x80; + return 1; + } +} +/*================================================================================= + +=================================================================================*/ +#define SBase 0xAC00 +#define LCount 19 +#define VCount 21 +#define TCount 28 +#define NCount (VCount * TCount) +/*================================================================================= + +=================================================================================*/ +int uCnGAlways8BytesDecomposedHangul( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + static const uint8_t lMap[LCount] = { + 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5, + 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe + }; + + static const uint8_t tMap[TCount] = { + 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa, + 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5, + 0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe + }; + + uint16_t SIndex, LIndex, VIndex, TIndex; + + if(outbuflen < 8) + return 0; + + /* the following line are copy from Unicode 2.0 page 3-13 */ + /* item 1 of Hangul Syllabel Decomposition */ + SIndex = in - SBase; + + /* the following lines are copy from Unicode 2.0 page 3-14 */ + /* item 2 of Hangul Syllabel Decomposition w/ modification */ + LIndex = SIndex / NCount; + VIndex = (SIndex % NCount) / TCount; + TIndex = SIndex % TCount; + + /* + * A Hangul syllable not enumerated in KS X 1001 is represented + * by a sequence of 8 bytes beginning with Hangul-filler + * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three + * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making + * up the syllable. ref. KS X 1001:1998 Annex 3 + */ + *outlen = 8; + out[0] = out[2] = out[4] = out[6] = 0xa4; + out[1] = 0xd4; + out[3] = lMap[LIndex] ; + out[5] = (VIndex + 0xbf); + out[7] = tMap[TIndex]; + + return 1; +} + +int uCheckAndGenJohabHangul( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 2) + return 0; + else + { + /* + See Table 4-45 (page 183) of CJKV Information Processing + for detail explanation of the following table. + */ + /* + static const uint8_t lMap[LCount] = { + 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20 + }; + Therefore lMap[i] == i+2; + */ + + static const uint8_t vMap[VCount] = { + /* no 0,1,2 */ + 3,4,5,6,7, /* no 8,9 */ + 10,11,12,13,14,15, /* no 16,17 */ + 18,19,20,21,22,23, /* no 24,25 */ + 26,27,28,29 + }; + static const uint8_t tMap[TCount] = { + 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, /* no 18 */ + 19,20,21,22,23,24,25,26,27,28,29 + }; + uint16_t SIndex, LIndex, VIndex, TIndex, ch; + /* the following line are copy from Unicode 2.0 page 3-13 */ + /* item 1 of Hangul Syllabel Decomposition */ + SIndex = in - SBase; + + /* the following lines are copy from Unicode 2.0 page 3-14 */ + /* item 2 of Hangul Syllabel Decomposition w/ modification */ + LIndex = SIndex / NCount; + VIndex = (SIndex % NCount) / TCount; + TIndex = SIndex % TCount; + + *outlen = 2; + ch = 0x8000 | + ((LIndex+2)<<10) | + (vMap[VIndex]<<5)| + tMap[TIndex]; + out[0] = (ch >> 8); + out[1] = ch & 0x00FF; +#if 0 + printf("Johab Hangul %x %x in=%x L=%d V=%d T=%d\n", out[0], out[1], in, LIndex, VIndex, TIndex); +#endif + return 1; + } +} +int uCheckAndGenJohabSymbol( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 2) + return 0; + else + { + /* The following code are based on the Perl code listed under + * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013) + * in the book "CJKV Information Processing" by + * Ken Lunde <lunde@adobe.com> + * + * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab + * my @euc = unpack("C*", $_[0]); + * my ($fe_off, $hi_off, $lo_off) = (0,0,1); + * my @out = (); + * while(($hi, $lo) = splice(@euc, 0, 2)) { + * $hi &= 127; $lo &= 127; + * $fe_off = 21 if $hi == 73; + * $fe_off = 34 if $hi == 126; + * ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125); + * push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off), + * $lo + ((($hi+$lo_off) & 1) ? ($lo > 110 ? 34:16):128)); + * } + * return pack("C*", @out); + */ + + unsigned char fe_off = 0; + unsigned char hi_off = 0; + unsigned char lo_off = 1; + unsigned char hi = (in >> 8) & 0x7F; + unsigned char lo = in & 0x7F; + if(73 == hi) + fe_off = 21; + if(126 == hi) + fe_off = 34; + if( (hi < 74) || ( hi > 125) ) + { + hi_off = 1; + lo_off = 0; + } + *outlen = 2; + out[0] = ((hi+hi_off) >> 1) + ((hi<74) ? 200 : 187 ) - fe_off; + out[1] = lo + (((hi+lo_off) & 1) ? ((lo > 110) ? 34 : 16) : + 128); +#if 0 + printf("Johab Symbol %x %x in=%x\n", out[0], out[1], in); +#endif + return 1; + } +} +int uCheckAndGen4BytesGB18030( + int32_t* state, + uint16_t in, + unsigned char* out, + uint32_t outbuflen, + uint32_t* outlen + ) +{ + if(outbuflen < 4) + return 0; + out[0] = (in / (10*126*10)) + 0x81; + in %= (10*126*10); + out[1] = (in / (10*126)) + 0x30; + in %= (10*126); + out[2] = (in / (10)) + 0x81; + out[3] = (in % 10) + 0x30; + *outlen = 4; + return 1; +} |