summaryrefslogtreecommitdiff
path: root/intl/uconv/util/ugen.c
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/util/ugen.c')
-rw-r--r--intl/uconv/util/ugen.c712
1 files changed, 712 insertions, 0 deletions
diff --git a/intl/uconv/util/ugen.c b/intl/uconv/util/ugen.c
new file mode 100644
index 0000000000..9a11b9f397
--- /dev/null
+++ b/intl/uconv/util/ugen.c
@@ -0,0 +1,712 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "unicpriv.h"
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Sub-generator: writes the encoded form of `in` to `out` without any
+ * buffer-size check and returns an int status.
+ */
+typedef int (*uSubGeneratorFunc)(uint16_t in, unsigned char* out);
+
+/*
+ * Generator: writes the encoded form of `in` to `out` if `outbuflen` bytes
+ * are enough, stores the number of bytes produced in `*outlen`, and returns
+ * non-zero on success or 0 on failure.
+ */
+typedef int (*uGeneratorFunc)(int32_t* state,
+                              uint16_t in,
+                              unsigned char* out,
+                              uint32_t outbuflen,
+                              uint32_t* outlen);
+
+/* Public entry point: dispatches to the generator selected by scanClass. */
+int uGenerate(uScanClassID scanClass,
+              int32_t* state,
+              uint16_t in,
+              unsigned char* out,
+              uint32_t outbuflen,
+              uint32_t* outlen);
+
+/* Call the sub-generator stored at index `sub` of m_subgenerator. */
+#define uSubGenerator(sub,in,out) ((*m_subgenerator[(sub)])((in),(out)))
+
+/*
+ * Forward declarations of the bounds-checked generators defined later in
+ * this file.  Except for uGenerateShift (which takes an extra leading
+ * shift-table argument) they all have the uGeneratorFunc signature.
+ */
+int uCheckAndGenAlways1Byte(int32_t* state, uint16_t in, unsigned char* out,
+                            uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGenAlways2Byte(int32_t* state, uint16_t in, unsigned char* out,
+                            uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGenAlways2ByteShiftGR(int32_t* state, uint16_t in,
+                                   unsigned char* out,
+                                   uint32_t outbuflen, uint32_t* outlen);
+int uGenerateShift(uShiftOutTable *shift, int32_t* state, uint16_t in,
+                   unsigned char* out,
+                   uint32_t outbuflen, uint32_t* outlen);
+
+/* Generators that emit a fixed prefix followed by two GR (0x80-ored) bytes. */
+int uCheckAndGen2ByteGRPrefix8F(int32_t* state, uint16_t in,
+                                unsigned char* out,
+                                uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA2(int32_t* state, uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA3(int32_t* state, uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA4(int32_t* state, uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA5(int32_t* state, uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA6(int32_t* state, uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGen2ByteGRPrefix8EA7(int32_t* state, uint16_t in,
+                                  unsigned char* out,
+                                  uint32_t outbuflen, uint32_t* outlen);
+
+/* Hangul / Johab generators. */
+int uCnGAlways8BytesDecomposedHangul(int32_t* state, uint16_t in,
+                                     unsigned char* out,
+                                     uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGenJohabHangul(int32_t* state, uint16_t in,
+                            unsigned char* out,
+                            uint32_t outbuflen, uint32_t* outlen);
+int uCheckAndGenJohabSymbol(int32_t* state, uint16_t in,
+                            unsigned char* out,
+                            uint32_t outbuflen, uint32_t* outlen);
+
+/* Four-byte GB18030 generator. */
+int uCheckAndGen4BytesGB18030(int32_t* state, uint16_t in,
+                              unsigned char* out,
+                              uint32_t outbuflen, uint32_t* outlen);
+
+/*
+ * Forward declarations of the unchecked sub-generators (uSubGeneratorFunc
+ * signature) that populate m_subgenerator.
+ */
+int uGenAlways2Byte(uint16_t in, unsigned char* out);
+int uGenAlways2ByteShiftGR(uint16_t in, unsigned char* out);
+int uGenAlways1Byte(uint16_t in, unsigned char* out);
+int uGenAlways1BytePrefix8E(uint16_t in, unsigned char* out);
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Dispatch table used by uGenerate(): the scanClass argument is used
+ * directly as the index into this array.
+ * NOTE(review): the entry order presumably has to match the uScanClassID
+ * enumeration declared in unicpriv.h -- confirm before reordering.
+ */
+const uGeneratorFunc m_generator[uNumOfCharsetType] =
+{
+ /*  0 */
+ uCheckAndGenAlways1Byte,
+ /*  1 */
+ uCheckAndGenAlways2Byte,
+ /*  2 */
+ uCheckAndGenAlways2ByteShiftGR,
+ /*  3 */
+ uCheckAndGen2ByteGRPrefix8F,
+ /*  4 */
+ uCheckAndGen2ByteGRPrefix8EA2,
+ /*  5 */
+ uCheckAndGen2ByteGRPrefix8EA3,
+ /*  6 */
+ uCheckAndGen2ByteGRPrefix8EA4,
+ /*  7 */
+ uCheckAndGen2ByteGRPrefix8EA5,
+ /*  8 */
+ uCheckAndGen2ByteGRPrefix8EA6,
+ /*  9 */
+ uCheckAndGen2ByteGRPrefix8EA7,
+ /* 10 */
+ uCnGAlways8BytesDecomposedHangul,
+ /* 11 */
+ uCheckAndGenJohabHangul,
+ /* 12 */
+ uCheckAndGenJohabSymbol,
+ /* 13 */
+ uCheckAndGen4BytesGB18030,
+ /* 14: no dedicated generator; the plain 2-byte generator fills the slot */
+ uCheckAndGenAlways2Byte /* place-holder for GR128 */
+};
+
+/*=================================================================================
+
+=================================================================================*/
+
+/*
+ * Dispatch table behind the uSubGenerator() macro; uGenerateShift() indexes
+ * it with the classID of the matching shift-out cell.
+ * NOTE(review): the entry order presumably has to match the enumeration that
+ * ends in uNumOfCharType -- confirm against unicpriv.h.
+ */
+const uSubGeneratorFunc m_subgenerator[uNumOfCharType] =
+{
+ /* 0 */
+ uGenAlways1Byte,
+ /* 1 */
+ uGenAlways2Byte,
+ /* 2 */
+ uGenAlways2ByteShiftGR,
+ /* 3 */
+ uGenAlways1BytePrefix8E
+};
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Encode `in` with the generator selected by `scanClass`.
+ *
+ *   scanClass  index into m_generator
+ *   state      passed through to the generator unchanged
+ *   in         16-bit value to encode
+ *   out        destination buffer
+ *   outbuflen  number of bytes available at `out`
+ *   outlen     receives the number of bytes written on success
+ *
+ * Returns the generator's result (non-zero on success, 0 on failure).
+ * Returns 0 without calling anything when `scanClass` is not a valid index
+ * into m_generator, instead of reading past the end of the table.
+ */
+int uGenerate(
+  uScanClassID scanClass,
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  /* The unsigned cast also rejects negative values of a signed enum. */
+  if ((uint32_t)scanClass >= (uint32_t)uNumOfCharsetType)
+    return 0;
+
+  return (* m_generator[scanClass]) (state,in,out,outbuflen,outlen);
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Store the low-order byte of `in` in out[0].
+ * No size check is made; `out` must have room for one byte.
+ * Always returns 1.
+ */
+int uGenAlways1Byte(
+  uint16_t in,
+  unsigned char* out
+  )
+{
+  *out = (unsigned char)(in & 0xff);
+  return 1;
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Store `in` as two bytes, high-order byte first, in out[0..1].
+ * No size check is made; `out` must have room for two bytes.
+ * Always returns 1 (a success flag, not the byte count).
+ */
+int uGenAlways2Byte(
+  uint16_t in,
+  unsigned char* out
+  )
+{
+  const unsigned char high = (unsigned char)(in >> 8);
+  const unsigned char low  = (unsigned char)(in & 0xff);
+
+  out[0] = high;
+  out[1] = low;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Store `in` as two bytes, high-order byte first, with bit 7 of each byte
+ * forced on (the GR form), in out[0..1].
+ * No size check is made; `out` must have room for two bytes.
+ * Always returns 1 (a success flag, not the byte count).
+ */
+int uGenAlways2ByteShiftGR(
+  uint16_t in,
+  unsigned char* out
+  )
+{
+  const unsigned char high = (unsigned char)(in >> 8);
+  const unsigned char low  = (unsigned char)(in & 0xff);
+
+  out[0] = (unsigned char)(high | 0x80);
+  out[1] = (unsigned char)(low | 0x80);
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Store the byte 0x8E followed by the low-order byte of `in` in out[0..1].
+ * No size check is made; `out` must have room for two bytes.
+ * Always returns 1 (a success flag, not the byte count).
+ */
+int uGenAlways1BytePrefix8E(
+  uint16_t in,
+  unsigned char* out
+  )
+{
+  const unsigned char low = (unsigned char)(in & 0xff);
+
+  out[0] = 0x8E;
+  out[1] = low;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Bounds-checked one-byte generator.
+ *
+ * Writes the low-order byte of `in` to out[0] and sets *outlen to 1.
+ * The output size is checked on every call rather than trusting the
+ * caller: if outbuflen is 0 the function returns 0 and touches neither
+ * `out` nor `*outlen`.  `state` is not used.
+ *
+ * Returns 1 on success, 0 when the buffer is too small.
+ */
+int uCheckAndGenAlways1Byte(
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+
+  if (outbuflen < 1)
+    return 0;
+
+  out[0] = in & 0xff;
+  *outlen = 1;
+  return 1;
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Bounds-checked two-byte generator.
+ *
+ * Writes `in` high-order byte first to out[0..1] and sets *outlen to 2.
+ * If fewer than 2 bytes are available the function returns 0 and touches
+ * neither `out` nor `*outlen`.  `state` is not used.
+ *
+ * Returns 1 on success, 0 when the buffer is too small.
+ */
+int uCheckAndGenAlways2Byte(
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+
+  if (outbuflen < 2)
+    return 0;
+
+  out[0] = (in >> 8) & 0xff;
+  out[1] = in & 0xff;
+  *outlen = 2;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Bounds-checked two-byte GR generator.
+ *
+ * Writes `in` high-order byte first to out[0..1] with bit 7 of each byte
+ * forced on, and sets *outlen to 2.  If fewer than 2 bytes are available
+ * the function returns 0 and touches neither `out` nor `*outlen`.
+ * `state` is not used.
+ *
+ * Returns 1 on success, 0 when the buffer is too small.
+ */
+int uCheckAndGenAlways2ByteShiftGR(
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+
+  if (outbuflen < 2)
+    return 0;
+
+  out[0] = ((in >> 8) & 0xff) | 0x80;
+  out[1] = (in & 0xff) | 0x80;
+  *outlen = 2;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Table-driven generator.
+ *
+ * Walks the cells of `shift` in order and picks the first one whose
+ * [MinHB, MaxHB] x [MinLB, MaxLB] rectangle contains the high and low
+ * bytes of `in`.  For that cell:
+ *   - if outbuflen is smaller than the cell's reserveLen, returns 0;
+ *   - otherwise sets *outlen to reserveLen and returns the result of the
+ *     sub-generator selected by the cell's classID.
+ * Returns 0 when no cell matches.  `state` is not used.
+ */
+int uGenerateShift(
+  uShiftOutTable *shift,
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  const uShiftOutCell* cells = &(shift->shiftcell[0]);
+  int16_t count = shift->numOfItem;
+  unsigned char hiByte = (in >> 8) & 0xff;
+  unsigned char loByte = (in & 0xff);
+  int16_t idx;
+
+  for (idx = 0; idx < count; idx++)
+  {
+    const uShiftOutCell* c = &cells[idx];
+
+    /* Skip cells whose byte ranges do not cover this value. */
+    if ((loByte < c->shiftout_MinLB) || (loByte > c->shiftout_MaxLB) ||
+        (hiByte < c->shiftout_MinHB) || (hiByte > c->shiftout_MaxHB))
+      continue;
+
+    /* First match decides the outcome; later cells are never consulted. */
+    if (outbuflen < c->reserveLen)
+      return 0;
+
+    *outlen = c->reserveLen;
+    return (uSubGenerator(c->classID,in,out));
+  }
+  return 0;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Bounds-checked three-byte generator: 0x8F, then the high and low bytes
+ * of `in`, each with bit 7 forced on.  Sets *outlen to 3.
+ *
+ * If fewer than 3 bytes are available the function returns 0 and touches
+ * neither `out` nor `*outlen`.  `state` is not used.
+ *
+ * Returns 1 on success, 0 when the buffer is too small.
+ */
+int uCheckAndGen2ByteGRPrefix8F(int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+
+  if (outbuflen < 3)
+    return 0;
+
+  out[0] = 0x8F;
+  out[1] = ((in >> 8) & 0xff) | 0x80;
+  out[2] = (in & 0xff) | 0x80;
+  *outlen = 3;
+  return 1;
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Shared body of the uCheckAndGen2ByteGRPrefix8EA2 .. 8EA7 generators,
+ * which differ only in the second output byte.
+ *
+ * Writes four bytes: 0x8E, `second`, then the high and low bytes of `in`,
+ * each with bit 7 forced on, and sets *outlen to 4.  If fewer than 4 bytes
+ * are available it returns 0 and touches neither `out` nor `*outlen`.
+ *
+ * Returns 1 on success, 0 when the buffer is too small.
+ */
+static int uGen2ByteGRWithPrefix8E(
+  unsigned char second,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  if (outbuflen < 4)
+    return 0;
+
+  *outlen = 4;
+  out[0] = 0x8E;
+  out[1] = second;
+  out[2] = ((in >> 8) & 0xff) | 0x80;
+  out[3] = (in & 0xff) | 0x80;
+  return 1;
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/* Four-byte generator with prefix 0x8E 0xA2; `state` is not used. */
+int uCheckAndGen2ByteGRPrefix8EA2(int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+  return uGen2ByteGRWithPrefix8E(0xA2, in, out, outbuflen, outlen);
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/* Four-byte generator with prefix 0x8E 0xA3; `state` is not used. */
+int uCheckAndGen2ByteGRPrefix8EA3(int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+  return uGen2ByteGRWithPrefix8E(0xA3, in, out, outbuflen, outlen);
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/* Four-byte generator with prefix 0x8E 0xA4; `state` is not used. */
+int uCheckAndGen2ByteGRPrefix8EA4(int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+  return uGen2ByteGRWithPrefix8E(0xA4, in, out, outbuflen, outlen);
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/* Four-byte generator with prefix 0x8E 0xA5; `state` is not used. */
+int uCheckAndGen2ByteGRPrefix8EA5(int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+  return uGen2ByteGRWithPrefix8E(0xA5, in, out, outbuflen, outlen);
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/* Four-byte generator with prefix 0x8E 0xA6; `state` is not used. */
+int uCheckAndGen2ByteGRPrefix8EA6(int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+  return uGen2ByteGRWithPrefix8E(0xA6, in, out, outbuflen, outlen);
+}
+
+/*=================================================================================
+
+=================================================================================*/
+/* Four-byte generator with prefix 0x8E 0xA7; `state` is not used. */
+int uCheckAndGen2ByteGRPrefix8EA7(int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  (void)state;
+  return uGen2ByteGRWithPrefix8E(0xA7, in, out, outbuflen, outlen);
+}
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Constants of the arithmetic Hangul syllable decomposition:
+ * SBase is the first precomposed syllable, LCount / VCount / TCount are the
+ * numbers of leading consonants, vowels and trailing consonants (TCount
+ * includes the "no trailing consonant" case), NCount is the number of
+ * syllables per leading consonant and SCount the total number of syllables.
+ */
+#define SBase 0xAC00
+#define LCount 19
+#define VCount 21
+#define TCount 28
+#define NCount (VCount * TCount)
+#define SCount (LCount * NCount)
+/*=================================================================================
+
+=================================================================================*/
+/*
+ * Generate the 8-byte decomposed form of a precomposed Hangul syllable.
+ *
+ *   state      not used
+ *   in         syllable in the range SBase .. SBase + SCount - 1
+ *   out        destination buffer
+ *   outbuflen  number of bytes available at `out`
+ *   outlen     set to 8 on success
+ *
+ * Returns 1 on success.  Returns 0, leaving `out` and `*outlen` untouched,
+ * when fewer than 8 bytes are available or when `in` is not a precomposed
+ * Hangul syllable (previously such input indexed lMap out of bounds).
+ */
+int uCnGAlways8BytesDecomposedHangul(
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  /* Second byte of the 0xA4-row code for each leading consonant. */
+  static const uint8_t lMap[LCount] = {
+    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
+    0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
+  };
+
+  /* Second byte for each trailing consonant; index 0 ("none") is the filler. */
+  static const uint8_t tMap[TCount] = {
+    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa,
+    0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5,
+    0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
+  };
+
+  uint16_t SIndex, LIndex, VIndex, TIndex;
+
+  (void)state;
+
+  if (outbuflen < 8)
+    return 0;
+
+  /* The following line is copied from Unicode 2.0 page 3-13, */
+  /* item 1 of Hangul Syllable Decomposition. */
+  SIndex = in - SBase;
+
+  /* A value below SBase wraps to a large SIndex, so this single test */
+  /* rejects input on either side of the syllable block. */
+  if (SIndex >= SCount)
+    return 0;
+
+  /* The following lines are copied from Unicode 2.0 page 3-14, */
+  /* item 2 of Hangul Syllable Decomposition, with modification. */
+  LIndex = SIndex / NCount;
+  VIndex = (SIndex % NCount) / TCount;
+  TIndex = SIndex % TCount;
+
+  /*
+   * A Hangul syllable not enumerated in KS X 1001 is represented
+   * by a sequence of 8 bytes beginning with Hangul-filler
+   * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three
+   * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making
+   * up the syllable. ref. KS X 1001:1998 Annex 3
+   */
+  *outlen = 8;
+  out[0] = out[2] = out[4] = out[6] = 0xa4;
+  out[1] = 0xd4;
+  out[3] = lMap[LIndex];
+  out[5] = (VIndex + 0xbf);
+  out[7] = tMap[TIndex];
+
+  return 1;
+}
+
+/*
+ * Generate the two-byte Johab code of a precomposed Hangul syllable.
+ *
+ * The syllable is split arithmetically into leading-consonant, vowel and
+ * trailing-consonant indices, each index is mapped to a 5-bit field, and
+ * the fields are packed under a set top bit:
+ *
+ *   1 | lead (5 bits) | vowel (5 bits) | trail (5 bits)
+ *
+ * See Table 4-45 (page 183) of "CJKV Information Processing" for a detailed
+ * explanation of the field values.
+ *
+ * Sets *outlen to 2 and returns 1 on success.  Returns 0, touching neither
+ * `out` nor `*outlen`, when fewer than 2 bytes are available.  `state` is
+ * not used.  `in` is not range-checked here; the result is only meaningful
+ * for values at or above SBase that lie in the syllable block.
+ */
+int uCheckAndGenJohabHangul(
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  /* The lead field needs no table: its value is simply the index plus 2. */
+
+  /* Vowel field values; 0-2, 8-9, 16-17 and 24-25 are skipped. */
+  static const uint8_t vMap[VCount] = {
+    3, 4, 5, 6, 7,
+    10, 11, 12, 13, 14, 15,
+    18, 19, 20, 21, 22, 23,
+    26, 27, 28, 29
+  };
+  /* Trail field values; 18 is skipped. */
+  static const uint8_t tMap[TCount] = {
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+    19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
+  };
+  uint16_t syllable, lead, vowel, trail, code;
+
+  (void)state;
+
+  if (outbuflen < 2)
+    return 0;
+
+  /* Unicode 2.0 page 3-13, item 1 of Hangul Syllable Decomposition. */
+  syllable = in - SBase;
+
+  /* Unicode 2.0 page 3-14, item 2, with modification. */
+  lead = syllable / NCount;
+  vowel = (syllable % NCount) / TCount;
+  trail = syllable % TCount;
+
+  code = 0x8000 |
+         ((lead + 2) << 10) |
+         (vMap[vowel] << 5) |
+         tMap[trail];
+
+  *outlen = 2;
+  out[0] = (code >> 8);
+  out[1] = code & 0x00FF;
+  return 1;
+}
+/*
+ * Generate the two-byte Johab code for a two-byte code given in `in`
+ * (high byte = first byte, low byte = second byte; bit 7 of each is
+ * ignored).
+ *
+ * The arithmetic follows the Perl code listed under
+ * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013) in the book
+ * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com>:
+ *
+ * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab
+ *   my @euc = unpack("C*", $_[0]);
+ *   my ($fe_off, $hi_off, $lo_off) = (0,0,1);
+ *   my @out = ();
+ *   while(($hi, $lo) = splice(@euc, 0, 2)) {
+ *     $hi &= 127; $lo &= 127;
+ *     $fe_off = 21 if $hi == 73;
+ *     $fe_off = 34 if $hi == 126;
+ *     ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125);
+ *     push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off),
+ *       $lo + ((($hi+$lo_off) & 1) ? ($lo > 110 ? 34:16):128));
+ *   }
+ *   return pack("C*", @out);
+ * }
+ *
+ * Sets *outlen to 2 and returns 1 on success.  Returns 0, touching neither
+ * `out` nor `*outlen`, when fewer than 2 bytes are available.  `state` is
+ * not used.
+ */
+int uCheckAndGenJohabSymbol(
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  unsigned char hi, lo;
+  unsigned char fe_off, hi_off, lo_off;
+  int swapped;
+
+  (void)state;
+
+  if (outbuflen < 2)
+    return 0;
+
+  hi = (in >> 8) & 0x7F;
+  lo = in & 0x7F;
+
+  /* Only first bytes 73 and 126 carry an extra lead-byte correction. */
+  switch (hi)
+  {
+    case 73:
+      fe_off = 21;
+      break;
+    case 126:
+      fe_off = 34;
+      break;
+    default:
+      fe_off = 0;
+      break;
+  }
+
+  /* Outside 74..125 the rounding offsets trade places. */
+  swapped = (hi < 74) || (hi > 125);
+  hi_off = swapped ? 1 : 0;
+  lo_off = swapped ? 0 : 1;
+
+  *outlen = 2;
+  out[0] = ((hi + hi_off) >> 1) + ((hi < 74) ? 200 : 187) - fe_off;
+
+  if ((hi + lo_off) & 1)
+    out[1] = lo + ((lo > 110) ? 34 : 16);
+  else
+    out[1] = lo + 128;
+
+  return 1;
+}
+/*
+ * Generate a four-byte sequence from `in`, treated as a number written in
+ * the mixed radix (.., 10, 126, 10):
+ *
+ *   out[0] = 0x81 + in / (10*126*10)
+ *   out[1] = 0x30 + (next digit, base 10)
+ *   out[2] = 0x81 + (next digit, base 126)
+ *   out[3] = 0x30 + (last digit, base 10)
+ *
+ * Sets *outlen to 4 and returns 1 on success.  Returns 0, touching neither
+ * `out` nor `*outlen`, when fewer than 4 bytes are available.  `state` is
+ * not used.
+ */
+int uCheckAndGen4BytesGB18030(
+  int32_t* state,
+  uint16_t in,
+  unsigned char* out,
+  uint32_t outbuflen,
+  uint32_t* outlen
+  )
+{
+  const uint16_t perByte3 = 10;               /* values spanned by out[2] step */
+  const uint16_t perByte2 = 10 * 126;         /* values spanned by out[1] step */
+  const uint16_t perByte1 = 10 * 126 * 10;    /* values spanned by out[0] step */
+  uint16_t rest;
+
+  (void)state;
+
+  if (outbuflen < 4)
+    return 0;
+
+  out[0] = (in / perByte1) + 0x81;
+  rest = in % perByte1;
+
+  out[1] = (rest / perByte2) + 0x30;
+  rest = rest % perByte2;
+
+  out[2] = (rest / perByte3) + 0x81;
+  out[3] = (rest % perByte3) + 0x30;
+
+  *outlen = 4;
+  return 1;
+}