jweinst1
6/25/2019 - 6:37 AM

Prints every character in the Thai block of TIS-620 and Unicode.

Prints every character in the Thai block of TIS-620 and Unicode.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/**
 * Encodes a single Unicode code point as UTF-8.
 *
 * @param buffer  Destination for the encoded bytes. It must have room for
 *                the encoded length (up to 4 bytes). No terminating NUL is
 *                written.
 * @param code    Code point to encode.
 * @return Number of bytes written (1 to 4), or 0 when `code` cannot be
 *         encoded: a UTF-16 surrogate (0xD800..0xDFFF) or a value above
 *         0x10FFFF. Nothing is written to `buffer` when 0 is returned.
 */
static size_t code_to_utf8(unsigned char* buffer, const unsigned int code)
{
    if (code <= 0x7F) {
        buffer[0] = (unsigned char)code;                           /* 0xxxxxxx */
        return 1;
    }
    if (code <= 0x7FF) {
        buffer[0] = (unsigned char)(0xC0 | (code >> 6));           /* 110xxxxx */
        buffer[1] = (unsigned char)(0x80 | (code & 0x3F));         /* 10xxxxxx */
        return 2;
    }
    if (code <= 0xFFFF) {
        /* Surrogates are reserved for UTF-16; encoding them would produce
           an ill-formed UTF-8 sequence, so they are rejected. */
        if (code >= 0xD800 && code <= 0xDFFF) {
            return 0;
        }
        buffer[0] = (unsigned char)(0xE0 | (code >> 12));          /* 1110xxxx */
        buffer[1] = (unsigned char)(0x80 | ((code >> 6) & 0x3F));  /* 10xxxxxx */
        buffer[2] = (unsigned char)(0x80 | (code & 0x3F));         /* 10xxxxxx */
        return 3;
    }
    if (code <= 0x10FFFF) {
        buffer[0] = (unsigned char)(0xF0 | (code >> 18));          /* 11110xxx */
        buffer[1] = (unsigned char)(0x80 | ((code >> 12) & 0x3F)); /* 10xxxxxx */
        buffer[2] = (unsigned char)(0x80 | ((code >> 6) & 0x3F));  /* 10xxxxxx */
        buffer[3] = (unsigned char)(0x80 | (code & 0x3F));         /* 10xxxxxx */
        return 4;
    }
    /* Beyond the last Unicode code point. */
    return 0;
}

/*
 * Writes one line to stdout describing `code`: its UTF-8 text, the code
 * point in hexadecimal, and the three encoded bytes in decimal.
 * If the encoding is not exactly three bytes long, a fixed notice is
 * printed instead.
 */
static void print_utf8_and_bytes(const unsigned int code)
{
    unsigned char utf8[4];
    size_t length;

    /* Terminator so that a 3-byte sequence can be printed with %s. */
    utf8[3] = 0;
    length = code_to_utf8(utf8, code);

    if (length != 3) {
        puts("Not a 3 byte utf8~~");
        return;
    }

    printf("text: %s, code:%04x bytes: %u %u %u\n",
           (const char*)utf8, code, utf8[0], utf8[1], utf8[2]);
}

/*
 * Walks the code points 0x0e01 through 0x0e5b (inclusive) and prints the
 * UTF-8 details of each one via print_utf8_and_bytes().
 */
static void print_all_thai_utf8_bytes(void)
{
    const unsigned first = 0x0e01;
    const unsigned last = 0x0e5b;
    unsigned cp;

    for (cp = first; cp <= last; cp++) {
        /* 0x0e3b..0x0e3e are skipped; they are unassigned in the
           Unicode Thai chart. */
        if (cp >= 0x0e3b && cp <= 0x0e3e) {
            continue;
        }
        print_utf8_and_bytes(cp);
    }
}

/* Entry point: prints the Thai code points and their UTF-8 bytes. */
int main(void)
{
    print_all_thai_utf8_bytes();

    return 0;
}

/** Result ~~

text: ก, code:0e01 bytes: 224 184 129
text: ข, code:0e02 bytes: 224 184 130
text: ฃ, code:0e03 bytes: 224 184 131
text: ค, code:0e04 bytes: 224 184 132
text: ฅ, code:0e05 bytes: 224 184 133
text: ฆ, code:0e06 bytes: 224 184 134
text: ง, code:0e07 bytes: 224 184 135
text: จ, code:0e08 bytes: 224 184 136
text: ฉ, code:0e09 bytes: 224 184 137
text: ช, code:0e0a bytes: 224 184 138
text: ซ, code:0e0b bytes: 224 184 139
text: ฌ, code:0e0c bytes: 224 184 140
text: ญ, code:0e0d bytes: 224 184 141
text: ฎ, code:0e0e bytes: 224 184 142
text: ฏ, code:0e0f bytes: 224 184 143
text: ฐ, code:0e10 bytes: 224 184 144
text: ฑ, code:0e11 bytes: 224 184 145
text: ฒ, code:0e12 bytes: 224 184 146
text: ณ, code:0e13 bytes: 224 184 147
text: ด, code:0e14 bytes: 224 184 148
text: ต, code:0e15 bytes: 224 184 149
text: ถ, code:0e16 bytes: 224 184 150
text: ท, code:0e17 bytes: 224 184 151
text: ธ, code:0e18 bytes: 224 184 152
text: น, code:0e19 bytes: 224 184 153
text: บ, code:0e1a bytes: 224 184 154
text: ป, code:0e1b bytes: 224 184 155
text: ผ, code:0e1c bytes: 224 184 156
text: ฝ, code:0e1d bytes: 224 184 157
text: พ, code:0e1e bytes: 224 184 158
text: ฟ, code:0e1f bytes: 224 184 159
text: ภ, code:0e20 bytes: 224 184 160
text: ม, code:0e21 bytes: 224 184 161
text: ย, code:0e22 bytes: 224 184 162
text: ร, code:0e23 bytes: 224 184 163
text: ฤ, code:0e24 bytes: 224 184 164
text: ล, code:0e25 bytes: 224 184 165
text: ฦ, code:0e26 bytes: 224 184 166
text: ว, code:0e27 bytes: 224 184 167
text: ศ, code:0e28 bytes: 224 184 168
text: ษ, code:0e29 bytes: 224 184 169
text: ส, code:0e2a bytes: 224 184 170
text: ห, code:0e2b bytes: 224 184 171
text: ฬ, code:0e2c bytes: 224 184 172
text: อ, code:0e2d bytes: 224 184 173
text: ฮ, code:0e2e bytes: 224 184 174
text: ฯ, code:0e2f bytes: 224 184 175
text: ะ, code:0e30 bytes: 224 184 176
text: ั, code:0e31 bytes: 224 184 177
text: า, code:0e32 bytes: 224 184 178
text: ำ, code:0e33 bytes: 224 184 179
text: ิ, code:0e34 bytes: 224 184 180
text: ี, code:0e35 bytes: 224 184 181
text: ึ, code:0e36 bytes: 224 184 182
text: ื, code:0e37 bytes: 224 184 183
text: ุ, code:0e38 bytes: 224 184 184
text: ู, code:0e39 bytes: 224 184 185
text: ฺ, code:0e3a bytes: 224 184 186
text: ฿, code:0e3f bytes: 224 184 191
text: เ, code:0e40 bytes: 224 185 128
text: แ, code:0e41 bytes: 224 185 129
text: โ, code:0e42 bytes: 224 185 130
text: ใ, code:0e43 bytes: 224 185 131
text: ไ, code:0e44 bytes: 224 185 132
text: ๅ, code:0e45 bytes: 224 185 133
text: ๆ, code:0e46 bytes: 224 185 134
text: ็, code:0e47 bytes: 224 185 135
text: ่, code:0e48 bytes: 224 185 136
text: ้, code:0e49 bytes: 224 185 137
text: ๊, code:0e4a bytes: 224 185 138
text: ๋, code:0e4b bytes: 224 185 139
text: ์, code:0e4c bytes: 224 185 140
text: ํ, code:0e4d bytes: 224 185 141
text: ๎, code:0e4e bytes: 224 185 142
text: ๏, code:0e4f bytes: 224 185 143
text: ๐, code:0e50 bytes: 224 185 144
text: ๑, code:0e51 bytes: 224 185 145
text: ๒, code:0e52 bytes: 224 185 146
text: ๓, code:0e53 bytes: 224 185 147
text: ๔, code:0e54 bytes: 224 185 148
text: ๕, code:0e55 bytes: 224 185 149
text: ๖, code:0e56 bytes: 224 185 150
text: ๗, code:0e57 bytes: 224 185 151
text: ๘, code:0e58 bytes: 224 185 152
text: ๙, code:0e59 bytes: 224 185 153
text: ๚, code:0e5a bytes: 224 185 154
text: ๛, code:0e5b bytes: 224 185 155

 **/