/* Prints all of the Thai block of TIS-620 and Unicode. */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/**
 * Encode a single Unicode code point as UTF-8.
 *
 * @param buffer  Destination for the encoded bytes. Up to 4 bytes are
 *                written; no terminating NUL is appended.
 * @param code    Code point to encode.
 * @return Number of bytes written (1 to 4), or 0 when `code` cannot be
 *         encoded: it is above 0x10FFFF or it is a UTF-16 surrogate
 *         (0xD800-0xDFFF). Nothing is written to `buffer` when 0 is returned.
 */
static size_t code_to_utf8(unsigned char* buffer, const unsigned int code)
{
    if (code <= 0x7F) {
        buffer[0] = (unsigned char)code; /* 0xxxxxxx */
        return 1;
    }
    if (code <= 0x7FF) {
        buffer[0] = (unsigned char)(0xC0 | (code >> 6));   /* 110xxxxx */
        buffer[1] = (unsigned char)(0x80 | (code & 0x3F)); /* 10xxxxxx */
        return 2;
    }
    if (code <= 0xFFFF) {
        /* Surrogates exist only for UTF-16; encoding them would produce
           an ill-formed UTF-8 sequence, so report them as not encodable. */
        if (code >= 0xD800 && code <= 0xDFFF) {
            return 0;
        }
        buffer[0] = (unsigned char)(0xE0 | (code >> 12));         /* 1110xxxx */
        buffer[1] = (unsigned char)(0x80 | ((code >> 6) & 0x3F)); /* 10xxxxxx */
        buffer[2] = (unsigned char)(0x80 | (code & 0x3F));        /* 10xxxxxx */
        return 3;
    }
    if (code <= 0x10FFFF) {
        buffer[0] = (unsigned char)(0xF0 | (code >> 18));          /* 11110xxx */
        buffer[1] = (unsigned char)(0x80 | ((code >> 12) & 0x3F)); /* 10xxxxxx */
        buffer[2] = (unsigned char)(0x80 | ((code >> 6) & 0x3F));  /* 10xxxxxx */
        buffer[3] = (unsigned char)(0x80 | (code & 0x3F));         /* 10xxxxxx */
        return 4;
    }
    /* Beyond the last Unicode code point. */
    return 0;
}
/**
 * Print one code point as UTF-8 text together with its hexadecimal value
 * and the decimal values of its three encoded bytes.
 *
 * Only code points whose encoding is exactly three bytes long are printed
 * that way; for any other result a fixed notice line is written instead.
 */
static void print_utf8_and_bytes(const unsigned int code)
{
    /* Three encoded bytes plus a trailing NUL so the buffer works with %s. */
    unsigned char utf8[4] = {0, 0, 0, 0};
    const size_t length = code_to_utf8(utf8, code);

    if (length != 3) {
        puts("Not a 3 byte utf8~~");
        return;
    }

    printf("text: %s, code:%04x bytes: %u %u %u\n", (const char*)utf8, code, utf8[0], utf8[1], utf8[2]);
}
/**
 * Walk the code points 0x0e01 through 0x0e5b (inclusive) and print each
 * one via print_utf8_and_bytes(), leaving out 0x0e3b..0x0e3e.
 */
static void print_all_thai_utf8_bytes(void)
{
    const unsigned first = 0x0e01;
    const unsigned last = 0x0e5b;
    unsigned cp;

    for (cp = first; cp <= last; ++cp) {
        /* Gap in the range: presumably unassigned code points, so nothing to show. */
        if (cp >= 0x0e3b && cp <= 0x0e3e) {
            continue;
        }
        print_utf8_and_bytes(cp);
    }
}
/* Program entry point: dump the Thai range and exit successfully. */
int main(void)
{
    print_all_thai_utf8_bytes();
    return EXIT_SUCCESS;
}
/** Result ~~
text: ก, code:0e01 bytes: 224 184 129
text: ข, code:0e02 bytes: 224 184 130
text: ฃ, code:0e03 bytes: 224 184 131
text: ค, code:0e04 bytes: 224 184 132
text: ฅ, code:0e05 bytes: 224 184 133
text: ฆ, code:0e06 bytes: 224 184 134
text: ง, code:0e07 bytes: 224 184 135
text: จ, code:0e08 bytes: 224 184 136
text: ฉ, code:0e09 bytes: 224 184 137
text: ช, code:0e0a bytes: 224 184 138
text: ซ, code:0e0b bytes: 224 184 139
text: ฌ, code:0e0c bytes: 224 184 140
text: ญ, code:0e0d bytes: 224 184 141
text: ฎ, code:0e0e bytes: 224 184 142
text: ฏ, code:0e0f bytes: 224 184 143
text: ฐ, code:0e10 bytes: 224 184 144
text: ฑ, code:0e11 bytes: 224 184 145
text: ฒ, code:0e12 bytes: 224 184 146
text: ณ, code:0e13 bytes: 224 184 147
text: ด, code:0e14 bytes: 224 184 148
text: ต, code:0e15 bytes: 224 184 149
text: ถ, code:0e16 bytes: 224 184 150
text: ท, code:0e17 bytes: 224 184 151
text: ธ, code:0e18 bytes: 224 184 152
text: น, code:0e19 bytes: 224 184 153
text: บ, code:0e1a bytes: 224 184 154
text: ป, code:0e1b bytes: 224 184 155
text: ผ, code:0e1c bytes: 224 184 156
text: ฝ, code:0e1d bytes: 224 184 157
text: พ, code:0e1e bytes: 224 184 158
text: ฟ, code:0e1f bytes: 224 184 159
text: ภ, code:0e20 bytes: 224 184 160
text: ม, code:0e21 bytes: 224 184 161
text: ย, code:0e22 bytes: 224 184 162
text: ร, code:0e23 bytes: 224 184 163
text: ฤ, code:0e24 bytes: 224 184 164
text: ล, code:0e25 bytes: 224 184 165
text: ฦ, code:0e26 bytes: 224 184 166
text: ว, code:0e27 bytes: 224 184 167
text: ศ, code:0e28 bytes: 224 184 168
text: ษ, code:0e29 bytes: 224 184 169
text: ส, code:0e2a bytes: 224 184 170
text: ห, code:0e2b bytes: 224 184 171
text: ฬ, code:0e2c bytes: 224 184 172
text: อ, code:0e2d bytes: 224 184 173
text: ฮ, code:0e2e bytes: 224 184 174
text: ฯ, code:0e2f bytes: 224 184 175
text: ะ, code:0e30 bytes: 224 184 176
text: ั, code:0e31 bytes: 224 184 177
text: า, code:0e32 bytes: 224 184 178
text: ำ, code:0e33 bytes: 224 184 179
text: ิ, code:0e34 bytes: 224 184 180
text: ี, code:0e35 bytes: 224 184 181
text: ึ, code:0e36 bytes: 224 184 182
text: ื, code:0e37 bytes: 224 184 183
text: ุ, code:0e38 bytes: 224 184 184
text: ู, code:0e39 bytes: 224 184 185
text: ฺ, code:0e3a bytes: 224 184 186
text: ฿, code:0e3f bytes: 224 184 191
text: เ, code:0e40 bytes: 224 185 128
text: แ, code:0e41 bytes: 224 185 129
text: โ, code:0e42 bytes: 224 185 130
text: ใ, code:0e43 bytes: 224 185 131
text: ไ, code:0e44 bytes: 224 185 132
text: ๅ, code:0e45 bytes: 224 185 133
text: ๆ, code:0e46 bytes: 224 185 134
text: ็, code:0e47 bytes: 224 185 135
text: ่, code:0e48 bytes: 224 185 136
text: ้, code:0e49 bytes: 224 185 137
text: ๊, code:0e4a bytes: 224 185 138
text: ๋, code:0e4b bytes: 224 185 139
text: ์, code:0e4c bytes: 224 185 140
text: ํ, code:0e4d bytes: 224 185 141
text: ๎, code:0e4e bytes: 224 185 142
text: ๏, code:0e4f bytes: 224 185 143
text: ๐, code:0e50 bytes: 224 185 144
text: ๑, code:0e51 bytes: 224 185 145
text: ๒, code:0e52 bytes: 224 185 146
text: ๓, code:0e53 bytes: 224 185 147
text: ๔, code:0e54 bytes: 224 185 148
text: ๕, code:0e55 bytes: 224 185 149
text: ๖, code:0e56 bytes: 224 185 150
text: ๗, code:0e57 bytes: 224 185 151
text: ๘, code:0e58 bytes: 224 185 152
text: ๙, code:0e59 bytes: 224 185 153
text: ๚, code:0e5a bytes: 224 185 154
text: ๛, code:0e5b bytes: 224 185 155
**/