#!/usr/bin/env ruby
# Encodes a single Unicode code point as its UTF-8 byte sequence.
#
# The first byte carries the length marker (0xxxxxxx, 110xxxxx, 1110xxxx or
# 11110xxx) together with the most significant payload bits; every following
# byte is a continuation byte (10xxxxxx) holding the next six payload bits.
#
# Surrogate code points (0xD800..0xDFFF) are not rejected; they are encoded
# as ordinary three-byte sequences.
#
# @param codepoint [Integer] code point in the range 0..0x10FFFF
# @return [Array<Integer>] the one to four byte values of the encoding
# @raise [RuntimeError] if codepoint falls outside 0..0x10FFFF (this includes
#   negative values, which are reported with the same message)
def to_utf8(codepoint)
  case codepoint
  when 0..0x7F
    [codepoint]
  when 0x80..0x7FF
    [
      0b11000000 | (codepoint >> 6),
      0b10000000 | (codepoint & 0b111111)
    ]
  when 0x800..0xFFFF
    [
      0b11100000 | (codepoint >> 12),
      0b10000000 | ((codepoint >> 6) & 0b111111),
      0b10000000 | (codepoint & 0b111111)
    ]
  when 0x10000..0x10FFFF
    # The upper bound is the last Unicode code point (U+10FFFF), so the whole
    # of plane 16 is encodable, not just code points up to 0xFFFFF.
    [
      0b11110000 | (codepoint >> 18),
      0b10000000 | ((codepoint >> 12) & 0b111111),
      0b10000000 | ((codepoint >> 6) & 0b111111),
      0b10000000 | (codepoint & 0b111111)
    ]
  else
    raise 'codepoint too large'
  end
end
# Self-check: compare to_utf8 against Ruby's built-in UTF-8 packer for every
# code point in 0..0xFFFFF and abort with a diagnostic on the first mismatch.
# Code points above 0xFFFFF are not exercised by this loop.
0.upto(0xFFFFF) do |codepoint|
  native_bytes = [codepoint].pack('U*').bytes
  own_bytes = to_utf8(codepoint)
  next if native_bytes == own_bytes

  raise <<~REPORT
    WA
    codepoint: #{codepoint}
    to_utf8: #{own_bytes}
    ruby native: #{native_bytes}
  REPORT
end
puts "success!"