# Rubyでの文字コードの扱い
=begin
The tilde problem: several distinct Unicode characters look like a tilde.
| UTF-8 bytes  | code point | Unicode name    |
| \xE3\x80\x9C | 0x301c     | WAVE DASH       |
| \xEF\xBD\x9E | 0xff5e     | FULLWIDTH TILDE |
| \x7E         | 0x7e       | TILDE (ASCII)   |
| \xCB\x9C     | 0x2dc      | SMALL TILDE     |
| \xE2\x88\xBC | 0x223c     | TILDE OPERATOR  |
=end

# Each group below shows three things for one character:
#   1. the string literal built from its UTF-8 bytes,
#   2. its code point, obtained by decoding those bytes with unpack("U"),
#   3. the UTF-8 bytes obtained by encoding the code point with pack("U").

# U+301C WAVE DASH
"\xE3\x80\x9C"
# => "〜"
format("%#x", "\xE3\x80\x9C".unpack("U").first)
# => "0x301c"
[0x301c].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe3", "0x80", "0x9c"]

# U+FF5E FULLWIDTH TILDE
# NOTE: this is the fullwidth form, not the ASCII "~" (U+007E) shown further down.
"\xEF\xBD\x9E"
# => "～"
format("%#x", "\xEF\xBD\x9E".unpack("U").first)
# => "0xff5e"
[0xff5e].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xef", "0xbd", "0x9e"]

# U+007E TILDE (ASCII)
"\x7E"
# => "~"
format("%#x", "\x7E".unpack("U").first)
# => "0x7e"
[0x7e].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0x7e"]

# U+02DC SMALL TILDE
"\xCB\x9C"
# => "˜"
format("%#x", "\xCB\x9C".unpack("U").first)
# => "0x2dc"
[0x2dc].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xcb", "0x9c"]

# U+223C TILDE OPERATOR
"\xE2\x88\xBC"
# => "∼"
format("%#x", "\xE2\x88\xBC".unpack("U").first)
# => "0x223c"
[0x223c].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe2", "0x88", "0xbc"]
=begin
The hyphen problem: several distinct Unicode characters look like a hyphen.
| UTF-8 bytes  | code point | Unicode name                          |
| \xEF\xBC\x8D | 0xff0d     | FULLWIDTH HYPHEN-MINUS                |
| \xE2\x88\x92 | 0x2212     | MINUS SIGN                            |
| \xE3\x83\xBC | 0x30fc     | KATAKANA-HIRAGANA PROLONGED SOUND MARK |
| \xE2\x80\x90 | 0x2010     | HYPHEN                                |
| \x2D         | 0x2d       | HYPHEN-MINUS (ASCII)                  |
=end

# Each group below shows three things for one character:
#   1. the string literal built from its UTF-8 bytes,
#   2. its code point, obtained by decoding those bytes with unpack("U"),
#   3. the UTF-8 bytes obtained by encoding the code point with pack("U").

# U+FF0D FULLWIDTH HYPHEN-MINUS
# NOTE: this is the fullwidth form, not the ASCII "-" (U+002D) shown further down.
"\xEF\xBC\x8D"
# => "－"
format("%#x", "\xEF\xBC\x8D".unpack("U").first)
# => "0xff0d"
[0xff0d].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xef", "0xbc", "0x8d"]

# U+2212 MINUS SIGN
"\xE2\x88\x92"
# => "−"
format("%#x", "\xE2\x88\x92".unpack("U").first)
# => "0x2212"
[0x2212].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe2", "0x88", "0x92"]

# U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
"\xE3\x83\xBC"
# => "ー"
format("%#x", "\xE3\x83\xBC".unpack("U").first)
# => "0x30fc"
[0x30fc].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe3", "0x83", "0xbc"]

# U+2010 HYPHEN
"\xE2\x80\x90"
# => "‐"
format("%#x", "\xE2\x80\x90".unpack("U").first)
# => "0x2010"
[0x2010].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe2", "0x80", "0x90"]

# U+002D HYPHEN-MINUS (ASCII)
"\x2D"
# => "-"
format("%#x", "\x2D".unpack("U").first)
# => "0x2d"
[0x2d].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0x2d"]