matsuda
2/23/2011 - 9:10 AM

Rubyでの文字コードの扱い

Rubyでの文字コードの扱い

=begin

 The tilde problem: five look-alike characters that are distinct code
 points with distinct UTF-8 encodings.

| UTF-8 bytes  | code point | Unicode name    |
| \xE3\x80\x9C | 0x301c     | WAVE DASH       |
| \xEF\xBD\x9E | 0xff5e     | FULLWIDTH TILDE |
| \x7E         | 0x7e       | TILDE           |
| \xCB\x9C     | 0x2dc      | SMALL TILDE     |
| \xE2\x88\xBC | 0x223c     | TILDE OPERATOR  |

 Each example below shows three things:
   1. the string literal written as raw UTF-8 bytes and the glyph it renders as,
   2. the bytes decoded to a code point (unpack "U") and printed in hex,
   3. the code point encoded back to UTF-8 (pack "U") with every byte in hex.

=end

# U+301C WAVE DASH
"\xE3\x80\x9C"
# => "〜"

format("%#x", "\xE3\x80\x9C".unpack("U").first)
# => "0x301c"

[0x301c].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe3", "0x80", "0x9c"]


# U+FF5E FULLWIDTH TILDE (three bytes; not the ASCII "~" of U+007E)
"\xEF\xBD\x9E"
# => "～"

format("%#x", "\xEF\xBD\x9E".unpack("U").first)
# => "0xff5e"

[0xff5e].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xef", "0xbd", "0x9e"]


# U+007E TILDE (plain ASCII, a single byte)
"\x7E"
# => "~"

format("%#x", "\x7E".unpack("U").first)
# => "0x7e"

[0x7e].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0x7e"]


# U+02DC SMALL TILDE
"\xCB\x9C"
# => "˜"

format("%#x", "\xCB\x9C".unpack("U").first)
# => "0x2dc"

[0x2dc].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xcb", "0x9c"]


# U+223C TILDE OPERATOR
"\xE2\x88\xBC"
# => "∼"

format("%#x", "\xE2\x88\xBC".unpack("U").first)
# => "0x223c"

[0x223c].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe2", "0x88", "0xbc"]


=begin

 The hyphen problem: five look-alike dash characters that are distinct code
 points with distinct UTF-8 encodings.

| UTF-8 bytes  | code point | Unicode name                           |
| \xEF\xBC\x8D | 0xff0d     | FULLWIDTH HYPHEN-MINUS                 |
| \xE2\x88\x92 | 0x2212     | MINUS SIGN                             |
| \xE3\x83\xBC | 0x30fc     | KATAKANA-HIRAGANA PROLONGED SOUND MARK |
| \xE2\x80\x90 | 0x2010     | HYPHEN                                 |
| \x2D         | 0x2d       | HYPHEN-MINUS                           |

 Each example below shows three things:
   1. the string literal written as raw UTF-8 bytes and the glyph it renders as,
   2. the bytes decoded to a code point (unpack "U") and printed in hex,
   3. the code point encoded back to UTF-8 (pack "U") with every byte in hex.

=end


# U+FF0D FULLWIDTH HYPHEN-MINUS (three bytes; not the ASCII "-" of U+002D)
"\xEF\xBC\x8D"
# => "－"

format("%#x", "\xEF\xBC\x8D".unpack("U").first)
# => "0xff0d"

[0xff0d].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xef", "0xbc", "0x8d"]


# U+2212 MINUS SIGN
"\xE2\x88\x92"
# => "−"

format("%#x", "\xE2\x88\x92".unpack("U").first)
# => "0x2212"

[0x2212].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe2", "0x88", "0x92"]


# U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
"\xE3\x83\xBC"
# => "ー"

format("%#x", "\xE3\x83\xBC".unpack("U").first)
# => "0x30fc"

[0x30fc].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe3", "0x83", "0xbc"]


# U+2010 HYPHEN
"\xE2\x80\x90"
# => "‐"

format("%#x", "\xE2\x80\x90".unpack("U").first)
# => "0x2010"

[0x2010].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0xe2", "0x80", "0x90"]


# U+002D HYPHEN-MINUS (plain ASCII, a single byte)
"\x2D"
# => "-"

format("%#x", "\x2D".unpack("U").first)
# => "0x2d"

[0x2d].pack("U").bytes.map { |byte| format("%#x", byte) }
# => ["0x2d"]