Watson1978
2/28/2012 - 2:12 AM

MacRuby : utf8

MacRuby : utf8

# -*- coding: utf-8 -*-
require 'benchmark'

# Micro-benchmark for String#chomp.
#
# Three receivers are timed, each ending in three newline characters:
#   * a long multibyte (UTF-8) string,
#   * a short multibyte (UTF-8) string,
#   * an ASCII-only string.
# Every report calls chomp 1000 times on the same receiver; chomp is the
# non-destructive variant, so the receiver is unchanged between iterations.
Benchmark.bm(20) do |bench|
  newline_tail = "\n\n\n"

  # [report label, string under test] pairs, in the order they are reported.
  subjects = [
    ["long utf-8 string",  ("あいうえお" * 10000) << newline_tail],
    ["short utf-8 string", ("あいうえお" * 10) << newline_tail],
    ["ascii string",       ("abcdefgh" * 1000) << newline_tail]
  ]

  # `each` (not `map`) on purpose: the block must not return an Array of
  # Benchmark::Tms objects, or Benchmark.bm would print those rows again.
  subjects.each do |label, subject|
    bench.report(label) do
      1000.times { subject.chomp }
    end
  end
end
= before (installed macruby, without the string.c change)
$ macruby bm_str_utf8.rb                                       
                          user     system      total        real
long utf-8 string     1.980000   0.080000   2.060000 (  1.958647)
short utf-8 string    0.010000   0.000000   0.010000 (  0.004149)
ascii string          0.010000   0.000000   0.010000 (  0.007358)

= after (locally built ./macruby, with the string.c change)
$ DYLD_LIBRARY_PATH=. ./macruby -I./lib ~/tmp/bm/bm_str_utf8.rb
                          user     system      total        real
long utf-8 string     0.530000   0.050000   0.580000 (  0.570899)
short utf-8 string    0.000000   0.000000   0.000000 (  0.003513)
ascii string          0.000000   0.000000   0.000000 (  0.008428)
diff --git a/string.c b/string.c
index 222e4a0..f39cf83 100644
--- a/string.c
+++ b/string.c
@@ -28,6 +28,7 @@
 #include <unicode/unum.h>
 #include <unicode/utrans.h>
 #include <unicode/uchar.h>
+#include <unicode/utext.h>
 
 #define SET_CLASS(dst, src)						\
     do{									\
@@ -546,6 +547,12 @@ str_get_uchar(rb_str_t *self, long pos)
 	return c;
     }
 
+    UErrorCode status = U_ZERO_ERROR;
+    UText *ut = utext_openUTF8(NULL, self->bytes, self->length_in_bytes, &status);
+    UChar return_value = utext_char32At(ut, pos);
+    utext_close(ut);
+
+#if 0
     __block UChar return_value = 0;
     __block long i = 0;
     str_each_uchar32(self, ^(UChar32 c, long start_index, long char_len, bool *stop) {
@@ -572,6 +579,7 @@ str_get_uchar(rb_str_t *self, long pos)
 	    }
 	}
     });
+#endif
 
     return return_value;
 }