# -*- coding: utf-8 -*-

# Tests for the GLib Unicode bindings exercised below: the GLib::Unicode
# constants and functions, the GLib::UniChar predicates and conversions, and
# the GLib::UTF8 / GLib::UTF16 / GLib::UCS4 string helpers.
#
# only_glib_version and omit are not defined in this file; presumably they
# come from GLibTestUtils and test-unit respectively -- TODO confirm.
class TestGLibUnicode < Test::Unit::TestCase
  include GLibTestUtils

  # A general-category constant is reachable under GLib::Unicode.
  def test_gunicode_type
    assert_nothing_raised do
      GLib::Unicode::CONTROL
    end
  end

  # A line-break constant is reachable under GLib::Unicode.
  def test_gunicode_break_type
    assert_nothing_raised do
      GLib::Unicode::BREAK_MANDATORY
    end
  end

  # alnum? accepts a letter and a digit and rejects "!".
  def test_unichar_alnum?
    assert(GLib::UniChar.alnum?(unichar("a")))
    assert(GLib::UniChar.alnum?(unichar("1")))
    assert(!GLib::UniChar.alnum?(unichar("!")))
  end

  # alpha? accepts letters of either case and rejects a digit.
  def test_unichar_alpha?
    assert(GLib::UniChar.alpha?(unichar("a")))
    assert(GLib::UniChar.alpha?(unichar("A")))
    assert(!GLib::UniChar.alpha?(unichar("1")))
  end

  # cntrl? accepts TAB and rejects ordinary letters and digits.
  def test_unichar_cntrl?
    assert(GLib::UniChar.cntrl?(unichar("\t")))
    # NOTE(review): "\h" is not an escape sequence in a double-quoted Ruby
    # string, so this is simply "h" -- confirm which character was intended.
    assert(!GLib::UniChar.cntrl?(unichar("\h")))
    assert(!GLib::UniChar.cntrl?(unichar("a")))
    assert(!GLib::UniChar.cntrl?(unichar("1")))
  end

  # digit? accepts "1" and rejects "a".
  def test_unichar_digit?
    assert(GLib::UniChar.digit?(unichar("1")))
    assert(!GLib::UniChar.digit?(unichar("a")))
  end

  # graph? accepts a letter and rejects space and TAB.
  def test_unichar_graph?
    assert(GLib::UniChar.graph?(unichar("a")))
    assert(!GLib::UniChar.graph?(unichar(" ")))
    assert(!GLib::UniChar.graph?(unichar("\t")))
  end

  # lower? accepts "a" and rejects "A" and "1".
  def test_unichar_lower?
    assert(GLib::UniChar.lower?(unichar("a")))
    assert(!GLib::UniChar.lower?(unichar("A")))
    assert(!GLib::UniChar.lower?(unichar("1")))
  end

  # print? accepts a letter and a space and rejects TAB.
  def test_unichar_print?
    assert(GLib::UniChar.print?(unichar("a")))
    assert(GLib::UniChar.print?(unichar(" ")))
    assert(!GLib::UniChar.print?(unichar("\t")))
  end

  # punct? accepts "," and "." and rejects a letter and TAB.
  def test_unichar_punct?
    assert(GLib::UniChar.punct?(unichar(",")))
    assert(GLib::UniChar.punct?(unichar(".")))
    assert(!GLib::UniChar.punct?(unichar("a")))
    assert(!GLib::UniChar.punct?(unichar("\t")))
  end

  # space? accepts space, TAB, CR and LF and rejects a letter.
  def test_unichar_space?
    assert(GLib::UniChar.space?(unichar(" ")))
    assert(GLib::UniChar.space?(unichar("\t")))
    assert(GLib::UniChar.space?(unichar("\r")))
    assert(GLib::UniChar.space?(unichar("\n")))
    assert(!GLib::UniChar.space?(unichar("a")))
  end

  # upper? accepts "A" and rejects "a" and "1".
  def test_unichar_upper?
    assert(GLib::UniChar.upper?(unichar("A")))
    assert(!GLib::UniChar.upper?(unichar("a")))
    assert(!GLib::UniChar.upper?(unichar("1")))
  end

  # xdigit? accepts hexadecimal digits in either case and rejects "X".
  def test_unichar_xdigit?
    assert(GLib::UniChar.xdigit?(unichar("1")))
    assert(GLib::UniChar.xdigit?(unichar("a")))
    assert(GLib::UniChar.xdigit?(unichar("A")))
    assert(GLib::UniChar.xdigit?(unichar("F")))
    assert(!GLib::UniChar.xdigit?(unichar("X")))
  end

  # title? accepts a titlecase letter and rejects plain lower/upper case.
  # NOTE(review): assumes the binding exposes GLib::UniChar.title?
  # (g_unichar_istitle), as the test name implies -- confirm.
  def test_unichar_title?
    # U+01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
    assert(GLib::UniChar.title?(0x01C5))
    assert(!GLib::UniChar.title?(unichar("a")))
    assert(!GLib::UniChar.title?(unichar("A")))
  end

  # defined? accepts an assigned character and rejects a noncharacter.
  # NOTE(review): assumes the binding exposes GLib::UniChar.defined?
  # (g_unichar_isdefined), as the test name implies -- confirm.
  def test_unichar_defined?
    assert(GLib::UniChar.defined?(unichar("a")))
    # U+FFFF is a noncharacter and is never assigned.
    assert(!GLib::UniChar.defined?(0xFFFF))
  end

  # wide? accepts hiragana and a fullwidth Latin letter and rejects ASCII.
  def test_unichar_wide?
    assert(GLib::UniChar.wide?(unichar("あ")))
    # U+FF21 FULLWIDTH LATIN CAPITAL LETTER A, written as a code point so the
    # fullwidth form cannot be folded to ASCII "A" (which is not wide).
    assert(GLib::UniChar.wide?(0xFF21))
    assert(!GLib::UniChar.wide?(unichar("a")))
  end

  # wide_cjk? accepts hiragana and a Hangul syllable and rejects ASCII.
  def test_unichar_wide_cjk?
    only_glib_version(2, 12, 0)
    assert(GLib::UniChar.wide_cjk?(unichar("あ")))
    assert(GLib::UniChar.wide_cjk?(0xD55C)) # HANGUL SYLLABLE HAN
    assert(!GLib::UniChar.wide_cjk?(unichar("a")))
  end

  # to_upper maps "a" to "A" and leaves "A" and "*" unchanged.
  def test_unichar_to_upper
    assert_equal(unichar("A"), GLib::UniChar.to_upper(unichar("a")))
    assert_equal(unichar("A"), GLib::UniChar.to_upper(unichar("A")))
    assert_equal(unichar("*"), GLib::UniChar.to_upper(unichar("*")))
  end

  # to_lower maps "A" to "a" and leaves "a" and "*" unchanged.
  def test_unichar_to_lower
    assert_equal(unichar("a"), GLib::UniChar.to_lower(unichar("A")))
    assert_equal(unichar("a"), GLib::UniChar.to_lower(unichar("a")))
    assert_equal(unichar("*"), GLib::UniChar.to_lower(unichar("*")))
  end

  # to_title maps "a" to "A" and leaves "A" and "*" unchanged.
  def test_unichar_to_title
    assert_equal(unichar("A"), GLib::UniChar.to_title(unichar("a")))
    assert_equal(unichar("A"), GLib::UniChar.to_title(unichar("A")))
    assert_equal(unichar("*"), GLib::UniChar.to_title(unichar("*")))
  end

  # digit_value returns the decimal value, or -1 for a non-digit.
  def test_unichar_digit_value
    assert_equal(0, GLib::UniChar.digit_value(unichar("0")))
    assert_equal(9, GLib::UniChar.digit_value(unichar("9")))
    assert_equal(-1, GLib::UniChar.digit_value(unichar("a")))
  end

  # xdigit_value returns the hexadecimal value, or -1 for a non-hex character.
  def test_unichar_xdigit_value
    assert_equal(0, GLib::UniChar.xdigit_value(unichar("0")))
    assert_equal(9, GLib::UniChar.xdigit_value(unichar("9")))
    assert_equal(10, GLib::UniChar.xdigit_value(unichar("a")))
    assert_equal(15, GLib::UniChar.xdigit_value(unichar("F")))
    assert_equal(-1, GLib::UniChar.xdigit_value(unichar("g")))
  end

  # type reports the general category of a digit and of letters of each case.
  def test_unichar_type
    assert_equal(GLib::Unicode::DECIMAL_NUMBER,
                 GLib::UniChar.type(unichar("0")))
    assert_equal(GLib::Unicode::LOWERCASE_LETTER,
                 GLib::UniChar.type(unichar("a")))
    assert_equal(GLib::Unicode::UPPERCASE_LETTER,
                 GLib::UniChar.type(unichar("A")))
  end

  # break_type reports the line-break class of "-" and "0".
  def test_unichar_break_type
    assert_equal(GLib::Unicode::BREAK_HYPHEN,
                 GLib::UniChar.break_type(unichar("-")))
    assert_equal(GLib::Unicode::BREAK_NUMERIC,
                 GLib::UniChar.break_type(unichar("0")))
  end

  # canonical_ordering reorders the combining marks that follow each base
  # letter; input and expected output are both passed as UTF-32 strings.
  def test_unicode_canonical_ordering
    original = [unichar("a"), 0x0308, 0x0323,
                unichar("e"), 0x0304, 0x0301, 0x0323].pack("U*")
    expected = [unichar("a"), 0x0323, 0x0308,
                unichar("e"), 0x0323, 0x0304, 0x0301].pack("U*")
    assert_equal(utf8_to_utf32(expected),
                 GLib::Unicode.canonical_ordering(utf8_to_utf32(original)))
  end

  # canonical_decomposition splits a precomposed character into its base
  # character followed by a combining mark, returned as a UTF-32 string.
  def test_unicode_canonical_decomposition
    a_with_acute = 0x00E1
    expected = [unichar("a"), 0x0301].pack("U*")
    assert_equal(utf8_to_utf32(expected),
                 GLib::Unicode.canonical_decomposition(a_with_acute))

    hiragana_ga = 0x304C
    hiragana_ka = 0x304B
    expected = [hiragana_ka, 0x3099].pack("U*")
    assert_equal(utf8_to_utf32(expected),
                 GLib::Unicode.canonical_decomposition(hiragana_ga))
  end

  # get_mirror_char swaps parentheses and returns "x" unchanged.
  def test_unichar_get_mirror_char
    assert_equal(unichar("("), GLib::UniChar.get_mirror_char(unichar(")")))
    assert_equal(unichar(")"), GLib::UniChar.get_mirror_char(unichar("(")))
    assert_equal(unichar("x"), GLib::UniChar.get_mirror_char(unichar("x")))
  end

  # get_script reports SCRIPT_HIRAGANA for a hiragana character.
  def test_unichar_get_script
    only_glib_version(2, 14, 0)
    assert_equal(GLib::Unicode::SCRIPT_HIRAGANA,
                 GLib::UniChar.get_script(unichar("あ")))
  end

  # get_char returns the first character's code point. With a second
  # argument of true the asserted results are -2 for a truncated sequence
  # and -1 for an invalid one (presumably the validating variant of the
  # underlying GLib call -- TODO confirm).
  def test_utf8_get_char
    assert_equal(utf8_to_utf32("あ").unpack("L*")[0],
                 GLib::UTF8.get_char("あ"))

    assert_equal(utf8_to_utf32("あ").unpack("L*")[0],
                 GLib::UTF8.get_char("あ", true))

    # All bytes of the character except the last one.
    partial_input = "あ".unpack("c*")[0..-2].pack("c*")
    assert_equal(-2, GLib::UTF8.get_char(partial_input, true))

    # Only the bytes from index 2 onwards, i.e. no lead byte.
    invalid_input = "あ".unpack("c*")[2..-1].pack("c*")
    assert_equal(-1, GLib::UTF8.get_char(invalid_input, true))
  end

  # size counts characters, not bytes.
  def test_utf8_size
    assert_equal(1, GLib::UTF8.size("あ"))
    assert_equal(2, GLib::UTF8.size("あい"))
  end

  # reverse reverses by character.
  def test_utf8_reverse
    assert_equal("おえういあ", GLib::UTF8.reverse("あいうえお"))
  end

  # validate accepts well-formed UTF-8 and rejects a string whose first byte
  # has been cut off.
  def test_utf8_validate
    assert(GLib::UTF8.validate("あ"))
    assert(!GLib::UTF8.validate(binary("あ")[1..-1]))
  end

  # upcase changes the ASCII letters and leaves the hiragana untouched.
  def test_utf8_upcase
    assert_equal("ABCあいう", GLib::UTF8.upcase("aBcあいう"))
  end

  # downcase changes the ASCII letters and leaves the hiragana untouched.
  def test_utf8_downcase
    assert_equal("abcあいう", GLib::UTF8.downcase("aBcあいう"))
  end

  # casefold gives the same result for strings that differ only in case.
  def test_utf8_casefold
    assert_equal(GLib::UTF8.casefold("AbCあいう"),
                 GLib::UTF8.casefold("aBcあいう"))
  end

  # normalize yields the decomposed pair for NFD/NFKD and the precomposed
  # character for NFC/NFKC.
  def test_utf8_normalize
    original = [0x00c1].pack("U*") # A with acute

    nfd = [0x0041, 0x0301].pack("U*")
    assert_equal(nfd,
                 GLib::UTF8.normalize(original, GLib::NormalizeMode::NFD))

    nfc = [0x00c1].pack("U*")
    assert_equal(nfc,
                 GLib::UTF8.normalize(original, GLib::NormalizeMode::NFC))

    nfkd = [0x0041, 0x0301].pack("U*")
    assert_equal(nfkd,
                 GLib::UTF8.normalize(original, GLib::NormalizeMode::NFKD))

    nfkc = [0x00c1].pack("U*")
    assert_equal(nfkc,
                 GLib::UTF8.normalize(original, GLib::NormalizeMode::NFKC))
  end

  # collate returns a negative, positive or zero result depending on order.
  def test_utf8_collate
    only_glib_version(2, 16, 0)
    assert_operator(0, :>, GLib::UTF8.collate("あ", "い"))
    assert_operator(0, :<, GLib::UTF8.collate("い", "あ"))
    assert_equal(0, GLib::UTF8.collate("あ", "あ"))
  end

  # Collation keys compare with <=> in the same order as the strings collate.
  def test_utf8_collate_key
    only_glib_version(2, 16, 0)
    assert_operator(0, :>,
                    GLib::UTF8.collate_key("あ") <=>
                    GLib::UTF8.collate_key("い"))
    assert_operator(0, :<,
                    GLib::UTF8.collate_key("い") <=>
                    GLib::UTF8.collate_key("あ"))
    assert_equal(0,
                 GLib::UTF8.collate_key("あ") <=>
                 GLib::UTF8.collate_key("あ"))
  end

  # With a second argument of true, collate_key yields keys that sort
  # "event.h" before "eventgenerator.c" and "file5" before "file10".
  def test_utf8_collate_key_for_filename
    sorted_sources = ["event.c", "eventgenerator.c", "event.h"].sort_by do |f|
      GLib::UTF8.collate_key(f, true)
    end
    assert_equal(["event.c", "event.h", "eventgenerator.c"], sorted_sources)

    sorted_numbered = ["file1", "file10", "file5"].sort_by do |f|
      GLib::UTF8.collate_key(f, true)
    end
    assert_equal(["file1", "file5", "file10"], sorted_numbered)
  end

  # UTF8.to_utf16 matches Ruby's own native-endian UTF-16 conversion.
  def test_utf8_to_utf16
    assert_equal(utf8_to_utf16("あいうえお"),
                 GLib::UTF8.to_utf16("あいうえお"))
  end

  # UTF8.to_ucs4 matches Ruby's own UTF-32 conversion, raises
  # GLib::ConvertError on input whose first byte has been cut off, and does
  # not raise for the same input when the second argument is true.
  def test_utf8_to_ucs4
    assert_equal(utf8_to_utf32("あいうえお"),
                 GLib::UTF8.to_ucs4("あいうえお"))

    assert_raise(GLib::ConvertError) do
      GLib::UTF8.to_ucs4(binary("あいうえお")[1..-1])
    end
    assert_nothing_raised do
      GLib::UTF8.to_ucs4(binary("あいうえお")[1..-1], true)
    end
  end

  # UTF16.to_ucs4 converts native-endian UTF-16 to UTF-32.
  def test_utf16_to_ucs4
    assert_equal(utf8_to_utf32("あいうえお"),
                 GLib::UTF16.to_ucs4(utf8_to_utf16("あいうえお")))
  end

  # UTF16.to_utf8 converts native-endian UTF-16 back to UTF-8.
  def test_utf16_to_utf8
    assert_equal("あいうえお",
                 GLib::UTF16.to_utf8(utf8_to_utf16("あいうえお")))
  end

  # UCS4.to_utf16 converts UTF-32 to UTF-16 and raises GLib::ConvertError
  # when the input has lost its first byte.
  def test_ucs4_to_utf16
    assert_equal(utf8_to_utf16("あいうえお"),
                 GLib::UCS4.to_utf16(utf8_to_utf32("あいうえお")))

    assert_raise(GLib::ConvertError) do
      GLib::UCS4.to_utf16(binary(utf8_to_utf32("あいうえお"))[1..-1])
    end
  end

  # UCS4.to_utf8 converts UTF-32 back to UTF-8.
  def test_ucs4_to_utf8
    assert_equal("あいうえお",
                 GLib::UCS4.to_utf8(utf8_to_utf32("あいうえお")))
  end

  # UniChar.to_utf8 encodes a single code point as a UTF-8 string.
  def test_unichar_to_utf8
    assert_equal("あ",
                 GLib::UniChar.to_utf8(utf8_to_utf32("あ").unpack("L*")[0]))
  end

  # combining_class is 0 for a base letter and 230 for the acute accent.
  def test_unichar_combining_class
    only_glib_version(2, 14, 0)
    assert_equal(0, GLib::UniChar.combining_class(unichar("a")))
    # U+0301 COMBINING ACUTE ACCENT, written as a code point because a bare
    # combining mark inside quotes is invisible and easily lost or altered.
    assert_equal(230, GLib::UniChar.combining_class(0x0301))
  end

  # mark? accepts a combining mark and rejects a letter and a format control.
  def test_unichar_mark?
    only_glib_version(2, 14, 0)
    assert(!GLib::UniChar.mark?(unichar("a")))
    assert(!GLib::UniChar.mark?(0x200E)) # LEFT-TO-RIGHT MARK
    assert(GLib::UniChar.mark?(0x1DC3)) # COMBINING SUSPENSION MARK
  end

  # zero_width? accepts ZERO WIDTH SPACE and rejects a letter.
  def test_unichar_zero_width?
    only_glib_version(2, 14, 0)
    assert(!GLib::UniChar.zero_width?(unichar("a")))
    assert(GLib::UniChar.zero_width?(0x200B)) # ZERO WIDTH SPACE
  end

  private

  # Returns what GLib::UTF8.get_char yields for +char+; the tests above use
  # the result as the character's code point.
  def unichar(char)
    GLib::UTF8.get_char(char)
  end

  # Converts +string+ to UTF-32 in the machine's byte order. Uses
  # String#encode when available and falls back to the uconv library
  # otherwise.
  def utf8_to_utf32(string)
    if string.respond_to?(:encode)
      string.encode(little_endian? ? "UTF-32LE" : "UTF-32BE")
    else
      require_uconv
      Uconv.u8tou4(string)
    end
  end

  # Converts +string+ to UTF-16 in the machine's byte order. Uses
  # String#encode when available and falls back to the uconv library
  # otherwise.
  def utf8_to_utf16(string)
    if string.respond_to?(:encode)
      string.encode(little_endian? ? "UTF-16LE" : "UTF-16BE")
    else
      require_uconv
      Uconv.u8tou16(string)
    end
  end

  # Loads uconv on demand; when it is not installed, calls omit with an
  # explanatory message instead of letting LoadError propagate.
  def require_uconv
    require 'uconv'
  rescue LoadError
    omit("Need uconv to run this test.")
  end

  # Returns the bytes of +string+ tagged as ASCII-8BIT, so that slicing the
  # result cuts bytes rather than characters. Works on a copy, leaving the
  # caller's string (and its encoding) untouched. Where String#force_encoding
  # is unavailable the string is returned as is.
  def binary(string)
    return string unless string.respond_to?(:force_encoding)

    string.dup.force_encoding("ascii-8bit")
  end

  # True when a native 16-bit integer packs to the same bytes as an explicit
  # little-endian one.
  def little_endian?
    [1].pack("v") == [1].pack("S")
  end
end