Mercurial > hg > CbC > CbC_llvm
comparison unittests/Support/UnicodeTest.cpp @ 0:95c75e76d11b LLVM3.4
LLVM 3.4
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 12 Dec 2013 13:56:28 +0900 |
parents | |
children | c2174574ed3a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:95c75e76d11b |
---|---|
1 //===- unittests/Support/UnicodeTest.cpp - Unicode.h tests ----------------===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 | |
10 #include "llvm/Support/Unicode.h" | |
11 #include "gtest/gtest.h" | |
12 | |
13 namespace llvm { | |
14 namespace sys { | |
15 namespace unicode { | |
16 namespace { | |
17 | |
// Checks the value columnWidthUTF8 returns for a range of UTF-8 inputs: | |
// a non-negative column count for displayable text, -1 for the inputs below | |
// that contain a control or zero-width format character, and -2 for byte | |
// strings that are not valid UTF-8. | |
18 TEST(Unicode, columnWidthUTF8) { | |
// Empty input and single printable ASCII characters. | |
19 EXPECT_EQ(0, columnWidthUTF8("")); | |
20 EXPECT_EQ(1, columnWidthUTF8(" ")); | |
21 EXPECT_EQ(1, columnWidthUTF8("a")); | |
22 EXPECT_EQ(1, columnWidthUTF8("~")); | |
23 | |
// Widths of individual characters add up over the whole string. | |
24 EXPECT_EQ(6, columnWidthUTF8("abcdef")); | |
25 | |
// One such character makes the whole string report -1, even when it follows | |
// printable text. | |
26 EXPECT_EQ(-1, columnWidthUTF8("\x01")); | |
27 EXPECT_EQ(-1, columnWidthUTF8("aaaaaaaaaa\x01")); | |
28 EXPECT_EQ(-1, columnWidthUTF8("\342\200\213")); // 200B ZERO WIDTH SPACE | |
29 | |
30 // 00AD SOFT HYPHEN is displayed on most terminals as a space or a dash. Some | |
31 // text editors display it only when a line is broken at it, some use it as a | |
32 // line-break hint, but don't display. We choose terminal-oriented | |
33 // interpretation. | |
34 EXPECT_EQ(1, columnWidthUTF8("\302\255")); | |
35 | |
// Expected widths of 0, 1 and 2 columns for single non-ASCII characters. | |
36 EXPECT_EQ(0, columnWidthUTF8("\314\200")); // 0300 COMBINING GRAVE ACCENT | |
37 EXPECT_EQ(1, columnWidthUTF8("\340\270\201")); // 0E01 THAI CHARACTER KO KAI | |
38 EXPECT_EQ(2, columnWidthUTF8("\344\270\200")); // CJK UNIFIED IDEOGRAPH-4E00 | |
39 | |
// Combinations of the characters above: 2+2, 1+2 and 0+1+2 columns. | |
40 EXPECT_EQ(4, columnWidthUTF8("\344\270\200\344\270\200")); | |
41 EXPECT_EQ(3, columnWidthUTF8("q\344\270\200")); | |
42 EXPECT_EQ(3, columnWidthUTF8("\314\200\340\270\201\344\270\200")); | |
43 | |
44 // Invalid UTF-8 strings, columnWidthUTF8 should error out. | |
// Cases: a 3-byte lead byte with missing continuation bytes, a third byte | |
// that is not a continuation byte (\033, \300), and an invalid lead byte | |
// (\377). | |
45 EXPECT_EQ(-2, columnWidthUTF8("\344")); | |
46 EXPECT_EQ(-2, columnWidthUTF8("\344\270")); | |
47 EXPECT_EQ(-2, columnWidthUTF8("\344\270\033")); | |
48 EXPECT_EQ(-2, columnWidthUTF8("\344\270\300")); | |
49 EXPECT_EQ(-2, columnWidthUTF8("\377\366\355")); | |
50 | |
// The same invalid sequences preceded by valid ASCII text. | |
51 EXPECT_EQ(-2, columnWidthUTF8("qwer\344")); | |
52 EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270")); | |
53 EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270\033")); | |
54 EXPECT_EQ(-2, columnWidthUTF8("qwer\344\270\300")); | |
55 EXPECT_EQ(-2, columnWidthUTF8("qwer\377\366\355")); | |
56 | |
57 // UTF-8 sequences longer than 4 bytes correspond to unallocated Unicode | |
58 // characters. | |
59 EXPECT_EQ(-2, columnWidthUTF8("\370\200\200\200\200")); // 5-byte form (range starts at U+200000) | |
60 EXPECT_EQ(-2, columnWidthUTF8("\374\200\200\200\200\200")); // 6-byte form (range starts at U+4000000) | |
61 } | |
62 | |
// Checks isPrintable on individual code points, mostly chosen at the edges | |
// of control, reserved and noncharacter ranges. | |
63 TEST(Unicode, isPrintable) { | |
64 EXPECT_FALSE(isPrintable(0)); // <control-0000>..<control-001F> | |
65 EXPECT_FALSE(isPrintable(0x01)); | |
66 EXPECT_FALSE(isPrintable(0x1F)); | |
// ' ' and '~' are the code points adjacent to the control ranges tested | |
// above and below. | |
67 EXPECT_TRUE(isPrintable(' ')); | |
68 EXPECT_TRUE(isPrintable('A')); | |
69 EXPECT_TRUE(isPrintable('~')); | |
70 EXPECT_FALSE(isPrintable(0x7F)); // <control-007F>..<control-009F> | |
71 EXPECT_FALSE(isPrintable(0x90)); | |
72 EXPECT_FALSE(isPrintable(0x9F)); | |
73 | |
// 0xAD and its two neighbours are all expected to be printable. | |
74 EXPECT_TRUE(isPrintable(0xAC)); | |
75 EXPECT_TRUE(isPrintable(0xAD)); // SOFT HYPHEN is displayed on most terminals | |
76 // as either a space or a dash. | |
77 EXPECT_TRUE(isPrintable(0xAE)); | |
78 | |
// Boundary between an assigned code point and the reserved one after it. | |
79 EXPECT_TRUE(isPrintable(0x0377)); // GREEK SMALL LETTER PAMPHYLIAN DIGAMMA | |
80 EXPECT_FALSE(isPrintable(0x0378)); // <reserved-0378>..<reserved-0379> | |
81 | |
82 EXPECT_FALSE(isPrintable(0x0600)); // ARABIC NUMBER SIGN | |
83 | |
// Code points above 0xFFFF: last one below 0x20000, then 0x20000 itself. | |
84 EXPECT_FALSE(isPrintable(0x1FFFF)); // <reserved-1F774>..<noncharacter-1FFFF> | |
85 EXPECT_TRUE(isPrintable(0x20000)); // CJK UNIFIED IDEOGRAPH-20000 | |
86 | |
87 EXPECT_FALSE(isPrintable(0x10FFFF)); // noncharacter | |
88 } | |
89 | |
90 } // namespace | |
91 } // namespace unicode | |
92 } // namespace sys | |
93 } // namespace llvm |