Fix narrow width: Added StrictEmojiNeutral
Yasuhiro Matsumoto
3 years ago
6 | 6 | |
7 | 7 | var benchSink int |
8 | 8 | |
9 | // | |
10 | // RuneWidth | |
11 | // | |
12 | ||
13 | func benchRuneWidth(b *testing.B, eastAsianWidth bool, start, stop rune, want int) int { | |
14 | n := 0 | |
15 | got := -1 | |
16 | c := NewCondition() | |
17 | c.EastAsianWidth = eastAsianWidth | |
18 | for i := 0; i < b.N; i++ { | |
19 | got = n | |
20 | for r := start; r < stop; r++ { | |
21 | n += c.RuneWidth(r) | |
22 | } | |
23 | got = n - got | |
24 | } | |
25 | if want != 0 && got != want { // some extra checks | |
26 | b.Errorf("got %d, want %d\n", got, want) | |
27 | } | |
28 | return n | |
29 | } | |
30 | func BenchmarkRuneWidthAll(b *testing.B) { | |
31 | benchSink = benchRuneWidth(b, false, 0, utf8.MaxRune+1, 1293932) | |
32 | } | |
33 | func BenchmarkRuneWidth768(b *testing.B) { | |
34 | benchSink = benchRuneWidth(b, false, 0, 0x300, 702) | |
35 | } | |
36 | func BenchmarkRuneWidthAllEastAsian(b *testing.B) { | |
37 | benchSink = benchRuneWidth(b, true, 0, utf8.MaxRune+1, 1432558) | |
38 | } | |
39 | func BenchmarkRuneWidth768EastAsian(b *testing.B) { | |
40 | benchSink = benchRuneWidth(b, true, 0, 0x300, 794) | |
41 | } | |
42 | ||
43 | // | |
44 | // String1Width - strings which consist of a single rune | |
45 | // | |
46 | ||
47 | func benchString1Width(b *testing.B, eastAsianWidth bool, start, stop rune, want int) int { | |
48 | n := 0 | |
49 | got := -1 | |
50 | c := NewCondition() | |
51 | c.EastAsianWidth = eastAsianWidth | |
52 | for i := 0; i < b.N; i++ { | |
53 | got = n | |
54 | for r := start; r < stop; r++ { | |
55 | s := string(r) | |
56 | n += c.StringWidth(s) | |
57 | } | |
58 | got = n - got | |
59 | } | |
60 | if want != 0 && got != want { // some extra checks | |
61 | b.Errorf("got %d, want %d\n", got, want) | |
62 | } | |
63 | return n | |
64 | } | |
65 | func BenchmarkString1WidthAll(b *testing.B) { | |
66 | benchSink = benchString1Width(b, false, 0, utf8.MaxRune+1, 1295980) | |
67 | } | |
68 | func BenchmarkString1Width768(b *testing.B) { | |
69 | benchSink = benchString1Width(b, false, 0, 0x300, 702) | |
70 | } | |
71 | func BenchmarkString1WidthAllEastAsian(b *testing.B) { | |
72 | benchSink = benchString1Width(b, true, 0, utf8.MaxRune+1, 1436654) | |
73 | } | |
74 | func BenchmarkString1Width768EastAsian(b *testing.B) { | |
75 | benchSink = benchString1Width(b, true, 0, 0x300, 794) | |
76 | } | |
77 | ||
78 | // | |
79 | // tables | |
80 | // | |
9 | 81 | func benchTable(b *testing.B, tbl table) int { |
10 | 82 | n := 0 |
11 | 83 | for i := 0; i < b.N; i++ { |
11 | 11 | // EastAsianWidth will be set true if the current locale is CJK |
12 | 12 | EastAsianWidth bool |
13 | 13 | |
14 | // StrictEmojiNeutral should be set to false to handle broken fonts. | |
15 | StrictEmojiNeutral bool = true | |
16 | ||
14 | 17 | // DefaultCondition is a condition in current locale |
15 | DefaultCondition = &Condition{} | |
18 | DefaultCondition = &Condition{ | |
19 | EastAsianWidth: false, | |
20 | StrictEmojiNeutral: true, | |
21 | } | |
16 | 22 | ) |
17 | 23 | |
18 | 24 | func init() { |
82 | 88 | |
83 | 89 | // Condition has the flag EastAsianWidth, which tells whether the current locale is CJK or not. |
84 | 90 | type Condition struct { |
85 | EastAsianWidth bool | |
91 | EastAsianWidth bool | |
92 | StrictEmojiNeutral bool | |
86 | 93 | } |
87 | 94 | |
88 | 95 | // NewCondition returns a new instance of Condition for the current locale. |
89 | 96 | func NewCondition() *Condition { |
90 | 97 | return &Condition{ |
91 | EastAsianWidth: EastAsianWidth, | |
98 | EastAsianWidth: EastAsianWidth, | |
99 | StrictEmojiNeutral: StrictEmojiNeutral, | |
92 | 100 | } |
93 | 101 | } |
94 | 102 | |
95 | 103 | // RuneWidth returns the number of cells in r. |
96 | 104 | // See http://www.unicode.org/reports/tr11/ |
97 | 105 | func (c *Condition) RuneWidth(r rune) int { |
98 | switch { | |
99 | case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining): | |
100 | return 0 | |
101 | case inTables(r, narrow): | |
102 | return 1 | |
103 | case (c.EastAsianWidth && IsAmbiguousWidth(r)) || inTables(r, doublewidth, neutral): | |
104 | return 2 | |
105 | default: | |
106 | return 1 | |
106 | // optimized version, verified by TestRuneWidthChecksums() | |
107 | if !c.EastAsianWidth { | |
108 | switch { | |
109 | case r < 0x20 || r > 0x10FFFF: | |
110 | return 0 | |
111 | case (r >= 0x7F && r <= 0x9F) || r == 0xAD: // nonprint | |
112 | return 0 | |
113 | case r < 0x300: | |
114 | return 1 | |
115 | case inTable(r, narrow): | |
116 | return 1 | |
117 | case inTables(r, nonprint, combining): | |
118 | return 0 | |
119 | case inTable(r, doublewidth): | |
120 | return 2 | |
121 | default: | |
122 | return 1 | |
123 | } | |
124 | } else { | |
125 | switch { | |
126 | case r < 0 || r > 0x10FFFF || inTables(r, nonprint, combining): | |
127 | return 0 | |
128 | case inTable(r, narrow): | |
129 | return 1 | |
130 | case inTables(r, ambiguous, doublewidth): | |
131 | return 2 | |
132 | case !c.StrictEmojiNeutral && inTables(r, ambiguous, emoji, narrow): | |
133 | return 2 | |
134 | default: | |
135 | return 1 | |
136 | } | |
107 | 137 | } |
108 | 138 | } |
109 | 139 |
64 | 64 | } |
65 | 65 | } |
66 | 66 | |
67 | func TestRuneWidthChecksums(t *testing.T) { | |
68 | var testcases = []struct { | |
69 | name string | |
70 | eastAsianWidth bool | |
71 | wantSHA string | |
72 | }{ | |
73 | {"ea-no", false, "4eb632b105d3b2c800dda9141381d0b8a95250a3a5c7f1a5ca2c4d4daaa85234"}, | |
74 | {"ea-yes", true, "c2ddc3bdf42d81d4c23050e21eda46eb639b38b15322d35e8eb6c26f3b83ce92"}, | |
75 | } | |
76 | ||
77 | for _, testcase := range testcases { | |
78 | c := NewCondition() | |
79 | c.EastAsianWidth = testcase.eastAsianWidth | |
80 | buf := make([]byte, utf8.MaxRune+1) | |
81 | for r := rune(0); r <= utf8.MaxRune; r++ { | |
82 | buf[r] = byte(c.RuneWidth(r)) | |
83 | } | |
84 | gotSHA := fmt.Sprintf("%x", sha256.Sum256(buf)) | |
85 | if gotSHA != testcase.wantSHA { | |
86 | t.Errorf("TestRuneWidthChecksums = %s,\n\tsha256 = %s want %s", | |
87 | testcase.name, gotSHA, testcase.wantSHA) | |
88 | } | |
89 | } | |
90 | } | |
91 | ||
67 | 92 | func checkInterval(first, last rune) bool { |
68 | 93 | return first >= 0 && first <= utf8.MaxRune && |
69 | 94 | last >= 0 && last <= utf8.MaxRune && |
86 | 111 | return true |
87 | 112 | } |
88 | 113 | |
89 | // This is a utility function in case that a table has changed. | |
90 | func printCompactTable(tbl table) { | |
91 | counter := 0 | |
92 | printEntry := func(first, last rune) { | |
93 | if counter%3 == 0 { | |
94 | fmt.Printf("\t") | |
95 | } | |
96 | fmt.Printf("{0x%04X, 0x%04X},", first, last) | |
97 | if (counter+1)%3 == 0 { | |
98 | fmt.Printf("\n") | |
99 | } else { | |
100 | fmt.Printf(" ") | |
101 | } | |
102 | counter++ | |
103 | } | |
104 | ||
105 | sort.Sort(&tbl) // just in case | |
106 | first := rune(-1) | |
107 | for i := range tbl { | |
108 | e := tbl[i] | |
109 | if !checkInterval(e.first, e.last) { // sanity check | |
110 | panic("invalid table") | |
111 | } | |
112 | if first < 0 { | |
113 | first = e.first | |
114 | } | |
115 | if i+1 < len(tbl) && e.last+1 >= tbl[i+1].first { // can be combined into one entry | |
116 | continue | |
117 | } | |
118 | printEntry(first, e.last) | |
119 | first = -1 | |
120 | } | |
121 | fmt.Printf("\n\n") | |
122 | } | |
123 | ||
124 | 114 | func TestSorted(t *testing.T) { |
125 | 115 | for _, ti := range tables { |
126 | 116 | if !sort.IsSorted(&ti.tbl) { |
128 | 118 | } |
129 | 119 | if !isCompact(t, &ti) { |
130 | 120 | t.Errorf("table not compact: %s", ti.name) |
131 | //printCompactTable(ti.tbl) | |
132 | 121 | } |
133 | 122 | } |
134 | 123 | } |
135 | 124 | |
136 | 125 | var runewidthtests = []struct { |
137 | in rune | |
138 | out int | |
139 | eaout int | |
126 | in rune | |
127 | out int | |
128 | eaout int | |
129 | nseout int | |
140 | 130 | }{ |
141 | {'世', 2, 2}, | |
142 | {'界', 2, 2}, | |
143 | {'セ', 1, 1}, | |
144 | {'カ', 1, 1}, | |
145 | {'イ', 1, 1}, | |
146 | {'☆', 1, 2}, // double width in ambiguous | |
147 | {'☺', 1, 1}, | |
148 | {'☻', 1, 1}, | |
149 | {'♥', 1, 2}, | |
150 | {'♦', 1, 1}, | |
151 | {'♣', 1, 2}, | |
152 | {'♠', 1, 2}, | |
153 | {'♂', 1, 2}, | |
154 | {'♀', 1, 2}, | |
155 | {'♪', 1, 2}, | |
156 | {'♫', 1, 1}, | |
157 | {'☼', 1, 1}, | |
158 | {'↕', 1, 2}, | |
159 | {'‼', 1, 1}, | |
160 | {'↔', 1, 2}, | |
161 | {'\x00', 0, 0}, | |
162 | {'\x01', 0, 0}, | |
163 | {'\u0300', 0, 0}, | |
164 | {'\u2028', 0, 0}, | |
165 | {'\u2029', 0, 0}, | |
166 | {'a', 1, 1}, // ASCII classified as "na" (narrow) | |
167 | {'⟦', 1, 1}, // non-ASCII classified as "na" (narrow) | |
131 | {'世', 2, 2, 2}, | |
132 | {'界', 2, 2, 2}, | |
133 | {'セ', 1, 1, 1}, | |
134 | {'カ', 1, 1, 1}, | |
135 | {'イ', 1, 1, 1}, | |
136 | {'☆', 1, 2, 2}, // double width in ambiguous | |
137 | {'☺', 1, 1, 2}, | |
138 | {'☻', 1, 1, 2}, | |
139 | {'♥', 1, 2, 2}, | |
140 | {'♦', 1, 1, 2}, | |
141 | {'♣', 1, 2, 2}, | |
142 | {'♠', 1, 2, 2}, | |
143 | {'♂', 1, 2, 2}, | |
144 | {'♀', 1, 2, 2}, | |
145 | {'♪', 1, 2, 2}, | |
146 | {'♫', 1, 1, 2}, | |
147 | {'☼', 1, 1, 2}, | |
148 | {'↕', 1, 2, 2}, | |
149 | {'‼', 1, 1, 2}, | |
150 | {'↔', 1, 2, 2}, | |
151 | {'\x00', 0, 0, 0}, | |
152 | {'\x01', 0, 0, 0}, | |
153 | {'\u0300', 0, 0, 0}, | |
154 | {'\u2028', 0, 0, 0}, | |
155 | {'\u2029', 0, 0, 0}, | |
156 | {'a', 1, 1, 1}, // ASCII classified as "na" (narrow) | |
157 | {'⟦', 1, 1, 1}, // non-ASCII classified as "na" (narrow) | |
158 | {'👁', 1, 1, 2}, | |
168 | 159 | } |
169 | 160 | |
170 | 161 | func TestRuneWidth(t *testing.T) { |
172 | 163 | c.EastAsianWidth = false |
173 | 164 | for _, tt := range runewidthtests { |
174 | 165 | if out := c.RuneWidth(tt.in); out != tt.out { |
175 | t.Errorf("RuneWidth(%q) = %d, want %d", tt.in, out, tt.out) | |
166 | t.Errorf("RuneWidth(%q) = %d, want %d (EastAsianWidth=false)", tt.in, out, tt.out) | |
176 | 167 | } |
177 | 168 | } |
178 | 169 | c.EastAsianWidth = true |
179 | 170 | for _, tt := range runewidthtests { |
180 | 171 | if out := c.RuneWidth(tt.in); out != tt.eaout { |
181 | t.Errorf("RuneWidth(%q) = %d, want %d", tt.in, out, tt.eaout) | |
172 | t.Errorf("RuneWidth(%q) = %d, want %d (EastAsianWidth=true)", tt.in, out, tt.eaout) | |
173 | } | |
174 | } | |
175 | c.StrictEmojiNeutral = false | |
176 | for _, tt := range runewidthtests { | |
177 | if out := c.RuneWidth(tt.in); out != tt.nseout { | |
178 | t.Errorf("RuneWidth(%q) = %d, want %d (StrictEmojiNeutral=false)", tt.in, out, tt.nseout) | |
182 | 179 | } |
183 | 180 | } |
184 | 181 | } |