1 | # This file contains a collection of tests for tclUtf.c |
---|
2 | # Sourcing this file into Tcl runs the tests and generates output for |
---|
3 | # errors. No output means no errors were found. |
---|
4 | # |
---|
5 | # Copyright (c) 1997 Sun Microsystems, Inc. |
---|
6 | # Copyright (c) 1998-1999 by Scriptics Corporation. |
---|
7 | # |
---|
8 | # See the file "license.terms" for information on usage and redistribution |
---|
9 | # of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
10 | # |
---|
11 | # RCS: @(#) $Id: utf.test,v 1.14 2007/05/02 01:37:28 kennykb Exp $ |
---|
12 | |
---|
13 | if {[lsearch [namespace children] ::tcltest] == -1} { |
---|
14 | package require tcltest 2 |
---|
15 | namespace import -force ::tcltest::* |
---|
16 | } |
---|
17 | |
---|
18 | catch {unset x} |
---|
19 | |
---|
20 | test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} { |
---|
21 | set x \x01 |
---|
22 | } [bytestring "\x01"] |
---|
23 | test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} { |
---|
24 | set x "\x00" |
---|
25 | } [bytestring "\xc0\x80"] |
---|
26 | test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { |
---|
27 | set x "\xe0" |
---|
28 | } [bytestring "\xc3\xa0"] |
---|
29 | test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { |
---|
30 | set x "\u4e4e" |
---|
31 | } [bytestring "\xe4\xb9\x8e"] |
---|
32 | test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} { |
---|
33 | string length [format %c -1] |
---|
34 | } 1 |
---|
35 | |
---|
36 | test utf-2.1 {Tcl_UtfToUniChar: low ascii} { |
---|
37 | string length "abc" |
---|
38 | } {3} |
---|
39 | test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} { |
---|
40 | string length [bytestring "\x82\x83\x84"] |
---|
41 | } {3} |
---|
42 | test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} { |
---|
43 | string length [bytestring "\xC2"] |
---|
44 | } {1} |
---|
45 | test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} { |
---|
46 | string length [bytestring "\xC2\xa2"] |
---|
47 | } {1} |
---|
48 | test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} { |
---|
49 | string length [bytestring "\xE2"] |
---|
50 | } {1} |
---|
51 | test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} { |
---|
52 | string length [bytestring "\xE2\xA2"] |
---|
53 | } {2} |
---|
54 | test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} { |
---|
55 | string length [bytestring "\xE4\xb9\x8e"] |
---|
56 | } {1} |
---|
57 | test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} { |
---|
58 | string length [bytestring "\xF4\xA2\xA2\xA2"] |
---|
59 | } {4} |
---|
60 | |
---|
61 | test utf-3.1 {Tcl_UtfCharComplete} { |
---|
62 | } {} |
---|
63 | |
---|
64 | testConstraint testnumutfchars [llength [info commands testnumutfchars]] |
---|
65 | test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { |
---|
66 | testnumutfchars "" |
---|
67 | } {0} |
---|
68 | test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars { |
---|
69 | testnumutfchars [bytestring "\xC2\xA2"] |
---|
70 | } {1} |
---|
71 | test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars { |
---|
72 | testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] |
---|
73 | } {7} |
---|
74 | test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars { |
---|
75 | testnumutfchars [bytestring "\xC0\x80"] |
---|
76 | } {1} |
---|
77 | test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { |
---|
78 | testnumutfchars "" 1 |
---|
79 | } {0} |
---|
80 | test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars { |
---|
81 | testnumutfchars [bytestring "\xC2\xA2"] 1 |
---|
82 | } {1} |
---|
83 | test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars { |
---|
84 | testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1 |
---|
85 | } {7} |
---|
86 | test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars { |
---|
87 | testnumutfchars [bytestring "\xC0\x80"] 1 |
---|
88 | } {1} |
---|
89 | |
---|
90 | test utf-5.1 {Tcl_UtfFindFirsts} { |
---|
91 | } {} |
---|
92 | |
---|
93 | test utf-6.1 {Tcl_UtfNext} { |
---|
94 | } {} |
---|
95 | |
---|
96 | test utf-7.1 {Tcl_UtfPrev} { |
---|
97 | } {} |
---|
98 | |
---|
99 | test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { |
---|
100 | string index abcd 0 |
---|
101 | } {a} |
---|
102 | test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { |
---|
103 | string index \u4e4e\u25a 0 |
---|
104 | } "\u4e4e" |
---|
105 | test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { |
---|
106 | string index abcd 2 |
---|
107 | } {c} |
---|
108 | test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { |
---|
109 | string index \u4e4e\u25a\xff\u543 2 |
---|
110 | } "\uff" |
---|
111 | |
---|
112 | test utf-9.1 {Tcl_UtfAtIndex: index = 0} { |
---|
113 | string range abcd 0 2 |
---|
114 | } {abc} |
---|
115 | test utf-9.2 {Tcl_UtfAtIndex: index > 0} { |
---|
116 | string range \u4e4e\u25a\xff\u543klmnop 1 5 |
---|
117 | } "\u25a\xff\u543kl" |
---|
118 | |
---|
119 | |
---|
120 | test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { |
---|
121 | set x \n |
---|
122 | } { |
---|
123 | } |
---|
124 | test utf-10.2 {Tcl_UtfBackslash: \u subst} { |
---|
125 | set x \ua2 |
---|
126 | } [bytestring "\xc2\xa2"] |
---|
127 | test utf-10.3 {Tcl_UtfBackslash: longer \u subst} { |
---|
128 | set x \u4e21 |
---|
129 | } [bytestring "\xe4\xb8\xa1"] |
---|
130 | test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} { |
---|
131 | set x \u4e2k |
---|
132 | } "[bytestring \xd3\xa2]k" |
---|
133 | test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} { |
---|
134 | set x \u4e216 |
---|
135 | } "[bytestring \xe4\xb8\xa1]6" |
---|
136 | proc bsCheck {char num} { |
---|
137 | global errNum |
---|
138 | test utf-10.$errNum {backslash substitution} { |
---|
139 | scan $char %c value |
---|
140 | set value |
---|
141 | } $num |
---|
142 | incr errNum |
---|
143 | } |
---|
144 | set errNum 6 |
---|
145 | bsCheck \b 8 |
---|
146 | bsCheck \e 101 |
---|
147 | bsCheck \f 12 |
---|
148 | bsCheck \n 10 |
---|
149 | bsCheck \r 13 |
---|
150 | bsCheck \t 9 |
---|
151 | bsCheck \v 11 |
---|
152 | bsCheck \{ 123 |
---|
153 | bsCheck \} 125 |
---|
154 | bsCheck \[ 91 |
---|
155 | bsCheck \] 93 |
---|
156 | bsCheck \$ 36 |
---|
157 | bsCheck \ 32 |
---|
158 | bsCheck \; 59 |
---|
159 | bsCheck \\ 92 |
---|
160 | bsCheck \Ca 67 |
---|
161 | bsCheck \Ma 77 |
---|
162 | bsCheck \CMa 67 |
---|
163 | # prior to 8.3, this returned 8, as \8 as accepted as an |
---|
164 | # octal value - but it isn't! [Bug: 3975] |
---|
165 | bsCheck \8a 56 |
---|
166 | bsCheck \14 12 |
---|
167 | bsCheck \141 97 |
---|
168 | bsCheck b\0 98 |
---|
169 | bsCheck \x 120 |
---|
170 | bsCheck \xa 10 |
---|
171 | bsCheck \xA 10 |
---|
172 | bsCheck \x41 65 |
---|
173 | bsCheck \x541 65 |
---|
174 | bsCheck \u 117 |
---|
175 | bsCheck \uk 117 |
---|
176 | bsCheck \u41 65 |
---|
177 | bsCheck \ua 10 |
---|
178 | bsCheck \uA 10 |
---|
179 | bsCheck \340 224 |
---|
180 | bsCheck \ua1 161 |
---|
181 | bsCheck \u4e21 20001 |
---|
182 | |
---|
183 | test utf-11.1 {Tcl_UtfToUpper} { |
---|
184 | string toupper {} |
---|
185 | } {} |
---|
186 | test utf-11.2 {Tcl_UtfToUpper} { |
---|
187 | string toupper abc |
---|
188 | } ABC |
---|
189 | test utf-11.3 {Tcl_UtfToUpper} { |
---|
190 | string toupper \u00e3ab |
---|
191 | } \u00c3AB |
---|
192 | test utf-11.4 {Tcl_UtfToUpper} { |
---|
193 | string toupper \u01e3ab |
---|
194 | } \u01e2AB |
---|
195 | |
---|
196 | test utf-12.1 {Tcl_UtfToLower} { |
---|
197 | string tolower {} |
---|
198 | } {} |
---|
199 | test utf-12.2 {Tcl_UtfToLower} { |
---|
200 | string tolower ABC |
---|
201 | } abc |
---|
202 | test utf-12.3 {Tcl_UtfToLower} { |
---|
203 | string tolower \u00c3AB |
---|
204 | } \u00e3ab |
---|
205 | test utf-12.4 {Tcl_UtfToLower} { |
---|
206 | string tolower \u01e2AB |
---|
207 | } \u01e3ab |
---|
208 | |
---|
209 | test utf-13.1 {Tcl_UtfToTitle} { |
---|
210 | string totitle {} |
---|
211 | } {} |
---|
212 | test utf-13.2 {Tcl_UtfToTitle} { |
---|
213 | string totitle abc |
---|
214 | } Abc |
---|
215 | test utf-13.3 {Tcl_UtfToTitle} { |
---|
216 | string totitle \u00e3ab |
---|
217 | } \u00c3ab |
---|
218 | test utf-13.4 {Tcl_UtfToTitle} { |
---|
219 | string totitle \u01f3ab |
---|
220 | } \u01f2ab |
---|
221 | |
---|
222 | test utf-14.1 {Tcl_UtfNcasecmp} { |
---|
223 | string compare -nocase a b |
---|
224 | } -1 |
---|
225 | test utf-14.2 {Tcl_UtfNcasecmp} { |
---|
226 | string compare -nocase b a |
---|
227 | } 1 |
---|
228 | test utf-14.3 {Tcl_UtfNcasecmp} { |
---|
229 | string compare -nocase B a |
---|
230 | } 1 |
---|
231 | test utf-14.4 {Tcl_UtfNcasecmp} { |
---|
232 | string compare -nocase aBcB abca |
---|
233 | } 1 |
---|
234 | |
---|
235 | test utf-15.1 {Tcl_UniCharToUpper, negative delta} { |
---|
236 | string toupper aA |
---|
237 | } AA |
---|
238 | test utf-15.2 {Tcl_UniCharToUpper, positive delta} { |
---|
239 | string toupper \u0178\u00ff |
---|
240 | } \u0178\u0178 |
---|
241 | test utf-15.3 {Tcl_UniCharToUpper, no delta} { |
---|
242 | string toupper ! |
---|
243 | } ! |
---|
244 | |
---|
245 | test utf-16.1 {Tcl_UniCharToLower, negative delta} { |
---|
246 | string tolower aA |
---|
247 | } aa |
---|
248 | test utf-16.2 {Tcl_UniCharToLower, positive delta} { |
---|
249 | string tolower \u0178\u00ff |
---|
250 | } \u00ff\u00ff |
---|
251 | test utf-17.1 {Tcl_UniCharToLower, no delta} { |
---|
252 | string tolower ! |
---|
253 | } ! |
---|
254 | |
---|
255 | test utf-18.1 {Tcl_UniCharToTitle, add one for title} { |
---|
256 | string totitle \u01c4 |
---|
257 | } \u01c5 |
---|
258 | test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} { |
---|
259 | string totitle \u01c6 |
---|
260 | } \u01c5 |
---|
261 | test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { |
---|
262 | string totitle \u017f |
---|
263 | } \u0053 |
---|
264 | test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { |
---|
265 | string totitle \u00ff |
---|
266 | } \u0178 |
---|
267 | test utf-18.5 {Tcl_UniCharToTitle, no delta} { |
---|
268 | string totitle ! |
---|
269 | } ! |
---|
270 | |
---|
271 | test utf-19.1 {TclUniCharLen} { |
---|
272 | list [regexp \\d abc456def foo] $foo |
---|
273 | } {1 4} |
---|
274 | |
---|
275 | test utf-20.1 {TclUniCharNcmp} { |
---|
276 | } {} |
---|
277 | |
---|
278 | test utf-21.1 {TclUniCharIsAlnum} { |
---|
279 | # this returns 1 with Unicode 3 compliance |
---|
280 | string is alnum \u1040\u021f |
---|
281 | } {1} |
---|
282 | test utf-21.2 {unicode alnum char in regc_locale.c} { |
---|
283 | # this returns 1 with Unicode 3 compliance |
---|
284 | list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f] |
---|
285 | } {1 1} |
---|
286 | |
---|
287 | test utf-22.1 {TclUniCharIsWordChar} { |
---|
288 | string wordend "xyz123_bar fg" 0 |
---|
289 | } 10 |
---|
290 | test utf-22.2 {TclUniCharIsWordChar} { |
---|
291 | string wordend "x\u5080z123_bar\u203c fg" 0 |
---|
292 | } 10 |
---|
293 | |
---|
294 | test utf-23.1 {TclUniCharIsAlpha} { |
---|
295 | # this returns 1 with Unicode 3 compliance |
---|
296 | string is alpha \u021f |
---|
297 | } {1} |
---|
298 | test utf-23.2 {unicode alpha char in regc_locale.c} { |
---|
299 | # this returns 1 with Unicode 3 compliance |
---|
300 | regexp {^[[:alpha:]]+$} \u021f |
---|
301 | } {1} |
---|
302 | |
---|
303 | test utf-24.1 {TclUniCharIsDigit} { |
---|
304 | # this returns 1 with Unicode 3 compliance |
---|
305 | string is digit \u1040 |
---|
306 | } {1} |
---|
307 | test utf-24.2 {unicode digit char in regc_locale.c} { |
---|
308 | # this returns 1 with Unicode 3 compliance |
---|
309 | list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040] |
---|
310 | } {1 1} |
---|
311 | |
---|
312 | test utf-24.3 {TclUniCharIsSpace} { |
---|
313 | # this returns 1 with Unicode 3 compliance |
---|
314 | string is space \u1680 |
---|
315 | } {1} |
---|
316 | test utf-24.4 {unicode space char in regc_locale.c} { |
---|
317 | # this returns 1 with Unicode 3 compliance |
---|
318 | list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680] |
---|
319 | } {1 1} |
---|
320 | |
---|
321 | testConstraint teststringobj [llength [info commands teststringobj]] |
---|
322 | |
---|
323 | test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
---|
324 | -setup { |
---|
325 | testobj freeallvars |
---|
326 | } \ |
---|
327 | -body { |
---|
328 | teststringobj set 1 a |
---|
329 | teststringobj set 2 b |
---|
330 | teststringobj getunicode 1 |
---|
331 | teststringobj getunicode 2 |
---|
332 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
---|
333 | } \ |
---|
334 | -cleanup { |
---|
335 | testobj freeallvars |
---|
336 | } \ |
---|
337 | -result -1 |
---|
338 | test utf-25.2 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
---|
339 | -setup { |
---|
340 | testobj freeallvars |
---|
341 | } \ |
---|
342 | -body { |
---|
343 | teststringobj set 1 b |
---|
344 | teststringobj set 2 a |
---|
345 | teststringobj getunicode 1 |
---|
346 | teststringobj getunicode 2 |
---|
347 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
---|
348 | } \ |
---|
349 | -cleanup { |
---|
350 | testobj freeallvars |
---|
351 | } \ |
---|
352 | -result 1 |
---|
353 | test utf-25.3 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
---|
354 | -setup { |
---|
355 | testobj freeallvars |
---|
356 | } \ |
---|
357 | -body { |
---|
358 | teststringobj set 1 B |
---|
359 | teststringobj set 2 a |
---|
360 | teststringobj getunicode 1 |
---|
361 | teststringobj getunicode 2 |
---|
362 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
---|
363 | } \ |
---|
364 | -cleanup { |
---|
365 | testobj freeallvars |
---|
366 | } \ |
---|
367 | -result 1 |
---|
368 | |
---|
369 | test utf-25.4 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
---|
370 | -setup { |
---|
371 | testobj freeallvars |
---|
372 | } \ |
---|
373 | -body { |
---|
374 | teststringobj set 1 aBcB |
---|
375 | teststringobj set 2 abca |
---|
376 | teststringobj getunicode 1 |
---|
377 | teststringobj getunicode 2 |
---|
378 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
---|
379 | } \ |
---|
380 | -cleanup { |
---|
381 | testobj freeallvars |
---|
382 | } \ |
---|
383 | -result 1 |
---|
384 | |
---|
385 | # cleanup |
---|
386 | ::tcltest::cleanupTests |
---|
387 | return |
---|
388 | |
---|
389 | # Local Variables: |
---|
390 | # mode: tcl |
---|
391 | # End: |
---|