Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: downloads/tcl8.5.2/tests/utf.test @ 47

Last change on this file since 47 was 25, checked in by landauf, 17 years ago

added tcl to libs

File size: 9.8 KB
Line 
1# This file contains a collection of tests for tclUtf.c
2# Sourcing this file into Tcl runs the tests and generates output for
3# errors.  No output means no errors were found.
4#
5# Copyright (c) 1997 Sun Microsystems, Inc.
6# Copyright (c) 1998-1999 by Scriptics Corporation.
7#
8# See the file "license.terms" for information on usage and redistribution
9# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
10#
11# RCS: @(#) $Id: utf.test,v 1.14 2007/05/02 01:37:28 kennykb Exp $
12
13if {[lsearch [namespace children] ::tcltest] == -1} {
14    package require tcltest 2
15    namespace import -force ::tcltest::*
16}
17
18catch {unset x}
19
20test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} {
21    set x \x01
22} [bytestring "\x01"]
23test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} {
24    set x "\x00"
25} [bytestring "\xc0\x80"]
26test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} {
27    set x "\xe0"
28} [bytestring "\xc3\xa0"]
29test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} {
30    set x "\u4e4e"
31} [bytestring "\xe4\xb9\x8e"]
32test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} {
33    string length [format %c -1]
34} 1
35
36test utf-2.1 {Tcl_UtfToUniChar: low ascii} {
37    string length "abc"
38} {3}
39test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} {
40    string length [bytestring "\x82\x83\x84"]
41} {3}
42test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} {
43    string length [bytestring "\xC2"]
44} {1}
45test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} {
46    string length [bytestring "\xC2\xa2"]
47} {1}
48test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} {
49    string length [bytestring "\xE2"]
50} {1}
51test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
52    string length [bytestring "\xE2\xA2"]
53} {2}
54test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
55    string length [bytestring "\xE4\xb9\x8e"]
56} {1}
57test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
58    string length [bytestring "\xF4\xA2\xA2\xA2"]
59} {4}
60
61test utf-3.1 {Tcl_UtfCharComplete} {
62} {}
63
64testConstraint testnumutfchars [llength [info commands testnumutfchars]]
65test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars {
66    testnumutfchars ""
67} {0}
68test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars {
69    testnumutfchars [bytestring "\xC2\xA2"]
70} {1}
71test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars {
72    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
73} {7}
74test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars {
75    testnumutfchars [bytestring "\xC0\x80"]
76} {1}
77test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars {
78    testnumutfchars "" 1
79} {0}
80test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars {
81    testnumutfchars [bytestring "\xC2\xA2"] 1
82} {1}
83test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars {
84    testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
85} {7}
86test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars {
87    testnumutfchars [bytestring "\xC0\x80"] 1
88} {1}
89
90test utf-5.1 {Tcl_UtfFindFirsts} {
91} {}
92
93test utf-6.1 {Tcl_UtfNext} {
94} {}
95
96test utf-7.1 {Tcl_UtfPrev} {
97} {}
98
99test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
100    string index abcd 0
101} {a}
102test utf-8.2 {Tcl_UniCharAtIndex: index = 0} {
103    string index \u4e4e\u25a 0
104} "\u4e4e"
105test utf-8.3 {Tcl_UniCharAtIndex: index > 0} {
106    string index abcd 2
107} {c}
108test utf-8.4 {Tcl_UniCharAtIndex: index > 0} {
109    string index \u4e4e\u25a\xff\u543 2
110} "\uff"
111
112test utf-9.1 {Tcl_UtfAtIndex: index = 0} {
113    string range abcd 0 2
114} {abc}
115test utf-9.2 {Tcl_UtfAtIndex: index > 0} {
116    string range \u4e4e\u25a\xff\u543klmnop 1 5
117} "\u25a\xff\u543kl"
118
119
120test utf-10.1 {Tcl_UtfBackslash: dst == NULL} {
121    set x \n
122} {
123}
124test utf-10.2 {Tcl_UtfBackslash: \u subst} {
125    set x \ua2
126} [bytestring "\xc2\xa2"]
127test utf-10.3 {Tcl_UtfBackslash: longer \u subst} {
128    set x \u4e21
129} [bytestring "\xe4\xb8\xa1"]
130test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} {
131    set x \u4e2k
132} "[bytestring \xd3\xa2]k"
133test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} {
134    set x \u4e216
135} "[bytestring \xe4\xb8\xa1]6"
136proc bsCheck {char num} {
137    global errNum
138    test utf-10.$errNum {backslash substitution} {
139        scan $char %c value
140        set value
141    } $num
142    incr errNum
143}
144set errNum 6
145bsCheck \b      8
146bsCheck \e      101
147bsCheck \f      12
148bsCheck \n      10
149bsCheck \r      13
150bsCheck \t      9
151bsCheck \v      11
152bsCheck \{      123
153bsCheck \}      125
154bsCheck \[      91
155bsCheck \]      93
156bsCheck \$      36
157bsCheck \       32
158bsCheck \;      59
159bsCheck \\      92
160bsCheck \Ca     67
161bsCheck \Ma     77
162bsCheck \CMa    67
163# prior to 8.3, this returned 8, as \8 as accepted as an
164# octal value - but it isn't! [Bug: 3975]
165bsCheck \8a     56
166bsCheck \14     12
167bsCheck \141    97
168bsCheck b\0     98
169bsCheck \x      120
170bsCheck \xa     10
171bsCheck \xA     10
172bsCheck \x41    65
173bsCheck \x541   65
174bsCheck \u      117
175bsCheck \uk     117
176bsCheck \u41    65
177bsCheck \ua     10
178bsCheck \uA     10
179bsCheck \340    224
180bsCheck \ua1    161
181bsCheck \u4e21  20001
182
183test utf-11.1 {Tcl_UtfToUpper} {
184    string toupper {}
185} {}
186test utf-11.2 {Tcl_UtfToUpper} {
187    string toupper abc
188} ABC
189test utf-11.3 {Tcl_UtfToUpper} {
190    string toupper \u00e3ab
191} \u00c3AB
192test utf-11.4 {Tcl_UtfToUpper} {
193    string toupper \u01e3ab
194} \u01e2AB
195
196test utf-12.1 {Tcl_UtfToLower} {
197    string tolower {}
198} {}
199test utf-12.2 {Tcl_UtfToLower} {
200    string tolower ABC
201} abc
202test utf-12.3 {Tcl_UtfToLower} {
203    string tolower \u00c3AB
204} \u00e3ab
205test utf-12.4 {Tcl_UtfToLower} {
206    string tolower \u01e2AB
207} \u01e3ab
208
209test utf-13.1 {Tcl_UtfToTitle} {
210    string totitle {}
211} {}
212test utf-13.2 {Tcl_UtfToTitle} {
213    string totitle abc
214} Abc
215test utf-13.3 {Tcl_UtfToTitle} {
216    string totitle \u00e3ab
217} \u00c3ab
218test utf-13.4 {Tcl_UtfToTitle} {
219    string totitle \u01f3ab
220} \u01f2ab
221
222test utf-14.1 {Tcl_UtfNcasecmp} {
223    string compare -nocase a b
224} -1
225test utf-14.2 {Tcl_UtfNcasecmp} {
226    string compare -nocase b a
227} 1
228test utf-14.3 {Tcl_UtfNcasecmp} {
229    string compare -nocase B a
230} 1
231test utf-14.4 {Tcl_UtfNcasecmp} {
232    string compare -nocase aBcB abca
233} 1
234
235test utf-15.1 {Tcl_UniCharToUpper, negative delta} {
236    string toupper aA
237} AA
238test utf-15.2 {Tcl_UniCharToUpper, positive delta} {
239    string toupper \u0178\u00ff
240} \u0178\u0178
241test utf-15.3 {Tcl_UniCharToUpper, no delta} {
242    string toupper !
243} !
244
245test utf-16.1 {Tcl_UniCharToLower, negative delta} {
246    string tolower aA
247} aa
248test utf-16.2 {Tcl_UniCharToLower, positive delta} {
249    string tolower \u0178\u00ff
250} \u00ff\u00ff
251test utf-17.1 {Tcl_UniCharToLower, no delta} {
252    string tolower !
253} !
254
255test utf-18.1 {Tcl_UniCharToTitle, add one for title} {
256    string totitle \u01c4
257} \u01c5
258test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} {
259    string totitle \u01c6
260} \u01c5
261test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} {
262    string totitle \u017f
263} \u0053
264test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} {
265    string totitle \u00ff
266} \u0178
267test utf-18.5 {Tcl_UniCharToTitle, no delta} {
268    string totitle !
269} !
270
271test utf-19.1 {TclUniCharLen} {
272    list [regexp \\d abc456def foo] $foo
273} {1 4}
274
275test utf-20.1 {TclUniCharNcmp} {
276} {}
277
278test utf-21.1 {TclUniCharIsAlnum} {
279    # this returns 1 with Unicode 3 compliance
280    string is alnum \u1040\u021f
281} {1}
282test utf-21.2 {unicode alnum char in regc_locale.c} {
283    # this returns 1 with Unicode 3 compliance
284    list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f]
285} {1 1}
286
287test utf-22.1 {TclUniCharIsWordChar} {
288    string wordend "xyz123_bar fg" 0
289} 10
290test utf-22.2 {TclUniCharIsWordChar} {
291    string wordend "x\u5080z123_bar\u203c fg" 0
292} 10
293
294test utf-23.1 {TclUniCharIsAlpha} {
295    # this returns 1 with Unicode 3 compliance
296    string is alpha \u021f
297} {1}
298test utf-23.2 {unicode alpha char in regc_locale.c} {
299    # this returns 1 with Unicode 3 compliance
300    regexp {^[[:alpha:]]+$} \u021f
301} {1}
302
303test utf-24.1 {TclUniCharIsDigit} {
304    # this returns 1 with Unicode 3 compliance
305    string is digit \u1040
306} {1}
307test utf-24.2 {unicode digit char in regc_locale.c} {
308    # this returns 1 with Unicode 3 compliance
309    list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040]
310} {1 1}
311
312test utf-24.3 {TclUniCharIsSpace} {
313    # this returns 1 with Unicode 3 compliance
314    string is space \u1680
315} {1}
316test utf-24.4 {unicode space char in regc_locale.c} {
317    # this returns 1 with Unicode 3 compliance
318    list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680]
319} {1 1}
320
321testConstraint teststringobj [llength [info commands teststringobj]]
322
323test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \
324    -setup {
325        testobj freeallvars
326    } \
327    -body {
328        teststringobj set 1 a
329        teststringobj set 2 b
330        teststringobj getunicode 1
331        teststringobj getunicode 2
332        string compare -nocase [teststringobj get 1] [teststringobj get 2]
333    } \
334    -cleanup {
335        testobj freeallvars
336    } \
337    -result -1
338test utf-25.2 {Tcl_UniCharNcasecmp} -constraints teststringobj \
339    -setup {
340        testobj freeallvars
341    } \
342    -body {
343        teststringobj set 1 b
344        teststringobj set 2 a
345        teststringobj getunicode 1
346        teststringobj getunicode 2
347        string compare -nocase [teststringobj get 1] [teststringobj get 2]
348    } \
349    -cleanup {
350        testobj freeallvars
351    } \
352    -result 1
353test utf-25.3 {Tcl_UniCharNcasecmp} -constraints teststringobj \
354    -setup {
355        testobj freeallvars
356    } \
357    -body {
358        teststringobj set 1 B
359        teststringobj set 2 a
360        teststringobj getunicode 1
361        teststringobj getunicode 2
362        string compare -nocase [teststringobj get 1] [teststringobj get 2]
363    } \
364    -cleanup {
365        testobj freeallvars
366    } \
367    -result 1
368
369test utf-25.4 {Tcl_UniCharNcasecmp} -constraints teststringobj \
370    -setup {
371        testobj freeallvars
372    } \
373    -body {
374        teststringobj set 1 aBcB
375        teststringobj set 2 abca
376        teststringobj getunicode 1
377        teststringobj getunicode 2
378        string compare -nocase [teststringobj get 1] [teststringobj get 2]
379    } \
380    -cleanup {
381        testobj freeallvars
382    } \
383    -result 1
384
385# cleanup
386::tcltest::cleanupTests
387return
388
389# Local Variables:
390# mode: tcl
391# End:
Note: See TracBrowser for help on using the repository browser.