1 | # word.tcl -- |
---|
2 | # |
---|
3 | # This file defines various procedures for computing word boundaries in |
---|
4 | # strings. This file is primarily needed so Tk text and entry widgets behave |
---|
5 | # properly for different platforms. |
---|
6 | # |
---|
7 | # Copyright (c) 1996 by Sun Microsystems, Inc. |
---|
8 | # Copyright (c) 1998 by Scritpics Corporation. |
---|
9 | # |
---|
10 | # See the file "license.terms" for information on usage and redistribution of |
---|
11 | # this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
12 | # |
---|
13 | # RCS: @(#) $Id: word.tcl,v 1.10 2007/12/13 15:26:03 dgp Exp $ |
---|
14 | |
---|
15 | # The following variables are used to determine which characters are |
---|
16 | # interpreted as white space. |
---|
17 | |
---|
18 | if {$::tcl_platform(platform) eq "windows"} { |
---|
19 | # Windows style - any but a unicode space char |
---|
20 | set ::tcl_wordchars {\S} |
---|
21 | set ::tcl_nonwordchars {\s} |
---|
22 | } else { |
---|
23 | # Motif style - any unicode word char (number, letter, or underscore) |
---|
24 | set ::tcl_wordchars {\w} |
---|
25 | set ::tcl_nonwordchars {\W} |
---|
26 | } |
---|
27 | |
---|
28 | # Arrange for caches of the real matcher REs to be kept, which enables the REs |
---|
29 | # themselves to be cached for greater performance (and somewhat greater |
---|
30 | # clarity too). |
---|
31 | |
---|
32 | namespace eval ::tcl { |
---|
33 | variable WordBreakRE |
---|
34 | array set WordBreakRE {} |
---|
35 | |
---|
36 | proc UpdateWordBreakREs args { |
---|
37 | # Ignores the arguments |
---|
38 | global tcl_wordchars tcl_nonwordchars |
---|
39 | variable WordBreakRE |
---|
40 | |
---|
41 | # To keep the RE strings short... |
---|
42 | set letter $tcl_wordchars |
---|
43 | set space $tcl_nonwordchars |
---|
44 | |
---|
45 | set WordBreakRE(after) "$letter$space|$space$letter" |
---|
46 | set WordBreakRE(before) "^.*($letter$space|$space$letter)" |
---|
47 | set WordBreakRE(end) "$space*$letter+$space" |
---|
48 | set WordBreakRE(next) "$letter*$space+$letter" |
---|
49 | set WordBreakRE(previous) "$space*($letter+)$space*\$" |
---|
50 | } |
---|
51 | |
---|
52 | # Initialize the cache |
---|
53 | UpdateWordBreakREs |
---|
54 | trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs |
---|
55 | trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs |
---|
56 | } |
---|
57 | |
---|
58 | # tcl_wordBreakAfter -- |
---|
59 | # |
---|
60 | # This procedure returns the index of the first word boundary after the |
---|
61 | # starting point in the given string, or -1 if there are no more boundaries in |
---|
62 | # the given string. The index returned refers to the first character of the |
---|
63 | # pair that comprises a boundary. |
---|
64 | # |
---|
65 | # Arguments: |
---|
66 | # str - String to search. |
---|
67 | # start - Index into string specifying starting point. |
---|
68 | |
---|
69 | proc tcl_wordBreakAfter {str start} { |
---|
70 | variable ::tcl::WordBreakRE |
---|
71 | set result {-1 -1} |
---|
72 | regexp -indices -start $start $WordBreakRE(after) $str result |
---|
73 | return [lindex $result 1] |
---|
74 | } |
---|
75 | |
---|
76 | # tcl_wordBreakBefore -- |
---|
77 | # |
---|
78 | # This procedure returns the index of the first word boundary before the |
---|
79 | # starting point in the given string, or -1 if there are no more boundaries in |
---|
80 | # the given string. The index returned refers to the second character of the |
---|
81 | # pair that comprises a boundary. |
---|
82 | # |
---|
83 | # Arguments: |
---|
84 | # str - String to search. |
---|
85 | # start - Index into string specifying starting point. |
---|
86 | |
---|
87 | proc tcl_wordBreakBefore {str start} { |
---|
88 | variable ::tcl::WordBreakRE |
---|
89 | set result {-1 -1} |
---|
90 | regexp -indices $WordBreakRE(before) [string range $str 0 $start] result |
---|
91 | return [lindex $result 1] |
---|
92 | } |
---|
93 | |
---|
94 | # tcl_endOfWord -- |
---|
95 | # |
---|
96 | # This procedure returns the index of the first end-of-word location after a |
---|
97 | # starting index in the given string. An end-of-word location is defined to be |
---|
98 | # the first whitespace character following the first non-whitespace character |
---|
99 | # after the starting point. Returns -1 if there are no more words after the |
---|
100 | # starting point. |
---|
101 | # |
---|
102 | # Arguments: |
---|
103 | # str - String to search. |
---|
104 | # start - Index into string specifying starting point. |
---|
105 | |
---|
106 | proc tcl_endOfWord {str start} { |
---|
107 | variable ::tcl::WordBreakRE |
---|
108 | set result {-1 -1} |
---|
109 | regexp -indices -start $start $WordBreakRE(end) $str result |
---|
110 | return [lindex $result 1] |
---|
111 | } |
---|
112 | |
---|
113 | # tcl_startOfNextWord -- |
---|
114 | # |
---|
115 | # This procedure returns the index of the first start-of-word location after a |
---|
116 | # starting index in the given string. A start-of-word location is defined to |
---|
117 | # be a non-whitespace character following a whitespace character. Returns -1 |
---|
118 | # if there are no more start-of-word locations after the starting point. |
---|
119 | # |
---|
120 | # Arguments: |
---|
121 | # str - String to search. |
---|
122 | # start - Index into string specifying starting point. |
---|
123 | |
---|
124 | proc tcl_startOfNextWord {str start} { |
---|
125 | variable ::tcl::WordBreakRE |
---|
126 | set result {-1 -1} |
---|
127 | regexp -indices -start $start $WordBreakRE(next) $str result |
---|
128 | return [lindex $result 1] |
---|
129 | } |
---|
130 | |
---|
131 | # tcl_startOfPreviousWord -- |
---|
132 | # |
---|
133 | # This procedure returns the index of the first start-of-word location before |
---|
134 | # a starting index in the given string. |
---|
135 | # |
---|
136 | # Arguments: |
---|
137 | # str - String to search. |
---|
138 | # start - Index into string specifying starting point. |
---|
139 | |
---|
140 | proc tcl_startOfPreviousWord {str start} { |
---|
141 | variable ::tcl::WordBreakRE |
---|
142 | set word {-1 -1} |
---|
143 | regexp -indices $WordBreakRE(previous) [string range $str 0 $start-1] \ |
---|
144 | result word |
---|
145 | return [lindex $word 0] |
---|
146 | } |
---|