[5049] | 1 | # word.tcl -- |
---|
| 2 | # |
---|
| 3 | # This file defines various procedures for computing word boundaries in |
---|
| 4 | # strings. This file is primarily needed so Tk text and entry widgets behave |
---|
| 5 | # properly for different platforms. |
---|
| 6 | # |
---|
| 7 | # Copyright (c) 1996 by Sun Microsystems, Inc. |
---|
| 8 | # Copyright (c) 1998 by Scritpics Corporation. |
---|
| 9 | # |
---|
| 10 | # See the file "license.terms" for information on usage and redistribution of |
---|
| 11 | # this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
| 12 | # |
---|
| 13 | # RCS: @(#) $Id: word.tcl,v 1.10 2007/12/13 15:26:03 dgp Exp $ |
---|
| 14 | |
---|
| 15 | # The following variables are used to determine which characters are |
---|
| 16 | # interpreted as white space. |
---|
| 17 | |
---|
| 18 | if {$::tcl_platform(platform) eq "windows"} { |
---|
| 19 | # Windows style - any but a unicode space char |
---|
| 20 | set ::tcl_wordchars {\S} |
---|
| 21 | set ::tcl_nonwordchars {\s} |
---|
| 22 | } else { |
---|
| 23 | # Motif style - any unicode word char (number, letter, or underscore) |
---|
| 24 | set ::tcl_wordchars {\w} |
---|
| 25 | set ::tcl_nonwordchars {\W} |
---|
| 26 | } |
---|
| 27 | |
---|
| 28 | # Arrange for caches of the real matcher REs to be kept, which enables the REs |
---|
| 29 | # themselves to be cached for greater performance (and somewhat greater |
---|
| 30 | # clarity too). |
---|
| 31 | |
---|
| 32 | namespace eval ::tcl { |
---|
| 33 | variable WordBreakRE |
---|
| 34 | array set WordBreakRE {} |
---|
| 35 | |
---|
| 36 | proc UpdateWordBreakREs args { |
---|
| 37 | # Ignores the arguments |
---|
| 38 | global tcl_wordchars tcl_nonwordchars |
---|
| 39 | variable WordBreakRE |
---|
| 40 | |
---|
| 41 | # To keep the RE strings short... |
---|
| 42 | set letter $tcl_wordchars |
---|
| 43 | set space $tcl_nonwordchars |
---|
| 44 | |
---|
| 45 | set WordBreakRE(after) "$letter$space|$space$letter" |
---|
| 46 | set WordBreakRE(before) "^.*($letter$space|$space$letter)" |
---|
| 47 | set WordBreakRE(end) "$space*$letter+$space" |
---|
| 48 | set WordBreakRE(next) "$letter*$space+$letter" |
---|
| 49 | set WordBreakRE(previous) "$space*($letter+)$space*\$" |
---|
| 50 | } |
---|
| 51 | |
---|
| 52 | # Initialize the cache |
---|
| 53 | UpdateWordBreakREs |
---|
| 54 | trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs |
---|
| 55 | trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs |
---|
| 56 | } |
---|
| 57 | |
---|
| 58 | # tcl_wordBreakAfter -- |
---|
| 59 | # |
---|
| 60 | # This procedure returns the index of the first word boundary after the |
---|
| 61 | # starting point in the given string, or -1 if there are no more boundaries in |
---|
| 62 | # the given string. The index returned refers to the first character of the |
---|
| 63 | # pair that comprises a boundary. |
---|
| 64 | # |
---|
| 65 | # Arguments: |
---|
| 66 | # str - String to search. |
---|
| 67 | # start - Index into string specifying starting point. |
---|
| 68 | |
---|
| 69 | proc tcl_wordBreakAfter {str start} { |
---|
| 70 | variable ::tcl::WordBreakRE |
---|
| 71 | set result {-1 -1} |
---|
| 72 | regexp -indices -start $start $WordBreakRE(after) $str result |
---|
| 73 | return [lindex $result 1] |
---|
| 74 | } |
---|
| 75 | |
---|
| 76 | # tcl_wordBreakBefore -- |
---|
| 77 | # |
---|
| 78 | # This procedure returns the index of the first word boundary before the |
---|
| 79 | # starting point in the given string, or -1 if there are no more boundaries in |
---|
| 80 | # the given string. The index returned refers to the second character of the |
---|
| 81 | # pair that comprises a boundary. |
---|
| 82 | # |
---|
| 83 | # Arguments: |
---|
| 84 | # str - String to search. |
---|
| 85 | # start - Index into string specifying starting point. |
---|
| 86 | |
---|
| 87 | proc tcl_wordBreakBefore {str start} { |
---|
| 88 | variable ::tcl::WordBreakRE |
---|
| 89 | set result {-1 -1} |
---|
| 90 | regexp -indices $WordBreakRE(before) [string range $str 0 $start] result |
---|
| 91 | return [lindex $result 1] |
---|
| 92 | } |
---|
| 93 | |
---|
| 94 | # tcl_endOfWord -- |
---|
| 95 | # |
---|
| 96 | # This procedure returns the index of the first end-of-word location after a |
---|
| 97 | # starting index in the given string. An end-of-word location is defined to be |
---|
| 98 | # the first whitespace character following the first non-whitespace character |
---|
| 99 | # after the starting point. Returns -1 if there are no more words after the |
---|
| 100 | # starting point. |
---|
| 101 | # |
---|
| 102 | # Arguments: |
---|
| 103 | # str - String to search. |
---|
| 104 | # start - Index into string specifying starting point. |
---|
| 105 | |
---|
| 106 | proc tcl_endOfWord {str start} { |
---|
| 107 | variable ::tcl::WordBreakRE |
---|
| 108 | set result {-1 -1} |
---|
| 109 | regexp -indices -start $start $WordBreakRE(end) $str result |
---|
| 110 | return [lindex $result 1] |
---|
| 111 | } |
---|
| 112 | |
---|
| 113 | # tcl_startOfNextWord -- |
---|
| 114 | # |
---|
| 115 | # This procedure returns the index of the first start-of-word location after a |
---|
| 116 | # starting index in the given string. A start-of-word location is defined to |
---|
| 117 | # be a non-whitespace character following a whitespace character. Returns -1 |
---|
| 118 | # if there are no more start-of-word locations after the starting point. |
---|
| 119 | # |
---|
| 120 | # Arguments: |
---|
| 121 | # str - String to search. |
---|
| 122 | # start - Index into string specifying starting point. |
---|
| 123 | |
---|
| 124 | proc tcl_startOfNextWord {str start} { |
---|
| 125 | variable ::tcl::WordBreakRE |
---|
| 126 | set result {-1 -1} |
---|
| 127 | regexp -indices -start $start $WordBreakRE(next) $str result |
---|
| 128 | return [lindex $result 1] |
---|
| 129 | } |
---|
| 130 | |
---|
| 131 | # tcl_startOfPreviousWord -- |
---|
| 132 | # |
---|
| 133 | # This procedure returns the index of the first start-of-word location before |
---|
| 134 | # a starting index in the given string. |
---|
| 135 | # |
---|
| 136 | # Arguments: |
---|
| 137 | # str - String to search. |
---|
| 138 | # start - Index into string specifying starting point. |
---|
| 139 | |
---|
| 140 | proc tcl_startOfPreviousWord {str start} { |
---|
| 141 | variable ::tcl::WordBreakRE |
---|
| 142 | set word {-1 -1} |
---|
| 143 | regexp -indices $WordBreakRE(previous) [string range $str 0 $start-1] \ |
---|
| 144 | result word |
---|
| 145 | return [lindex $word 0] |
---|
| 146 | } |
---|