PACKAGE OWA_PATTERN IS

/*
   OWA_PATTERN is a "regular expression" pattern matching package.
   It is built around 3 fundamental subprograms.
   They are: AMATCH, MATCH, and CHANGE.

   MATCH   provides the ability to determine *if* a pattern exists in
           a string.

   AMATCH  provides more flexibility to specify *WHERE* in the string
           to search for the pattern, and also gives more information
           in return by indicating *WHERE* in the string the end of
           the pattern was found.

   CHANGE  provides the ability to change occurrences of a matched
           pattern to a new string.

   The algorithms used here are derived from "Software Tools" by Brian
   Kernighan.  These algorithms have been extended to support most of
   Perl's pattern matching functionality.

   The regular expression elements that are supported are:

   Assertions:
   ----------
   ^        Matches the beginning of a line (or string)
   $        Matches the end of a line (or string)

   Quantifiers:
   -----------
   {n,m}    Must match at least n times, but not more than m times
   {n,}     Must match at least n times
   {n}      Must match exactly n times
   *        0 or more occurrences
   +        1 or more occurrences
   ?        0 or 1 occurrence(s)

   Legal atoms:
   -----------
   .        matches any character except \n

   A list of characters in square brackets [] is a class of characters;
   for example, [0-9] indicates match any character from 0 to 9.

   \n       matches newlines
   \t       matches tabs
   \d       matches digits [0-9]
   \D       matches non-digits [^0-9]
   \w       matches word characters (alphanumeric) [0-9a-z_A-Z]
   \W       matches non-word characters [^0-9a-z_A-Z]
   \s       matches whitespace characters [ \t\n]
   \S       matches non-whitespace characters [^ \t\n]
   \b       matches on "word" boundaries (between \w and \W)

   A backslashed x followed by two hexadecimal digits, such as \x7f,
   matches the character having that hexadecimal value.

   A backslashed 2 or 3 digit octal number, such as \033, matches the
   character with the specified value.

   Any other "backslashed" character matches itself.

   Valid flags passed to CHANGE, MATCH, AMATCH:
   -------------------------------------------
   i - perform pattern matching in a case-insensitive manner.
   g - perform all changes globally (all occurrences)

   Replacements
   ------------
   & can be used in the substitution string to "re-place" that which
   has been matched.

   For example:  change('Oracle 7.1.3', '\d\.\d\.\d', 'Version &');

   yields:       Oracle Version 7.1.3

   \<n> can be used to do backreferences, meaning to replace portions
   of the matched string:

      change('Matt Bookman','(Matt) (Bookman)','\2, \1')
         --> Bookman, Matt

   Match Extraction
   ----------------
   One can extract the matched values from the parenthesized patterns,
   for example:

      declare
         string     varchar2(32767);
         components owa_text.vc_arr;
      begin
         string := 'Today is 01/04/72';
         if (owa_pattern.match(string, '(\d\d)/(\d\d)/(\d\d)', components))
         then
            htp.print('The month is '||components(1));
            htp.print('The day is '||components(2));
            htp.print('The year is '||components(3));
         end if;
      end;

   Possible future enhancements:
   -----------------------------
   * \B - match on non-"word" boundaries (between \w and \w, or \W and \W)

   * "or" character matches:
        change(text,'(Unix|unix)','UNIX') would change both occurrences

   * Using control character references:

        A backslashed c followed by a single character, such as \cD,
        matches the corresponding control character.

   -- No support for:
   --    \b == Backspace (in a character class)
   --    \r == Carriage return
   --    \f == Form feed
   -- Modified support for:
   --    \s == A whitespace character -> [ \t\n\r\f]
   --    \S == A non-whitespace character
*/

   /*
      PATTERN - the pattern representation produced by GETPAT and
      accepted by the PATTERN overloads of MATCH and AMATCH.

      Each element must be able to hold a value for "Character
      Classes" indicating the number of items in that character
      class.  For single-byte character sets, which this currently
      supports, there are no more than 256 characters.
   */
   TYPE pattern IS TABLE OF VARCHAR2(4) INDEX BY BINARY_INTEGER;

   /*
      GETPAT - builds a pattern from a regular expression.

      Parameters:
         arg - The regular expression.
         pat - The pattern built from "arg", for use with the PATTERN
               overloads of MATCH and AMATCH.
   */
   PROCEDURE getpat(arg IN     VARCHAR2,
                    pat IN OUT pattern);

   /*
      MATCH (single string)

      The easiest to use of the "match" functions is the one taking
      the pattern as a VARCHAR2.  The PATTERN version would be used in
      the case where you wanted to perform some optimizations and you
      were matching against the same pattern repeatedly: use GETPAT to
      build the pattern once, then call MATCH (PATTERN version) and
      AMATCH repeatedly.

      Parameters:
         line     - Any text string.
         pat      - In the VARCHAR2 version, a regular expression.
                    In the PATTERN version, a pattern that has been
                    generated by GETPAT.
         backrefs - Where present, receives the values matched by the
                    parenthesized patterns (see "Match Extraction"
                    above).
         flags    - Only valid value currently is 'i' for
                    case-insensitive searches.

      Returns whether or not a match was made.
   */
   FUNCTION match(line  IN VARCHAR2,
                  pat   IN VARCHAR2,
                  flags IN VARCHAR2 DEFAULT NULL) RETURN BOOLEAN;

   FUNCTION match(line  IN     VARCHAR2,
                  pat   IN OUT pattern,
                  flags IN     VARCHAR2 DEFAULT NULL) RETURN BOOLEAN;

   FUNCTION match(line     IN  VARCHAR2,
                  pat      IN  VARCHAR2,
                  backrefs OUT owa_text.vc_arr,
                  flags    IN  VARCHAR2 DEFAULT NULL) RETURN BOOLEAN;

   FUNCTION match(line     IN     VARCHAR2,
                  pat      IN OUT pattern,
                  backrefs    OUT owa_text.vc_arr,
                  flags    IN     VARCHAR2 DEFAULT NULL) RETURN BOOLEAN;

   /*
      MATCH (multi-line)

      The following MATCH functions perform matches on multi-line text
      objects.

      Parameters:
         mline - A multi-line structure containing text strings.
         pat   - See MATCH (single string) above.
         rlist - NOTE(review): not described in this specification;
                 presumably identifies the rows of "mline" that
                 matched - confirm against OWA_TEXT and the package
                 body.
         flags - See MATCH (single string) above.
   */
   FUNCTION match(mline IN  owa_text.multi_line,
                  pat   IN  VARCHAR2,
                  rlist OUT owa_text.row_list,
                  flags IN  VARCHAR2 DEFAULT NULL) RETURN BOOLEAN;

   FUNCTION match(mline IN     owa_text.multi_line,
                  pat   IN OUT pattern,
                  rlist    OUT owa_text.row_list,
                  flags IN     VARCHAR2 DEFAULT NULL) RETURN BOOLEAN;

   /*
      AMATCH

      Parameters:
         line     - Any text string.
         from_loc - Indicates the index of the first character in
                    "line" to try to match.
         pat      - See MATCH above.
         backrefs - Where present, see MATCH above.
         flags    - See MATCH above.

      Returns the index of the first character after the end of the
      match.
   */
   FUNCTION amatch(line     IN VARCHAR2,
                   from_loc IN INTEGER,
                   pat      IN VARCHAR2,
                   flags    IN VARCHAR2 DEFAULT NULL) RETURN INTEGER;

   FUNCTION amatch(line     IN     VARCHAR2,
                   from_loc IN     INTEGER,
                   pat      IN OUT pattern,
                   flags    IN     VARCHAR2 DEFAULT NULL) RETURN INTEGER;

   FUNCTION amatch(line     IN  VARCHAR2,
                   from_loc IN  INTEGER,
                   pat      IN  VARCHAR2,
                   backrefs OUT owa_text.vc_arr,
                   flags    IN  VARCHAR2 DEFAULT NULL) RETURN INTEGER;

   FUNCTION amatch(line     IN     VARCHAR2,
                   from_loc IN     INTEGER,
                   pat      IN OUT pattern,
                   backrefs    OUT owa_text.vc_arr,
                   flags    IN     VARCHAR2 DEFAULT NULL) RETURN INTEGER;

   /*
      CHANGE (single string)

      Parameters:
         line     - Any text string.
         from_str - The regular expression to match in "line".
         to_str   - The substitution pattern to replace "from_str".
         flags    - i - case-insensitive search
                    g - make changes "g"lobally - each occurrence.
                        By default CHANGE quits after the first match.

      The function version returns the number of matches made.
   */
   FUNCTION change(line     IN OUT VARCHAR2,
                   from_str IN     VARCHAR2,
                   to_str   IN     VARCHAR2,
                   flags    IN     VARCHAR2 DEFAULT NULL) RETURN INTEGER;

   PROCEDURE change(line     IN OUT VARCHAR2,
                    from_str IN     VARCHAR2,
                    to_str   IN     VARCHAR2,
                    flags    IN     VARCHAR2 DEFAULT NULL);

   /*
      CHANGE (multi-line)

      Parameters:
         mline    - A multi-line structure containing text strings.
         from_str - The regular expression to match in "mline".
         to_str   - The substitution pattern to replace "from_str".
         flags    - i - case-insensitive search
                    g - make changes "g"lobally - each occurrence.
                        By default CHANGE quits after the first match
                        on each line.

      The function version returns the number of matches made.
   */
   FUNCTION change(mline    IN OUT owa_text.multi_line,
                   from_str IN     VARCHAR2,
                   to_str   IN     VARCHAR2,
                   flags    IN     VARCHAR2 DEFAULT NULL) RETURN INTEGER;

   PROCEDURE change(mline    IN OUT owa_text.multi_line,
                    from_str IN     VARCHAR2,
                    to_str   IN     VARCHAR2,
                    flags    IN     VARCHAR2 DEFAULT NULL);

END OWA_PATTERN;