DBA Data[Home] [Help]

PACKAGE: OWAPUB.OWA_PATTERN

Source


1 package OWA_PATTERN is
2 
3    /*
4    The package OWA_PATTERN is a "regular expression" pattern matching
5    package.  There are 3 fundamental subprograms in OWA_PATTERN.
6    They are: AMATCH, MATCH, and CHANGE.
7 
8    MATCH provides the ability to determine *if* a pattern exists in a
9    string.
10 
11    AMATCH provides more flexibility to specify *WHERE* in the string to
12    search for the pattern and also gives more information in return by
13    indicating *WHERE* in the string the end of the pattern was found.
14 
15    CHANGE provides the ability to change occurrences of a matched pattern
16    to a new string.
17 
18    The algorithms used here are derived from "Software Tools" by Brian
19    Kernighan and P. J. Plauger.  These algorithms have been extended to
20    support most of Perl's pattern matching functionality.
21 
22    The regular expression elements that are supported are:
23 
24    Assertions:
25    ----------
26    ^ Matches the beginning of a line (or string)
27    $ Matches the end of a line (or string)
28 
29    Quantifiers:
30    -----------
31    {n,m} Must match at least n times, but not more than m times
32     {n,} Must match at least n times
33      {n} Must match exactly n times.
34        * 0 or more occurrences
35        + 1 or more occurrences
36        ? 0 or 1 occurrence(s)
37 
38    Legal atoms:
39    -----------
40    . matches any character except \n
41 
42    A list of characters in square brackets [] is a class of characters,
43    for example [0-9] indicates match any character from 0 to 9.
44 
45    \n matches newlines
46    \t matches tabs
47    \d matches digits [0-9]
48    \D matches non-digits [^0-9]
49    \w matches word characters (alphanumeric) [0-9a-z_A-Z]
50    \W matches non-word characters [^0-9a-z_A-Z]
51    \s matches whitespace characters [ \t\n]
52    \S matches non-whitespace characters [^ \t\n]
53    \b matches on "word" boundaries (between \w and \W)
54 
55    A backslashed x followed by two hexadecimal digits, such as \x7f,
56    matches the character having that hexadecimal value.
57 
58    A backslashed 2 or 3 digit octal number such as \033 matches the
59    character with the specified value.
60 
61    Any other "backslashed" character matches itself.
62 
63    Valid flags passed to CHANGE, MATCH, AMATCH:
64    -------------------------------------------
65    i - perform pattern matching in a case-insensitive manner.
66    g - perform all changes globally (all occurrences)
67 
68    Replacements
69    ------------
70    & can be used in the substitution string to "re-place" that which
71    has been matched.
72 
73    For example: change('Oracle 7.1.3', '\d\.\d\.\d', 'Version &');
74 
75                 yields: Oracle Version 7.1.3
76 
77    \<n> can be used to do backreferences, meaning to replace portions of
78       the matched string:
79 
80       change('Matt Bookman','(Matt) (Bookman)','\2, \1')
81           --> Bookman, Matt
82 
83    Match Extraction
84    ----------------
85    One can extract the matched values from the parenthesized patterns,
86    for example:
87 
88    declare
89       string     varchar2(32767);
90       components owa_text.vc_arr;
91    begin
92       string := 'Today is 01/04/72';
93       if (owa_pattern.match(string, '(\d\d)/(\d\d)/(\d\d)', components))
94       then
95          htp.print('The month is '||components(1));
96          htp.print('The day is '||components(2));
97          htp.print('The year is '||components(3));
98       end if;
99    end;
100 
101    Possible future enhancements:
102    -----------------------------
103    * \B - match on non-"word" boundaries (between \w and \w, or \W and \W)
104 
105    * "or" character matches:
106        change(text,'(Unix|unix)','UNIX') would change both occurrences
107 
108    * Using control character references:
109 
110         A backslashed c followed by a single character, such as \cD, matches
111         the corresponding control character.
112 
113    -- No support for:
114    --   \b == Backspace (in a character class)
115    --   \r == Carriage return
116    --   \f == Form feed
117    -- Modified support for:
118    --   \s == A whitespace character -> [ \t\n\r\f]
119    --   \S == A non-whitespace character
120 
121    */
122 
123    type pattern is table of varchar2(4) index by binary_integer;
124    /* A parsed pattern, as filled in by getpat.  It must be able to hold
125       a value for "Character Classes" indicating the number of items in
126       that character class.  For single-byte character sets, which this
127       currently supports, there are no more than 256 characters. */
128    /* Builds pat for repeated use with MATCH/AMATCH.  NOTE(review): arg is presumably the regular expression text - confirm. */
129    procedure getpat(arg in varchar2, pat in out pattern);
130 
131    /* The easiest to use of the "match" functions is the first,     */
132    /* which takes the regular expression as a string.  The second   */
133    /* would be used in the case where you wanted to perform some    */
134    /* optimizations and you were matching against the same          */
135    /* pattern repeatedly.  You could use getpat to build the        */
136    /* pattern, then call match (2nd version) and amatch repeatedly. */
137    function match(line  in varchar2,
138                   pat   in varchar2,
139                   flags in varchar2 DEFAULT NULL) return boolean;
140    function match(line  in     varchar2,
141                   pat   in out pattern,
142                   flags in     varchar2 DEFAULT NULL) return boolean;
143    /* As above, but also returning the parenthesized sub-matches in backrefs. */
144    function match(line  in        varchar2,
145                   pat   in        varchar2,
146                   backrefs    out owa_text.vc_arr,
147                   flags in        varchar2 DEFAULT NULL) return boolean;
148    function match(line     in     varchar2,
149                   pat      in out pattern,
150                   backrefs    out owa_text.vc_arr,
151                   flags    in     varchar2 DEFAULT NULL) return boolean;
152 
153    /* Parameters to MATCH */
154    /* line  - Any text string.                                        */
155    /* pat   - In the varchar2 overloads, pat is a regular expression. */
156    /*         In the pattern overloads, pat was generated by getpat.  */
157    /* flags - only valid value currently is 'i' for case-insensitive  */
158    /*         searches.                                               */
159    /* backrefs - values matched by the parenthesized patterns.        */
160    /* Function returns whether or not a match was made.               */
161    /* The following MATCH functions perform matches on multi-line text     */
162    /* objects (mline); pat and flags are as for the MATCH functions above. */
163    /* NOTE(review): rlist presumably reports which rows matched - confirm. */
164    function match(mline  in     owa_text.multi_line,
165                   pat    in     varchar2,
166                   rlist     out owa_text.row_list,
167                   flags  in     varchar2 DEFAULT NULL) return boolean;
168    function match(mline  in     owa_text.multi_line,
169                   pat    in out pattern,
170                   rlist     out owa_text.row_list,
171                   flags  in     varchar2 DEFAULT NULL) return boolean;
172 
173    /* AMATCH: match attempted from position from_loc; reports where the match ends. */
174    function amatch(line     in varchar2,
175                    from_loc in integer,
176                    pat      in varchar2,
177                    flags    in varchar2 DEFAULT NULL) return integer;
178    function amatch(line     in     varchar2,
179                    from_loc in     integer,
180                    pat      in out pattern,
181                    flags    in     varchar2 DEFAULT NULL) return integer;
182 
183    function amatch(line     in     varchar2,
184                    from_loc in     integer,
185                    pat      in     varchar2,
186                    backrefs    out owa_text.vc_arr,
187                    flags    in     varchar2 DEFAULT NULL) return integer;
188    function amatch(line     in     varchar2,
189                    from_loc in     integer,
190                    pat      in out pattern,
191                    backrefs    out owa_text.vc_arr,
192                    flags    in     varchar2 DEFAULT NULL) return integer;
193 
194    /* Parameters to AMATCH */
195    /* line  - Any text string.                                        */
196    /* from_loc - Indicates the index of the first character in "line" */
197    /*            to try to match.                                     */
198    /* pat   - See MATCH above.                                        */
199    /* flags - See MATCH above.                                        */
200    /* backrefs - Presumably as for MATCH above (TODO confirm).        */
201    /* Function returns the index of the first character after the end */
202    /* of the match.                                                   */
203    /* NOTE(review): no-match result presumably 0 - confirm in body.   */
204    function change(line     in out varchar2,
205                    from_str in     varchar2,
206                    to_str   in     varchar2,
207                    flags    in     varchar2 DEFAULT NULL) return integer;
208    /* Procedure form: same arguments, but no match count is returned. */
209    procedure change(line     in out varchar2,
210                     from_str in     varchar2,
211                     to_str   in     varchar2,
212                     flags    in     varchar2 DEFAULT NULL);
213 
214    /* Parameters to CHANGE (to_str may contain & and \<n> references) */
215    /* line     - Any text string; it is updated in place.             */
216    /* from_str - The regular expression to match in "line".           */
217    /* to_str   - The substitution pattern to replace "from_str"       */
218    /* flags    - i - case-insensitive search                          */
219    /*            g - make changes "g"lobally - each occurrence.       */
220    /*            By default CHANGE quits after the first match.       */
221    /* Function returns the number of matches made.                    */
222 
223    function change(mline    in out owa_text.multi_line,
224                    from_str in     varchar2,
225                    to_str   in     varchar2,
226                    flags    in     varchar2 DEFAULT NULL) return integer;
227    /* Procedure form: same arguments, but no match count is returned. */
228    procedure change(mline    in out owa_text.multi_line,
229                     from_str in     varchar2,
230                     to_str   in     varchar2,
231                     flags    in     varchar2 DEFAULT NULL);
232 
233    /* Parameters to CHANGE (multi-line form) */
234    /* mline    - A multi-line structure containing text strings.      */
235    /* from_str - The regular expression to match in "mline".          */
236    /* to_str   - The substitution pattern to replace "from_str"       */
237    /* flags    - i - case-insensitive search                          */
238    /*            g - make changes "g"lobally - each occurrence.       */
239    /*            By default CHANGE quits after the first match on     */
240    /*            each line.                                           */
241    /* Function returns the number of matches made.                    */
242 
243 end;