1 PACKAGE BODY irc_query_parser_pkg
2 /* $Header: irctxqpr.pkb 120.0 2005/07/26 15:02:04 mbocutt noship $ */
3 AS
4 --
5 --
6 --
7 -- ---------------------------------------------------------------------------
8 -- |------------------------< remove_sequence >-------------------------------|
9 -- ---------------------------------------------------------------------------
10 -- Description:
11 -- This function replaces the sequential occurrences of a character with a
12 -- single occurrence of the character in the input text and returns the
13 -- resulting text.
14 -- Arguments
15 -- 1)The first argument,input_text, is the input string.
16 -- 2)The second argument,onechar, is the character whose sequential occurrences
17 -- should be replaced with a single occurrence.
18 -- -------------------------------------------------------------------------
19 FUNCTION remove_sequence(input_text in varchar2, onechar in varchar2)
20 return varchar2
21 AS
22 result varchar2(2000);
23 twochar varchar2(2);
24 BEGIN
25 twochar := onechar || onechar;
26 result:=ltrim(rtrim(input_text));
27 --
28 while(instrb(result,twochar)>0) loop
29 result:=replace(result,twochar,onechar);
30 end loop;
31 --
32 return result;
33 END remove_sequence;
34 --
35 --
36 -- -------------------------------------------------------------------------
37 -- |----------------------< remove_spl_chars >-----------------------------|
38 -- -------------------------------------------------------------------------
39 -- Description:
40 -- This function removes the non-allowed characters from the input text
41 -- and returns the remaining text.
42 --
43 -- The ascii values of the allowed characters are:-
44 -- 32,34,37,40, 41,42,45,
45 -- 48-57(both inclusive),
46 -- 65-90(both inclusive),
47 -- 97-122(both inclusive)
48 -- -------------------------------------------------------------------------
49 FUNCTION remove_spl_chars(input_text in varchar2)
50 return varchar2
51 AS
52 result VARCHAR2 (2000); -- String w/o special characters
53 len number := 1;
54 l_count number :=1;
55 onechar varchar2(30); -- Each Character of the i/p parse_text
56 asciiVal number;
57 BEGIN
58 result := input_text;
59 len := length(result);
60 --
61 while(l_count<=len)loop
62 onechar := substr (result, l_count, 1);
63 asciiVal := ascii(onechar);
64 if((asciiVal<32)
65 or (asciiVal >32 and asciiVal <34)
66 or (asciiVal >34 and asciiVal <37)
67 or (asciiVal >37 and asciiVal <40)
68 or (asciiVal >42 and asciiVal <45)
69 or (asciiVal >45 and asciiVal <48)
70 or (asciiVal >57 and asciiVal <65)
71 or (asciiVal >90 and asciiVal <97)
72 or (asciiVal >122 and asciiVal<170)
73 or (asciiVal >170 and asciiVal<181)
74 or (asciiVal >181 and asciiVal<192)
75 or (asciiVal =215)
76 or (asciiVal =247)
77 )
78 then
79 result := replace(result,onechar);
80 l_count:=l_count-1;
81 len := length(result);
82 end if;
83 l_count := l_count +1;
84 end loop;
85 --
86 --remove unneccesary spaces that would have
87 --cropped due to removing the special characters
88 --For e.g, 'a / b' becomes 'a b'due to the above code.
89 --
90 result := remove_sequence(result,' ');
91 return result;
92 END remove_spl_chars;
93 --
94 --
95 -- -------------------------------------------------------------------------
96 -- |--------------------------< query_parser >-----------------------------|
97 -- -------------------------------------------------------------------------
98 -- Description:
99 -- This function returns the parsed text of the input text
100 --
101 -- -------------------------------------------------------------------------
102 FUNCTION query_parser (input_text IN VARCHAR2)
103 RETURN VARCHAR2
104 AS
105 result varchar2(2000);
106 pos number;
107 pos2 number;
108 begin
109 result := upper(input_text);
110 result := remove_sequence(result,' ');
111
112 --remove +- as an invalid set of characters
113 result:= replace(result,' +-',' ');
114 result:= replace(result,'-+ ',' ');
115 result:=remove_spl_chars(result);
116 result:= replace(result,'*','%');
117 --Replace sequential occurrences of % with a single occurrence
118 result := remove_sequence(result,'%');
119
120 --Remove trailing '-' characters.
121 --We need to handle '-' character only as other
122 --operators are removed by remove_spl_chars()
123
124 while(substrb(result,length(result),1) = '-') loop
125 result := rtrim(rtrim(result,'-'));
126 end loop;
127
128 result:=replace(result,' - ',' ~');
129 result:=replace(result,' -',' ~');
130 result:=replace(result,'-','\-');
131 result:=replace(result,' AND ',' ');
132 result:=replace(result,' OR ',' |');
133 result:=replace(result,' NOT ',' ~');
134 result:=replace(result,'( ','(');
135 result:=replace(result,' )',')');
136 result:=replace(result,' % ',' ');
137 if(substrb(result,length(result)-1,2)=' %') then
138 result := substrb(result,1,length(result)-2);
139 end if;
140
141 result:=replace(result,' |','#OR#');
142 result:=replace(result,' ~','#NOT#');
143 result:=replace(result,' ','#AND#');
144 result:=replace(result,'#',' ');
145
146 result:=remove_sequence(result,'"');
147 pos:=instrb(result,'"',1,1);
148 while(pos>0) loop
149 pos2:=instrb(result,'"',1,2);
150 if(pos2>0) then
151 result:=substrb(result,1,pos-1)||'('||replace(substrb(result,pos+1,pos2-pos-1),' AND ',' ')||')'||substrb(result,pos2+1);
152 else
153 result := substrb(result,1,pos-1) || substrb(result,pos+1);
154 end if;
155 pos:=instrb(result,'"',1,1);
156 end loop;
157 result := remove_sequence(result,' ');
158 while(instrb(result,' AND AND ')>0) loop
159 result:=replace(result,' AND AND ',' AND ');
160 end loop;
161 result:=ltrim(rtrim(result));
162 while (substrb(result,length(result)-3,4) = ' AND') loop
163 result:=substrb(result,1,length(result)-4);
164 end loop;
165 return result;
166 END query_parser;
167 --
168 --
169 -- -------------------------------------------------------------------------
170 -- |------------------------< isInvalidToken >-------------------------------|
171 -- -------------------------------------------------------------------------
172 -- Description:
173 -- This function checks the validity of the parsed text and returns TRUE
174 -- in the following cases:-
175 -- a)the parsed text contains at least a wildcard in a token of length 2
176 -- OR
177 -- b)the parsed text contains atleast two wildcards in a token of length 3
178 -- OR
179 -- c)the parsed text contains a token that starts with '%(' or ends with ')%'
180 -- d)the parsed text contains only logical operators like AND, NOT, OR
181 -- Else, false is returned.
182 -- -------------------------------------------------------------------------
183 FUNCTION isInvalidToken (input_text IN VARCHAR2)
184 RETURN Boolean
185 AS
186 result varchar2(2000);
187 token irc_search_criteria.keywords%type;
188 pos number;
189 pos2 number;
190 isInvalidOperator boolean := true;
191 BEGIN
192 result := input_text;
193 while(length(result)>0) loop
194 pos := instrb(result,' ');
195 if(pos=0) then
196 pos:=length(result)+1;
197 end if;
198 token:=substrb(result,1,pos-1);
199 result := substrb(result,pos+1);
200 --dbms_output.put_line(token);
201 if(((length(token) = 2) and (instrb(token,'%') > 0))
202 OR((length(token) = 3) and (instrb(token,'%',1,2) > 0))) then
203 return true;
204 elsif (substrb(token,1,2)='%(' OR substrb(token,1,2)='%)' OR substrb(token,length(token)-1,2) = ')%' OR substrb(token,length(token)-1,2) = '(%') then
205 return true;
206 elsif (NOT((token = 'OR') OR (token = 'AND') OR (token = 'NOT'))) then
207 isInvalidOperator := false;
208 end if;
209 end loop;
210 return isInvalidOperator;
211 END isInvalidToken;
212 --
213 --
214 -- -------------------------------------------------------------------------
215 -- |------------------------ isInvalidKeyword >----------------------------|
216 -- -------------------------------------------------------------------------
217 -- Description:
218 -- This function checks the validity of the keyword and returns
219 -- TRUE if it is invalid and false otherwise.
220 -- A parsed keyword is invalid if
221 -- a)it has null tokens like () or ( )
222 -- b)any token contains only wildcard characters, eg, (%)
223 -- c)the parsed keyword contains '(IRC%)'
224 -- d)the parsed keyword has a leading negative term
225 -- e)the parsed keyword contains at least one wildcard in a token of
226 -- length 2 or at least two wildcards in a token of length 3
227 -- -------------------------------------------------------------------------
228 FUNCTION isInvalidKeyword (input_text IN VARCHAR2)
229 RETURN Boolean
230 AS
231 result varchar2(2000);
232 BEGIN
233 -- Obtain the parsed text from the query_parser()
234 result := query_parser(input_text);
235 --
236 --case 1 : Return true if the parsed keyword contains '()' or '( )'
237 -- or is null
238 if(result is null or instrb(result,'()')>0 or instrb(result,'( )')>0) then
239 return true;
240 -- case2 Return true if the parsed keyword contains just %
241 elsif(instrb(result,' % ')>0) then
242 return true;
243 elsif(substrb(result,1,2)='% ') then
244 return true;
245 elsif(substrb(result,-2,2)=' %') then
246 return true;
247 elsif(result='%') then
248 return true;
249 --case 3 : Return true if the parsed keyword is IRC% or contains 'IRC%'
250 elsif(instrb(result,' IRC% ')>0) then
251 return true;
252 elsif(substrb(result,1,5)='IRC% ') then
253 return true;
254 elsif(substrb(result,-5,5)=' IRC%') then
255 return true;
256 elsif(result='IRC%') then
257 return true;
258 -- case 4 check for starting with a - which always fails
259 elsif (substrb(result,1,2)='\-') then
260 return true;
261 -- case5 Return true if the parsed keyword contains
262 -- one wildcard in a token of length 2
263 -- or two wildcards in a token of length 3
264 elsif(isInvalidToken(result)) then
265 return true;
266 end if;
267 --
268 return false;
269 END isInvalidKeyword;
270 --
271 --
272 END irc_query_parser_pkg;