DBA Data[Home] [Help]

PACKAGE BODY: APPS.CS_KB_KWIC_UTIL

Source


4 /*
1 PACKAGE BODY CS_KB_KWIC_UTIL AS
2 /* $Header: cskbkwicb.pls 120.5.12020000.2 2012/07/05 05:37:08 gasankar ship $ */
3 
5  *
6  * +======================================================================+
7  * |                Copyright (c) 2005 Oracle Corporation                 |
8  * |                   Redwood Shores, California, USA                    |
9  * |                        All rights reserved.                          |
10  * +======================================================================+
11  *
12  *   FILENAME
13  *     cskbkwicb.pls
14  *   PURPOSE
15  *     Creates the package body for CS_KB_KWIC_UTIL
16  *     CS_KB_KWIC_UTIL supports the Keywords In Context implementation
17  *
18  *   HISTORY
19  *   12-APR-2005 HMEI Created.
20  *   11-JUL-2005 HMEI Changed get_segment_kwic to use different regular
21  *                    expressions for pre/post segement. Removed
22  *                    string reverse logic
23  *   12-JUL-2005 HMEI Changed get_segment_kwic to use custom logic for
24  *                    determining pre-segment.  Cannot use a single
25  *                    pattern ending in '$', since performance suffers.
26  *   18-JUN-2006 KLOU Fix bug 5217204
27  *                    Change size of l_kwic_segment to 32000 as the regexp_replace
28  *                    can return up to 32K of content.
29  *    06-May-2011 isugavan Bug 11837564 - WEIRD CHARACTERS SHOWS UP IN THE
30  *                                        SEARCH RESULT WHEN WE SEARCH KNOWLEDGE BASE DOC
31  */
32 
33 
34   -- Maximum keyword segment length
35   MAX_SEGMENT_LENGTH CONSTANT NUMBER := 200;
36   -- Left and Right padding used when calculating KWIC
37   SEGMENT_PADDING CONSTANT NUMBER := MAX_SEGMENT_LENGTH/2;
38 
39   --
40   -- This API highlights all keywords (p_text_query) in a given document
41   -- (p_document) using the specified start and end tags. This API is intended
42   -- for highlighting a short document, e.g. solution title or SR summary.
43   --
44   -- Parameters:
45   --   p_text_query VARCHAR2, keywords used for highlighting
46   --   p_document   VARCHAR2, document to be highlighted
47   --   p_starttag   VARCHAR2, start highlighting tag
48   --   p_endtag     VARCHAR2, end highlighting tag
49   -- Returns:
50   --   The highlighted document
51   -- Since 12.0
52   --
53   FUNCTION highlight_text
54   (
55   p_text_query  IN VARCHAR2,
56   p_document    IN VARCHAR2,
57   p_starttag    IN VARCHAR2,
58   p_endtag      IN VARCHAR2
59   ) RETURN VARCHAR2
60   IS
61     l_result_segment CLOB;
62   BEGIN
63     CTX_DOC.POLICY_MARKUP (
64       policy_name => 'CS_KB_KWIC_POLICY',
65       document => p_document,
66       text_query => p_text_query,
67       starttag => p_starttag,
68       endtag => p_endtag,
69       restab => l_result_segment);
70     return l_result_segment;
71   END highlight_text;
72 
73   --
74   -- This Utility method escapes regular expression operators in a string.
75   -- This is useful if you want to literally match a string that may contain
76   -- a regular expression operator.  (e.g. 'e*trade')
77   --
78   -- Parameters:
79   --   p_string VARCHAR2, the string to escape
80   -- Returns:
81   --   string with regexp special characters escaped.
82   -- Since 12.0
83   --
84   FUNCTION regexp_escape
85   (
86   p_string IN VARCHAR2
87   ) RETURN VARCHAR2
88   IS
89     l_string VARCHAR2(32767); -- the final string after escaping
90     l_special_chars VARCHAR2(100); --chars to escape
91     l_char VARCHAR2(8);
92   BEGIN
93     l_string := p_string;
94     l_special_chars := '\[](){}*+$^?|.'; -- '\' must come first
95 
96     FOR i in 1..length(l_special_chars) LOOP
97       l_char := substr(l_special_chars, i, 1);
98       l_string := replace(l_string, l_char, '\' || l_char);
99     END LOOP;
100 
101     return l_string;
102   END regexp_escape;
103 
104   --
105   -- This API highlights all keywords (p_text_query) in a given document
106   -- (p_document) using the specified start and end tags. It scans the
107   -- document to construct a highlighted segment that contains the most
108   -- number of distinct keywords, with some surrounding context.
109   -- If no keywords found, will return NULL
110   --
111   -- Parameters:
112   --   p_text_query VARCHAR2, keywords used for highlighting
113   --   p_starttag   VARCHAR2, start highlighting tag
114   --   p_endtag     VARCHAR2, end highlighting tag
115   --   p_document   VARCHAR2, document to be highlighted
116   -- Returns:
120   --
117   --   The highlighted KWIC (Key Words In Context) segment,
118   --   or NULL if no keywords found.
119   -- Since 12.0
121   FUNCTION get_segment_kwic
122   (
123   p_text_query  IN VARCHAR2,
124   p_starttag    IN VARCHAR2,
125   p_endtag      IN VARCHAR2,
126   p_document    IN CLOB
127   ) RETURN VARCHAR2
128   IS
129     l_restab CTX_DOC.highlight_tab; -- Keyword match info returned by CTX_DOC
130 
131     TYPE number_table IS TABLE OF INTEGER INDEX BY PLS_INTEGER;
132     -- word occupies the range [open, close)
133     l_open_positions number_table;   -- open positions of matching words
134     l_close_positions number_table;  -- close positions of matching words
135 
136     i INTEGER;
137     idx INTEGER;
138     openIdx INTEGER; -- index for looping through the open positions
139     closeIdx INTEGER; -- index for looping through the close positions
140 
141     -- Map to hold distinct keywords
142     TYPE distinct_word_map IS TABLE OF INTEGER INDEX BY VARCHAR2(32767);
143     l_distinct_words_master distinct_word_map; -- master list of distinct words
144     l_distinct_words distinct_word_map; -- distinct words for a given segment
145 
146     l_word VARCHAR2(32767);
147     l_len INTEGER;
148 
149     --used when calculating the best segment
150     l_word_start INTEGER;
151     l_word_end INTEGER;
152     l_segment_start INTEGER;
153     l_segment_end INTEGER;
154 
155     --represent the best segment
156     l_best_count INTEGER := 0; --best # of distinct keywords in any segment
157     l_best_segment_start INTEGER := null;
158     l_best_segment_end INTEGER := null;
159     l_best_segment VARCHAR2(1000);
160 
161     --pre and post segments (the "Context" part of KWIC)
162     l_pre_segment VARCHAR2(1000);
163     l_post_segment VARCHAR2(1000);
164     l_pre_segment_start INTEGER;
165     l_post_segment_end INTEGER;
166 
167     l_space_positions number_table;  -- used to calculate pre-segement
168     l_pre_pattern VARCHAR2(50); -- regular expression
169     l_post_pattern VARCHAR2(50); -- regular expression
170 
171     -- (5217204)
172     l_kwic_segment VARCHAR2(32000); -- the final kwic segment
173 
174     -- Bug fix 11837564
175      l_char   varchar2(20);
176    l_left Number := 0;
177    l_right Number := 0;
178    l_length Number;
179 
180   BEGIN
181 
182     -- positions of matching words returned in l_restab
183     CTX_DOC.POLICY_HIGHLIGHT (
184       policy_name => 'CS_KB_KWIC_POLICY',
185       document => p_document,
186       text_query => p_text_query,
187       restab => l_restab );
188 
189     -- form master list of distinct keywords found in the document
190     i := l_restab.FIRST;
191     WHILE (i IS NOT NULL) LOOP
192       l_open_positions(i) := l_restab(i).offset;
193       l_close_positions(i) := l_restab(i).offset + l_restab(i).length;
194 
195       dbms_lob.read(p_document,
196                     l_restab(i).length,
197                     l_restab(i).offset,
198                     l_word );
199 
200       l_distinct_words_master( LOWER(l_word) ) := 1; --keys are the keywords
201 
202       i := l_restab.NEXT(i);
203     END LOOP;
204 
205     --dbms_output.put_line('# Distinct keywords:' || l_distinct_words_master.COUNT);
206 
207     -- Find the best segment (that with the most distinct keywords)
208     -- Loop through open and close positions and form the largest segment
209     -- possible. For each segment, keep track of distinct keywords.  Note
210     -- each segment begins and ends with a matching keyword
211     openIdx := l_open_positions.FIRST;
212     WHILE (openIdx IS NOT NULL AND
213            l_best_count < l_distinct_words_master.COUNT
214           ) LOOP
215       --segment start and end positions for this iteration
216       l_segment_start := l_open_positions(openIdx);
217       l_segment_end := null;
218 
219       l_distinct_words.DELETE; -- clear out keyword map.
220 
221       closeIdx := openIdx; -- slide the closeIdx to determine the segment close.
222       WHILE (closeIdx IS NOT NULL AND
223              l_close_positions(closeIdx) - l_segment_start <= MAX_SEGMENT_LENGTH
224              ) LOOP
225         l_segment_end := l_close_positions(closeIdx); --current end of segment
226 
227         -- read the current keyword
228         l_word_start := l_open_positions(closeIdx);
229         l_word_end := l_close_positions(closeIdx);
230         l_len := l_word_end - l_word_start;
231         dbms_lob.read(p_document,
232                       l_len,
233                       l_word_start,
234                       l_word );
235         --dbms_output.put_line(l_word_start || ' ' || l_word_end || ' [' || l_word || ']');
236         l_distinct_words( LOWER(l_word) ) := 1; -- add keyword to map
237 
238         closeIdx := l_close_positions.NEXT(closeIdx);
239       END LOOP;
240 
241       -- Note: A Corner Case:
242       -- l_segment_end is null if the keyword length > MAX_SEGMENT_LENGTH
243 
244       if(l_distinct_words.COUNT > l_best_count AND
245          l_segment_end IS NOT NULL)
246       then
247         l_best_count := l_distinct_words.COUNT;
248         l_best_segment_start := l_segment_start;
249         l_best_segment_end := l_segment_end;
250       end if;
251       --dbms_output.put_line( '[' || l_segment_start || ',' || l_segment_end || '] = ' || l_distinct_words.COUNT);
252 
253       if(closeIdx IS NOT NULL AND
254          l_segment_end IS NOT NULL AND
255          l_close_positions(closeIdx) - l_segment_end >= MAX_SEGMENT_LENGTH)
256       then
257         openIdx := l_open_positions.NEXT(closeIdx); --look-ahead optimization
258         --dbms_output.put_line( 'Looking AHEAD');
262 
259       else
260         openIdx := l_open_positions.NEXT(openIdx);
261       end if;
263     END LOOP;
264 
265     -- could not find a best segment
266     if(l_best_segment_start IS NULL) then return NULL; end if;
267 
268     -- now calculate the context surrounding the snippet
269     -- find pre segment start position
270     l_pre_segment_start := l_best_segment_start - SEGMENT_PADDING;
271     if(l_pre_segment_start < 1) then
272       l_pre_segment_start := 1;
273     end if;
274     -- find the post segment end position
275     l_post_segment_end := l_best_segment_end + SEGMENT_PADDING;
276     l_len := dbms_lob.getlength(p_document);
277     if(l_post_segment_end > l_len) then
278       l_post_segment_end := l_len + 1; --last position + 1
279     end if;
280 
281     -- get the middle segment (the one with the densest keywords)
282     l_len := l_best_segment_end - l_best_segment_start;
283     dbms_lob.read(p_document,
284                     l_len,
285                     l_best_segment_start,
286                     l_best_segment );
287 
288     -- now get the prefix segment
289     l_len := l_best_segment_start - l_pre_segment_start;
290     if(l_len > 0) then
291       dbms_lob.read(p_document,
292                     l_len,
293                     l_pre_segment_start,
294                     l_pre_segment );
295     end if;
296 
297     -- now get the postfix segment
298     l_len := l_post_segment_end - l_best_segment_end;
299     if(l_len > 0) then
300       dbms_lob.read(p_document,
301                     l_len,
302                     l_best_segment_end,
303                     l_post_segment );
304     end if;
305 
306 --    dbms_output.put_line('PRE: [' || l_pre_segment || ']');
307 
308     -- Now refine the pre and post segments
309     -- try to match up to 8 words on either side (pre and post)
310 
311     --NOTE: cannot use a single expression ending in '$'.  This
312     -- has terrible performance, probably due to excessive backtracking
313     --l_pre_pattern := '([^[:space:]]+[[:space:]]*){1,8}$'; -- greedy
314     l_pre_pattern := '[[:space:]]+'; -- just match spaces
315 
316     -- Refine the pre segment
317     -- Loop find all the occurences spaces in the pre-segment
318     -- Trim the pre-segment to include the last N words.
319     l_len := 1;
320     i := 0;
321     WHILE(l_len > 0) LOOP
322       l_len := regexp_instr( l_pre_segment,
323                              l_pre_pattern,
324                              l_len,  -- start search at position
325                              1,  -- return the first occurence
326                     	     1   -- return the position of last character + 1
327                              );
328       IF (l_len > 0) THEN
329         l_space_positions(i) := l_len;
330         i := i + 1;
331       END IF;
332 
333     END LOOP;
334 
335     IF( i > 0 ) THEN
336       i := i - 8; -- get up to eight words preceding the main snippet
337       IF( i < 0 ) THEN i := 0; END IF;
338       l_pre_segment := SUBSTR (l_pre_segment, l_space_positions(i));
339     END IF;
340     -- Bug fix 11837564 Start
341      if(l_pre_segment is not null) then
342      l_length := length(l_pre_segment);
343 	   For i in 1.. l_length loop
344 	      l_char := substr(l_pre_segment,i,1);
345 	      If l_char = '<' then
346 		 l_left := l_left + 1;
347 	      End if;
348 	      If l_char = '>' then
349 		 l_right := l_right + 1;
350 	      End if;
351 	   end loop;
352      If l_right > l_left then
353            l_pre_segment := '<' || l_pre_segment;
354     End if;
355      End if;
356      -- Bug fix 11837564 End
357     /*l_len := regexp_instr( l_pre_segment,
358                            l_pre_pattern,
359                            1,  -- start search at position 1
360                            1,  -- return the first occurence
361                     	   0   -- return the position of first char in match
362                            );
363     IF(l_len > 0) THEN
364       l_pre_segment := SUBSTR ( l_pre_segment, l_len );
365     ELSE
366       l_pre_segment := '';
367     END IF;
368     */
369 
370 --    dbms_output.put_line('NEW PRE: [' || l_pre_segment || ']');
371 --    dbms_output.put_line('MID: [' || l_best_segment || ']');
372 --    dbms_output.put_line('POST: [' || l_post_segment || ']');
373 
374     l_post_pattern := '([[:space:]]*[^[:space:]]+){1,8}'; -- greedy
375     -- Refine the post segment
376     l_len := regexp_instr( l_post_segment,
377                            l_post_pattern,
378                            1,  -- start search at position 1
379                            1,  -- return the first occurence
380                            1   -- return the position of last character + 1
381                            );
382     IF(l_len > 0) THEN
383       l_post_segment := SUBSTR ( l_post_segment, 1, l_len - 1 );
384     ELSE
385       l_post_segment := '';
386     END IF;
387 
388 
389 --    dbms_output.put_line('NEW POST: [' || l_post_segment || ']');
390     -- combine the pre, mid, and post to form the KWIC segment
391     l_kwic_segment := l_pre_segment || l_best_segment || l_post_segment;
392 
393     -- surround keywords with the start and end tag, case insensitive
394     l_word := l_distinct_words_master.FIRST;
395     while(l_word IS NOT NULL) LOOP
396       --dbms_output.put_line(l_word || ' ==> ' || regexp_escape(l_word));
397       --cannot use regular replace(): need case insensitive match/replace
398       l_kwic_segment := regexp_replace(l_kwic_segment,
402                                        0,  --occurences (0 means ALL)
399                                        '(' || regexp_escape(l_word) || ')',
400                                        p_starttag || '\1' || p_endtag,
401                                        1,  --start index
403                                        'i' --case insensitive
404                                       );
405       l_word := l_distinct_words_master.NEXT(l_word);
406     END LOOP;
407 
408     return l_kwic_segment;
409   END get_segment_kwic;
410 
411   -- This function synthesizes a snippet from the SR notes of the given p_sr_id
412   -- and highlights it with the keywords (p_text_query)
413   -- using the given tags (p_starttag and p_endtag);
414   -- Note: this function does not include private notes of the SR
415   --
416   -- Parameters:
417   --   p_sr_id      NUMBER, service request id
418   --   p_text_query VARCHAR2, keywords used for highlighting
419   --   p_starttag   VARCHAR2, start highlighting tag
420   --   p_endtag     VARCHAR2, end highlighting tag
421   -- Returns:
422   --   Snippet with the keywords wrapped in tags
423   -- Since R12
424   FUNCTION get_sr_snippet
425   (
426   p_sr_id       IN NUMBER,
427   p_text_query  IN VARCHAR2,
428   p_starttag    IN VARCHAR2,
429   p_endtag      IN VARCHAR2
430   ) RETURN VARCHAR2
431   IS
432     l_document CLOB; -- the document from which the snippet will be calculated
433     l_data VARCHAR2(32767);
434     l_len NUMBER;
435 
436     CURSOR get_sr_notes(c_obj_id NUMBER) IS
437      SELECT notes, notes_detail
438       FROM jtf_notes_vl
439       WHERE source_object_code  = 'SR'
440       AND source_object_id = c_obj_id
441       AND note_status <> 'P'; -- IN ('E', 'I'); ignore private notes
442 
443   BEGIN
444     --Synthesize the document out of SR notes
445     dbms_lob.createtemporary(l_document, TRUE, dbms_lob.call);
446     FOR element IN get_sr_notes( p_sr_id ) LOOP
447       --append the note (VARCHAR)
448       l_data := element.notes;
449       IF(l_data IS NOT NULL AND
450         length (l_data) > 0) then
451         dbms_lob.writeappend(l_document, length(l_data)+3, '...' || l_data);
452       END IF;
453 
454       --append the note detail (CLOB)
455       IF(element.notes_detail IS NOT NULL AND
456          dbms_lob.getlength(element.notes_detail) > 0)
457       THEN
458         dbms_lob.writeappend(l_document, 3, '...');
459         dbms_lob.append(l_document, element.notes_detail);
460       END IF;
461 
462     END LOOP;
463 
464 --    dbms_output.put_line('DOC_LENGTH = ' || dbms_lob.getlength(l_document));
465 --    dbms_output.put_line(substr(l_document,1,255));
466 
467     return get_segment_kwic(p_text_query, p_starttag, p_endtag, l_document);
468 
469   END get_sr_snippet;
470 
471   --
472   -- This function synthesizes the solution statements of the given p_set_id
473   -- and highlights it with the keywords (p_text_query)
474   -- using the given tags (p_starttag and p_endtag);
475   -- Statements used in this function should respect solution security.
476   --
477   -- Parameters:
478   --   p_set_id     NUMBER, set id
479   --   p_text_query VARCHAR2, keywords used for highlighting
480   --   p_starttag   VARCHAR2, start highlighting tag
481   --   p_endtag     VARCHAR2, end highlighting tag
482   -- Returns:
483   --   Snippet with the keywords wrapped in tags
484   -- Since R12
485   --
486   FUNCTION get_set_snippet
487   (
488   p_set_id      IN NUMBER,
489   p_text_query  IN VARCHAR2,
490   p_starttag    IN VARCHAR2,
491   p_endtag      IN VARCHAR2
492   ) RETURN VARCHAR2
493   IS
494     l_document CLOB; -- the document from which the snippet will be calculated
495     l_data VARCHAR2(32767);
496     l_len NUMBER;
497 
498     CURSOR get_element_content(p_set_id NUMBER) IS
499       SELECT e.name, e.description
500 	   FROM cs_kb_set_eles b, cs_kb_elements_vl e
501         WHERE e.element_id = b.element_id
502 		 AND b.set_id = p_set_id
503 		 AND e.status = 'PUBLISHED'
504 		 AND e.access_level >= Cs_Kb_Security_Pvt.GET_STMT_VISIBILITY_POSITION;
505 
506   BEGIN
507     --Synthesize the document out of solution statements
508     dbms_lob.createtemporary(l_document, TRUE, dbms_lob.call);
509 
510     FOR element IN get_element_content( p_set_id ) LOOP
511       --append the statement
512       l_data := element.name;
513       IF(l_data IS NOT NULL AND
514         length (l_data) > 0) then
515         dbms_lob.writeappend(l_document, length(l_data)+3, '...' || l_data);
516       END IF;
517 
518       --append the description (CLOB)
519       IF(element.description IS NOT NULL AND
520          dbms_lob.getlength(element.description) > 0)
521       THEN
522         dbms_lob.writeappend(l_document, 3, '...');
523         dbms_lob.append(l_document, element.description);
524       END IF;
525 
526     END LOOP;
527 
528 --    dbms_output.put_line('DOC_LENGTH = ' || dbms_lob.getlength(l_document));
529 --    dbms_output.put_line(substr(l_document,1,255));
530 
531     return get_segment_kwic(p_text_query, p_starttag, p_endtag, l_document);
532 
533   END get_set_snippet;
534 
535 
536 end CS_KB_KWIC_UTIL  ;