eTRM uses javascript that opens and focuses a separate window to display reports. Your browser does not appear to support javascript. This should not prevent you from using eTRM but it does mean the window focus feature will not work. Please check the contents of other windows on your desktop if you click on a link that does not appear to do anything.
[Home] [Help]
PACKAGE BODY: APPS.CS_KB_KWIC_UTIL

Source


4 /*
1 PACKAGE BODY CS_KB_KWIC_UTIL AS
2 /* $Header: cskbkwicb.pls 120.5.12020000.2 2012/07/05 05:37:08 gasankar ship $ */
3 
5  *
6  * +======================================================================+
7  * |                Copyright (c) 2005 Oracle Corporation                 |
8  * |                   Redwood Shores, California, USA                    |
9  * |                        All rights reserved.                          |
10  * +======================================================================+
11  *
12  *   FILENAME
13  *     cskbkwicb.pls
14  *   PURPOSE
15  *     Creates the package body for CS_KB_KWIC_UTIL
16  *     CS_KB_KWIC_UTIL supports the Keywords In Context implementation
17  *
18  *   HISTORY
19  *   12-APR-2005 HMEI Created.
20  *   11-JUL-2005 HMEI Changed get_segment_kwic to use different regular
21  *                    expressions for pre/post segement. Removed
22  *                    string reverse logic
23  *   12-JUL-2005 HMEI Changed get_segment_kwic to use custom logic for
24  *                    determining pre-segment.  Cannot use a single
25  *                    pattern ending in '$', since performance suffers.
26  *   18-JUN-2006 KLOU Fix bug 5217204
27  *                    Change size of l_kwic_segment to 32000 as the regexp_replace
28  *                    can return up to 32K of content.
29  *    06-May-2011 isugavan Bug 11837564 - WEIRD CHARACTERS SHOWS UP IN THE
30  *                                        SEARCH RESULT WHEN WE SEARCH KNOWLEDGE BASE DOC
31  */
32 
33 
34   -- Maximum keyword segment length
35   MAX_SEGMENT_LENGTH CONSTANT NUMBER := 200;
36   -- Left and Right padding used when calculating KWIC
37   SEGMENT_PADDING CONSTANT NUMBER := MAX_SEGMENT_LENGTH/2;
38 
39   --
40   -- This API highlights all keywords (p_text_query) in a given document
41   -- (p_document) using the specified start and end tags. This API is intended
42   -- for highlighting a short document, e.g. solution title or SR summary.
43   --
44   -- Parameters:
45   --   p_text_query VARCHAR2, keywords used for highlighting
46   --   p_document   VARCHAR2, document to be highlighted
47   --   p_starttag   VARCHAR2, start highlighting tag
48   --   p_endtag     VARCHAR2, end highlighting tag
49   -- Returns:
50   --   The highlighted document
51   -- Since 12.0
52   --
53   FUNCTION highlight_text
54   (
55   p_text_query  IN VARCHAR2,
56   p_document    IN VARCHAR2,
57   p_starttag    IN VARCHAR2,
58   p_endtag      IN VARCHAR2
59   ) RETURN VARCHAR2
60   IS
61     l_result_segment CLOB;
62   BEGIN
63     CTX_DOC.POLICY_MARKUP (
64       policy_name => 'CS_KB_KWIC_POLICY',
65       document => p_document,
66       text_query => p_text_query,
67       starttag => p_starttag,
68       endtag => p_endtag,
69       restab => l_result_segment);
70     return l_result_segment;
71   END highlight_text;
72 
73   --
74   -- This Utility method escapes regular expression operators in a string.
75   -- This is useful if you want to literally match a string that may contain
76   -- a regular expression operator.  (e.g. 'e*trade')
77   --
78   -- Parameters:
79   --   p_string VARCHAR2, the string to escape
80   -- Returns:
81   --   string with regexp special characters escaped.
82   -- Since 12.0
83   --
84   FUNCTION regexp_escape
85   (
86   p_string IN VARCHAR2
87   ) RETURN VARCHAR2
88   IS
89     l_string VARCHAR2(32767); -- the final string after escaping
90     l_special_chars VARCHAR2(100); --chars to escape
91     l_char VARCHAR2(8);
92   BEGIN
93     l_string := p_string;
94     l_special_chars := '\[](){}*+$^?|.'; -- '\' must come first
95 
96     FOR i in 1..length(l_special_chars) LOOP
97       l_char := substr(l_special_chars, i, 1);
98       l_string := replace(l_string, l_char, '\' || l_char);
99     END LOOP;
100 
101     return l_string;
102   END regexp_escape;
103 
104   --
105   -- This API highlights all keywords (p_text_query) in a given document
106   -- (p_document) using the specified start and end tags. It scans the
107   -- document to construct a highlighted segment that contains the most
108   -- number of distinct keywords, with some surrounding context.
109   -- If no keywords found, will return NULL
110   --
111   -- Parameters:
112   --   p_text_query VARCHAR2, keywords used for highlighting
113   --   p_starttag   VARCHAR2, start highlighting tag
114   --   p_endtag     VARCHAR2, end highlighting tag
115   --   p_document   VARCHAR2, document to be highlighted
116   -- Returns:
120   --
117   --   The highlighted KWIC (Key Words In Context) segment,
118   --   or NULL if no keywords found.
119   -- Since 12.0
121   FUNCTION get_segment_kwic
122   (
123   p_text_query  IN VARCHAR2,
124   p_starttag    IN VARCHAR2,
125   p_endtag      IN VARCHAR2,
126   p_document    IN CLOB
127   ) RETURN VARCHAR2
128   IS
129     l_restab CTX_DOC.highlight_tab; -- Keyword match info returned by CTX_DOC
130 
131     TYPE number_table IS TABLE OF INTEGER INDEX BY PLS_INTEGER;
132     -- word occupies the range [open, close)
133     l_open_positions number_table;   -- open positions of matching words
134     l_close_positions number_table;  -- close positions of matching words
135 
136     i INTEGER;
137     idx INTEGER;
138     openIdx INTEGER; -- index for looping through the open positions
139     closeIdx INTEGER; -- index for looping through the close positions
140 
141     -- Map to hold distinct keywords
142     TYPE distinct_word_map IS TABLE OF INTEGER INDEX BY VARCHAR2(32767);
143     l_distinct_words_master distinct_word_map; -- master list of distinct words
144     l_distinct_words distinct_word_map; -- distinct words for a given segment
145 
146     l_word VARCHAR2(32767);
147     l_len INTEGER;
148 
149     --used when calculating the best segment
150     l_word_start INTEGER;
151     l_word_end INTEGER;
152     l_segment_start INTEGER;
153     l_segment_end INTEGER;
154 
155     --represent the best segment
156     l_best_count INTEGER := 0; --best # of distinct keywords in any segment
157     l_best_segment_start INTEGER := null;
158     l_best_segment_end INTEGER := null;
159     l_best_segment VARCHAR2(1000);
160 
161     --pre and post segments (the "Context" part of KWIC)
162     l_pre_segment VARCHAR2(1000);
163     l_post_segment VARCHAR2(1000);
164     l_pre_segment_start INTEGER;
165     l_post_segment_end INTEGER;
166 
167     l_space_positions number_table;  -- used to calculate pre-segement
168     l_pre_pattern VARCHAR2(50); -- regular expression
169     l_post_pattern VARCHAR2(50); -- regular expression
170 
171     -- (5217204)
172     l_kwic_segment VARCHAR2(32000); -- the final kwic segment
173 
174     -- Bug fix 11837564
175      l_char   varchar2(20);
176    l_left Number := 0;
177    l_right Number := 0;
178    l_length Number;
179 
180   BEGIN
181 
182     -- positions of matching words returned in l_restab
183     CTX_DOC.POLICY_HIGHLIGHT (
184       policy_name => 'CS_KB_KWIC_POLICY',
185       document => p_document,
186       text_query => p_text_query,
187       restab => l_restab );
188 
189     -- form master list of distinct keywords found in the document
190     i := l_restab.FIRST;
191     WHILE (i IS NOT NULL) LOOP
192       l_open_positions(i) := l_restab(i).offset;
193       l_close_positions(i) := l_restab(i).offset + l_restab(i).length;
194 
195       dbms_lob.read(p_document,
196                     l_restab(i).length,
197                     l_restab(i).offset,
198                     l_word );
199 
200       l_distinct_words_master( LOWER(l_word) ) := 1; --keys are the keywords
201 
202       i := l_restab.NEXT(i);
203     END LOOP;
204 
205     --dbms_output.put_line('# Distinct keywords:' || l_distinct_words_master.COUNT);
206 
207     -- Find the best segment (that with the most distinct keywords)
208     -- Loop through open and close positions and form the largest segment
209     -- possible. For each segment, keep track of distinct keywords.  Note
210     -- each segment begins and ends with a matching keyword
211     openIdx := l_open_positions.FIRST;
212     WHILE (openIdx IS NOT NULL AND
213            l_best_count < l_distinct_words_master.COUNT
214           ) LOOP
215       --segment start and end positions for this iteration
216       l_segment_start := l_open_positions(openIdx);
217       l_segment_end := null;
218 
219       l_distinct_words.DELETE; -- clear out keyword map.
220 
221       closeIdx := openIdx; -- slide the closeIdx to determine the segment close.
222       WHILE (closeIdx IS NOT NULL AND
223              l_close_positions(closeIdx) - l_segment_start <= MAX_SEGMENT_LENGTH
224              ) LOOP
225         l_segment_end := l_close_positions(closeIdx); --current end of segment
226 
227         -- read the current keyword
228         l_word_start := l_open_positions(closeIdx);
229         l_word_end := l_close_positions(closeIdx);
230         l_len := l_word_end - l_word_start;
231         dbms_lob.read(p_document,
232                       l_len,
233                       l_word_start,
234                       l_word );
235         --dbms_output.put_line(l_word_start || ' ' || l_word_end || ' [' || l_word || ']');
236         l_distinct_words( LOWER(l_word) ) := 1; -- add keyword to map
237 
238         closeIdx := l_close_positions.NEXT(closeIdx);
239       END LOOP;
240 
241       -- Note: A Corner Case:
242       -- l_segment_end is null if the keyword length > MAX_SEGMENT_LENGTH
243 
244       if(l_distinct_words.COUNT > l_best_count AND
245          l_segment_end IS NOT NULL)
246       then
247         l_best_count := l_distinct_words.COUNT;
248         l_best_segment_start := l_segment_start;
249         l_best_segment_end := l_segment_end;
250       end if;
251       --dbms_output.put_line( '[' || l_segment_start || ',' || l_segment_end || '] = ' || l_distinct_words.COUNT);
252 
253       if(closeIdx IS NOT NULL AND
254          l_segment_end IS NOT NULL AND
255          l_close_positions(closeIdx) - l_segment_end >= MAX_SEGMENT_LENGTH)
256       then
257         openIdx := l_open_positions.NEXT(closeIdx); --look-ahead optimization
258         --dbms_output.put_line( 'Looking AHEAD');
262 
259       else
260         openIdx := l_open_positions.NEXT(openIdx);
261       end if;
263     END LOOP;
264 
265     -- could not find a best segment
266     if(l_best_segment_start IS NULL) then return NULL; end if;
267 
268     -- now calculate the context surrounding the snippet
269     -- find pre segment start position
270     l_pre_segment_start := l_best_segment_start - SEGMENT_PADDING;
271     if(l_pre_segment_start < 1) then
272       l_pre_segment_start := 1;
273     end if;
274     -- find the post segment end position
275     l_post_segment_end := l_best_segment_end + SEGMENT_PADDING;
276     l_len := dbms_lob.getlength(p_document);
277     if(l_post_segment_end > l_len) then
278       l_post_segment_end := l_len + 1; --last position + 1
279     end if;
280 
281     -- get the middle segment (the one with the densest keywords)
282     l_len := l_best_segment_end - l_best_segment_start;
283     dbms_lob.read(p_document,
284                     l_len,
285                     l_best_segment_start,
286                     l_best_segment );
287 
288     -- now get the prefix segment
289     l_len := l_best_segment_start - l_pre_segment_start;
290     if(l_len > 0) then
291       dbms_lob.read(p_document,
292                     l_len,
293                     l_pre_segment_start,
294                     l_pre_segment );
295     end if;
296 
297     -- now get the postfix segment
298     l_len := l_post_segment_end - l_best_segment_end;
299     if(l_len > 0) then
300       dbms_lob.read(p_document,
301                     l_len,
302                     l_best_segment_end,
303                     l_post_segment );
304     end if;
305 
306 --    dbms_output.put_line('PRE: [' || l_pre_segment || ']');
307 
308     -- Now refine the pre and post segments
309     -- try to match up to 8 words on either side (pre and post)
310 
311     --NOTE: cannot use a single expression ending in '$'.  This
312     -- has terrible performance, probably due to excessive backtracking
313     --l_pre_pattern := '([^[:space:]]+[[:space:]]*){1,8}$'; -- greedy
314     l_pre_pattern := '[[:space:]]+'; -- just match spaces
315 
316     -- Refine the pre segment
317     -- Loop find all the occurences spaces in the pre-segment
318     -- Trim the pre-segment to include the last N words.
319     l_len := 1;
320     i := 0;
321     WHILE(l_len > 0) LOOP
322       l_len := regexp_instr( l_pre_segment,
323                              l_pre_pattern,
324                              l_len,  -- start search at position
325                              1,  -- return the first occurence
326                     	     1   -- return the position of last character + 1
327                              );
328       IF (l_len > 0) THEN
329         l_space_positions(i) := l_len;
330         i := i + 1;
331       END IF;
332 
333     END LOOP;
334 
335     IF( i > 0 ) THEN
336       i := i - 8; -- get up to eight words preceding the main snippet
337       IF( i < 0 ) THEN i := 0; END IF;
338       l_pre_segment := SUBSTR (l_pre_segment, l_space_positions(i));
339     END IF;
340     -- Bug fix 11837564 Start
341      if(l_pre_segment is not null) then
342      l_length := length(l_pre_segment);
343 	   For i in 1.. l_length loop
344 	      l_char := substr(l_pre_segment,i,1);
345 	      If l_char = '<' then
346 		 l_left := l_left + 1;
347 	      End if;
348 	      If l_char = '>' then
349 		 l_right := l_right + 1;
350 	      End if;
351 	   end loop;
352      If l_right > l_left then
353            l_pre_segment := '<' || l_pre_segment;
354     End if;
355      End if;
356      -- Bug fix 11837564 End
357     /*l_len := regexp_instr( l_pre_segment,
358                            l_pre_pattern,
359                            1,  -- start search at position 1
360                            1,  -- return the first occurence
361                     	   0   -- return the position of first char in match
362                            );
363     IF(l_len > 0) THEN
364       l_pre_segment := SUBSTR ( l_pre_segment, l_len );
365     ELSE
366       l_pre_segment := '';
367     END IF;
368     */
369 
370 --    dbms_output.put_line('NEW PRE: [' || l_pre_segment || ']');
371 --    dbms_output.put_line('MID: [' || l_best_segment || ']');
372 --    dbms_output.put_line('POST: [' || l_post_segment || ']');
373 
374     l_post_pattern := '([[:space:]]*[^[:space:]]+){1,8}'; -- greedy
375     -- Refine the post segment
376     l_len := regexp_instr( l_post_segment,
377                            l_post_pattern,
378                            1,  -- start search at position 1
379                            1,  -- return the first occurence
380                            1   -- return the position of last character + 1
381                            );
382     IF(l_len > 0) THEN
383       l_post_segment := SUBSTR ( l_post_segment, 1, l_len - 1 );
384     ELSE
385       l_post_segment := '';
386     END IF;
387 
388 
389 --    dbms_output.put_line('NEW POST: [' || l_post_segment || ']');
390     -- combine the pre, mid, and post to form the KWIC segment
391     l_kwic_segment := l_pre_segment || l_best_segment || l_post_segment;
392 
393     -- surround keywords with the start and end tag, case insensitive
394     l_word := l_distinct_words_master.FIRST;
395     while(l_word IS NOT NULL) LOOP
396       --dbms_output.put_line(l_word || ' ==> ' || regexp_escape(l_word));
397       --cannot use regular replace(): need case insensitive match/replace
398       l_kwic_segment := regexp_replace(l_kwic_segment,
402                                        0,  --occurences (0 means ALL)
399                                        '(' || regexp_escape(l_word) || ')',
400                                        p_starttag || '\1' || p_endtag,
401                                        1,  --start index
403                                        'i' --case insensitive
404                                       );
405       l_word := l_distinct_words_master.NEXT(l_word);
406     END LOOP;
407 
408     return l_kwic_segment;
409   END get_segment_kwic;
410 
411   -- This function synthesizes a snippet from the SR notes of the given p_sr_id
412   -- and highlights it with the keywords (p_text_query)
413   -- using the given tags (p_starttag and p_endtag);
414   -- Note: this function does not include private notes of the SR
415   --
416   -- Parameters:
417   --   p_sr_id      NUMBER, service request id
418   --   p_text_query VARCHAR2, keywords used for highlighting
419   --   p_starttag   VARCHAR2, start highlighting tag
420   --   p_endtag     VARCHAR2, end highlighting tag
421   -- Returns:
422   --   Snippet with the keywords wrapped in tags
423   -- Since R12
424   FUNCTION get_sr_snippet
425   (
426   p_sr_id       IN NUMBER,
427   p_text_query  IN VARCHAR2,
428   p_starttag    IN VARCHAR2,
429   p_endtag      IN VARCHAR2
430   ) RETURN VARCHAR2
431   IS
432     l_document CLOB; -- the document from which the snippet will be calculated
433     l_data VARCHAR2(32767);
434     l_len NUMBER;
435 
436     CURSOR get_sr_notes(c_obj_id NUMBER) IS
437      SELECT notes, notes_detail
438       FROM jtf_notes_vl
439       WHERE source_object_code  = 'SR'
440       AND source_object_id = c_obj_id
441       AND note_status <> 'P'; -- IN ('E', 'I'); ignore private notes
442 
443   BEGIN
444     --Synthesize the document out of SR notes
445     dbms_lob.createtemporary(l_document, TRUE, dbms_lob.call);
446     FOR element IN get_sr_notes( p_sr_id ) LOOP
447       --append the note (VARCHAR)
448       l_data := element.notes;
449       IF(l_data IS NOT NULL AND
450         length (l_data) > 0) then
451         dbms_lob.writeappend(l_document, length(l_data)+3, '...' || l_data);
452       END IF;
453 
454       --append the note detail (CLOB)
455       IF(element.notes_detail IS NOT NULL AND
456          dbms_lob.getlength(element.notes_detail) > 0)
457       THEN
458         dbms_lob.writeappend(l_document, 3, '...');
459         dbms_lob.append(l_document, element.notes_detail);
460       END IF;
461 
462     END LOOP;
463 
464 --    dbms_output.put_line('DOC_LENGTH = ' || dbms_lob.getlength(l_document));
465 --    dbms_output.put_line(substr(l_document,1,255));
466 
467     return get_segment_kwic(p_text_query, p_starttag, p_endtag, l_document);
468 
469   END get_sr_snippet;
470 
471   --
472   -- This function synthesizes the solution statements of the given p_set_id
473   -- and highlights it with the keywords (p_text_query)
474   -- using the given tags (p_starttag and p_endtag);
475   -- Statements used in this function should respect solution security.
476   --
477   -- Parameters:
478   --   p_set_id     NUMBER, set id
479   --   p_text_query VARCHAR2, keywords used for highlighting
480   --   p_starttag   VARCHAR2, start highlighting tag
481   --   p_endtag     VARCHAR2, end highlighting tag
482   -- Returns:
483   --   Snippet with the keywords wrapped in tags
484   -- Since R12
485   --
486   FUNCTION get_set_snippet
487   (
488   p_set_id      IN NUMBER,
489   p_text_query  IN VARCHAR2,
490   p_starttag    IN VARCHAR2,
491   p_endtag      IN VARCHAR2
492   ) RETURN VARCHAR2
493   IS
494     l_document CLOB; -- the document from which the snippet will be calculated
495     l_data VARCHAR2(32767);
496     l_len NUMBER;
497 
498     CURSOR get_element_content(p_set_id NUMBER) IS
499       SELECT e.name, e.description
500 	   FROM cs_kb_set_eles b, cs_kb_elements_vl e
501         WHERE e.element_id = b.element_id
502 		 AND b.set_id = p_set_id
503 		 AND e.status = 'PUBLISHED'
504 		 AND e.access_level >= Cs_Kb_Security_Pvt.GET_STMT_VISIBILITY_POSITION;
505 
506   BEGIN
507     --Synthesize the document out of solution statements
508     dbms_lob.createtemporary(l_document, TRUE, dbms_lob.call);
509 
510     FOR element IN get_element_content( p_set_id ) LOOP
511       --append the statement
512       l_data := element.name;
513       IF(l_data IS NOT NULL AND
514         length (l_data) > 0) then
515         dbms_lob.writeappend(l_document, length(l_data)+3, '...' || l_data);
516       END IF;
517 
518       --append the description (CLOB)
519       IF(element.description IS NOT NULL AND
520          dbms_lob.getlength(element.description) > 0)
521       THEN
522         dbms_lob.writeappend(l_document, 3, '...');
523         dbms_lob.append(l_document, element.description);
524       END IF;
525 
526     END LOOP;
527 
528 --    dbms_output.put_line('DOC_LENGTH = ' || dbms_lob.getlength(l_document));
529 --    dbms_output.put_line(substr(l_document,1,255));
530 
531     return get_segment_kwic(p_text_query, p_starttag, p_endtag, l_document);
532 
533   END get_set_snippet;
534 
535 
536 end CS_KB_KWIC_UTIL  ;