eTRM uses javascript that opens and focuses a separate window to display reports. Your browser does not appear to support javascript. This should not prevent you from using eTRM but it does mean the window focus feature will not work. Please check the contents of other windows on your desktop if you click on a link that does not appear to do anything.
[Home] [Help]
PACKAGE BODY: APPS.CS_KB_KWIC_UTIL

Source


1 PACKAGE BODY CS_KB_KWIC_UTIL AS
2 /* $Header: cskbkwicb.pls 120.3 2006/05/18 16:22:00 klou noship $ */
3 
4 /*
5  *
6  * +======================================================================+
7  * |                Copyright (c) 2005 Oracle Corporation                 |
8  * |                   Redwood Shores, California, USA                    |
9  * |                        All rights reserved.                          |
10  * +======================================================================+
11  *
12  *   FILENAME
13  *     cskbkwicb.pls
14  *   PURPOSE
15  *     Creates the package body for CS_KB_KWIC_UTIL
16  *     CS_KB_KWIC_UTIL supports the Keywords In Context implementation
17  *
18  *   HISTORY
19  *   12-APR-2005 HMEI Created.
20  *   11-JUL-2005 HMEI Changed get_segment_kwic to use different regular
21  *                    expressions for pre/post segement. Removed
22  *                    string reverse logic
23  *   12-JUL-2005 HMEI Changed get_segment_kwic to use custom logic for
24  *                    determining pre-segment.  Cannot use a single
25  *                    pattern ending in '$', since performance suffers.
26  *   18-JUN-2006 KLOU Fix bug 5217204
27  *                    Change size of l_kwic_segment to 32000 as the regexp_replace
28  *                    can return up to 32K of content.
29  */
30 
31 
32   -- Maximum keyword segment length
33   MAX_SEGMENT_LENGTH CONSTANT NUMBER := 200;
34   -- Left and Right padding used when calculating KWIC
35   SEGMENT_PADDING CONSTANT NUMBER := MAX_SEGMENT_LENGTH/2;
36 
37   --
38   -- This API highlights all keywords (p_text_query) in a given document
39   -- (p_document) using the specified start and end tags. This API is intended
40   -- for highlighting a short document, e.g. solution title or SR summary.
41   --
42   -- Parameters:
43   --   p_text_query VARCHAR2, keywords used for highlighting
44   --   p_document   VARCHAR2, document to be highlighted
45   --   p_starttag   VARCHAR2, start highlighting tag
46   --   p_endtag     VARCHAR2, end highlighting tag
47   -- Returns:
48   --   The highlighted document
49   -- Since 12.0
50   --
51   FUNCTION highlight_text
52   (
53   p_text_query  IN VARCHAR2,
54   p_document    IN VARCHAR2,
55   p_starttag    IN VARCHAR2,
56   p_endtag      IN VARCHAR2
57   ) RETURN VARCHAR2
58   IS
59     l_result_segment CLOB;
60   BEGIN
61     CTX_DOC.POLICY_MARKUP (
62       policy_name => 'CS_KB_KWIC_POLICY',
63       document => p_document,
64       text_query => p_text_query,
65       starttag => p_starttag,
66       endtag => p_endtag,
67       restab => l_result_segment);
68     return l_result_segment;
69   END highlight_text;
70 
71   --
72   -- This Utility method escapes regular expression operators in a string.
73   -- This is useful if you want to literally match a string that may contain
74   -- a regular expression operator.  (e.g. 'e*trade')
75   --
76   -- Parameters:
77   --   p_string VARCHAR2, the string to escape
78   -- Returns:
79   --   string with regexp special characters escaped.
80   -- Since 12.0
81   --
82   FUNCTION regexp_escape
83   (
84   p_string IN VARCHAR2
85   ) RETURN VARCHAR2
86   IS
87     l_string VARCHAR2(32767); -- the final string after escaping
88     l_special_chars VARCHAR2(100); --chars to escape
89     l_char VARCHAR2(8);
90   BEGIN
91     l_string := p_string;
92     l_special_chars := '\[](){}*+$^?|.'; -- '\' must come first
93 
94     FOR i in 1..length(l_special_chars) LOOP
95       l_char := substr(l_special_chars, i, 1);
96       l_string := replace(l_string, l_char, '\' || l_char);
97     END LOOP;
98 
99     return l_string;
100   END regexp_escape;
101 
102   --
103   -- This API highlights all keywords (p_text_query) in a given document
104   -- (p_document) using the specified start and end tags. It scans the
105   -- document to construct a highlighted segment that contains the most
106   -- number of distinct keywords, with some surrounding context.
107   -- If no keywords found, will return NULL
108   --
109   -- Parameters:
110   --   p_text_query VARCHAR2, keywords used for highlighting
111   --   p_starttag   VARCHAR2, start highlighting tag
112   --   p_endtag     VARCHAR2, end highlighting tag
113   --   p_document   VARCHAR2, document to be highlighted
114   -- Returns:
115   --   The highlighted KWIC (Key Words In Context) segment,
116   --   or NULL if no keywords found.
117   -- Since 12.0
118   --
119   FUNCTION get_segment_kwic
120   (
121   p_text_query  IN VARCHAR2,
122   p_starttag    IN VARCHAR2,
123   p_endtag      IN VARCHAR2,
124   p_document    IN CLOB
125   ) RETURN VARCHAR2
126   IS
127     l_restab CTX_DOC.highlight_tab; -- Keyword match info returned by CTX_DOC
128 
129     TYPE number_table IS TABLE OF INTEGER INDEX BY PLS_INTEGER;
130     -- word occupies the range [open, close)
131     l_open_positions number_table;   -- open positions of matching words
132     l_close_positions number_table;  -- close positions of matching words
133 
134     i INTEGER;
135     idx INTEGER;
136     openIdx INTEGER; -- index for looping through the open positions
137     closeIdx INTEGER; -- index for looping through the close positions
138 
139     -- Map to hold distinct keywords
140     TYPE distinct_word_map IS TABLE OF INTEGER INDEX BY VARCHAR2(32767);
141     l_distinct_words_master distinct_word_map; -- master list of distinct words
142     l_distinct_words distinct_word_map; -- distinct words for a given segment
143 
144     l_word VARCHAR2(32767);
145     l_len INTEGER;
146 
147     --used when calculating the best segment
148     l_word_start INTEGER;
149     l_word_end INTEGER;
150     l_segment_start INTEGER;
151     l_segment_end INTEGER;
152 
153     --represent the best segment
154     l_best_count INTEGER := 0; --best # of distinct keywords in any segment
155     l_best_segment_start INTEGER := null;
156     l_best_segment_end INTEGER := null;
157     l_best_segment VARCHAR2(1000);
158 
159     --pre and post segments (the "Context" part of KWIC)
160     l_pre_segment VARCHAR2(1000);
161     l_post_segment VARCHAR2(1000);
162     l_pre_segment_start INTEGER;
163     l_post_segment_end INTEGER;
164 
165     l_space_positions number_table;  -- used to calculate pre-segement
166     l_pre_pattern VARCHAR2(50); -- regular expression
167     l_post_pattern VARCHAR2(50); -- regular expression
168 
169     -- (5217204)
170     l_kwic_segment VARCHAR2(32000); -- the final kwic segment
171   BEGIN
172 
173     -- positions of matching words returned in l_restab
174     CTX_DOC.POLICY_HIGHLIGHT (
175       policy_name => 'CS_KB_KWIC_POLICY',
176       document => p_document,
177       text_query => p_text_query,
178       restab => l_restab );
179 
180     -- form master list of distinct keywords found in the document
181     i := l_restab.FIRST;
182     WHILE (i IS NOT NULL) LOOP
183       l_open_positions(i) := l_restab(i).offset;
184       l_close_positions(i) := l_restab(i).offset + l_restab(i).length;
185 
186       dbms_lob.read(p_document,
187                     l_restab(i).length,
188                     l_restab(i).offset,
189                     l_word );
190 
191       l_distinct_words_master( LOWER(l_word) ) := 1; --keys are the keywords
192 
193       i := l_restab.NEXT(i);
194     END LOOP;
195 
196     --dbms_output.put_line('# Distinct keywords:' || l_distinct_words_master.COUNT);
197 
198     -- Find the best segment (that with the most distinct keywords)
199     -- Loop through open and close positions and form the largest segment
200     -- possible. For each segment, keep track of distinct keywords.  Note
201     -- each segment begins and ends with a matching keyword
202     openIdx := l_open_positions.FIRST;
203     WHILE (openIdx IS NOT NULL AND
204            l_best_count < l_distinct_words_master.COUNT
205           ) LOOP
206       --segment start and end positions for this iteration
207       l_segment_start := l_open_positions(openIdx);
208       l_segment_end := null;
209 
210       l_distinct_words.DELETE; -- clear out keyword map.
211 
212       closeIdx := openIdx; -- slide the closeIdx to determine the segment close.
213       WHILE (closeIdx IS NOT NULL AND
214              l_close_positions(closeIdx) - l_segment_start <= MAX_SEGMENT_LENGTH
215              ) LOOP
216         l_segment_end := l_close_positions(closeIdx); --current end of segment
217 
218         -- read the current keyword
219         l_word_start := l_open_positions(closeIdx);
220         l_word_end := l_close_positions(closeIdx);
221         l_len := l_word_end - l_word_start;
222         dbms_lob.read(p_document,
223                       l_len,
224                       l_word_start,
225                       l_word );
226         --dbms_output.put_line(l_word_start || ' ' || l_word_end || ' [' || l_word || ']');
227         l_distinct_words( LOWER(l_word) ) := 1; -- add keyword to map
228 
229         closeIdx := l_close_positions.NEXT(closeIdx);
230       END LOOP;
231 
232       -- Note: A Corner Case:
233       -- l_segment_end is null if the keyword length > MAX_SEGMENT_LENGTH
234 
235       if(l_distinct_words.COUNT > l_best_count AND
236          l_segment_end IS NOT NULL)
237       then
238         l_best_count := l_distinct_words.COUNT;
239         l_best_segment_start := l_segment_start;
240         l_best_segment_end := l_segment_end;
241       end if;
242       --dbms_output.put_line( '[' || l_segment_start || ',' || l_segment_end || '] = ' || l_distinct_words.COUNT);
243 
244       if(closeIdx IS NOT NULL AND
245          l_segment_end IS NOT NULL AND
246          l_close_positions(closeIdx) - l_segment_end >= MAX_SEGMENT_LENGTH)
247       then
248         openIdx := l_open_positions.NEXT(closeIdx); --look-ahead optimization
249         --dbms_output.put_line( 'Looking AHEAD');
250       else
251         openIdx := l_open_positions.NEXT(openIdx);
252       end if;
253 
254     END LOOP;
255 
256     -- could not find a best segment
257     if(l_best_segment_start IS NULL) then return NULL; end if;
258 
259     -- now calculate the context surrounding the snippet
260     -- find pre segment start position
261     l_pre_segment_start := l_best_segment_start - SEGMENT_PADDING;
262     if(l_pre_segment_start < 1) then
263       l_pre_segment_start := 1;
264     end if;
265     -- find the post segment end position
266     l_post_segment_end := l_best_segment_end + SEGMENT_PADDING;
267     l_len := dbms_lob.getlength(p_document);
268     if(l_post_segment_end > l_len) then
269       l_post_segment_end := l_len + 1; --last position + 1
270     end if;
271 
272     -- get the middle segment (the one with the densest keywords)
273     l_len := l_best_segment_end - l_best_segment_start;
274     dbms_lob.read(p_document,
275                     l_len,
276                     l_best_segment_start,
277                     l_best_segment );
278 
279     -- now get the prefix segment
280     l_len := l_best_segment_start - l_pre_segment_start;
281     if(l_len > 0) then
282       dbms_lob.read(p_document,
283                     l_len,
284                     l_pre_segment_start,
285                     l_pre_segment );
286     end if;
287 
288     -- now get the postfix segment
289     l_len := l_post_segment_end - l_best_segment_end;
290     if(l_len > 0) then
291       dbms_lob.read(p_document,
292                     l_len,
293                     l_best_segment_end,
294                     l_post_segment );
295     end if;
296 
297 --    dbms_output.put_line('PRE: [' || l_pre_segment || ']');
298 
299     -- Now refine the pre and post segments
300     -- try to match up to 8 words on either side (pre and post)
301 
302     --NOTE: cannot use a single expression ending in '$'.  This
303     -- has terrible performance, probably due to excessive backtracking
304     --l_pre_pattern := '([^[:space:]]+[[:space:]]*){1,8}$'; -- greedy
305     l_pre_pattern := '[[:space:]]+'; -- just match spaces
306 
307     -- Refine the pre segment
308     -- Loop find all the occurences spaces in the pre-segment
309     -- Trim the pre-segment to include the last N words.
310     l_len := 1;
311     i := 0;
312     WHILE(l_len > 0) LOOP
313       l_len := regexp_instr( l_pre_segment,
314                              l_pre_pattern,
315                              l_len,  -- start search at position
316                              1,  -- return the first occurence
317                     	     1   -- return the position of last character + 1
318                              );
319       IF (l_len > 0) THEN
320         l_space_positions(i) := l_len;
321         i := i + 1;
322       END IF;
323 
324     END LOOP;
325 
326     IF( i > 0 ) THEN
327       i := i - 8; -- get up to eight words preceding the main snippet
328       IF( i < 0 ) THEN i := 0; END IF;
329       l_pre_segment := SUBSTR (l_pre_segment, l_space_positions(i));
330     END IF;
331 
332     /*l_len := regexp_instr( l_pre_segment,
333                            l_pre_pattern,
334                            1,  -- start search at position 1
335                            1,  -- return the first occurence
336                     	   0   -- return the position of first char in match
337                            );
338     IF(l_len > 0) THEN
339       l_pre_segment := SUBSTR ( l_pre_segment, l_len );
340     ELSE
341       l_pre_segment := '';
342     END IF;
343     */
344 
345 --    dbms_output.put_line('NEW PRE: [' || l_pre_segment || ']');
346 --    dbms_output.put_line('MID: [' || l_best_segment || ']');
347 --    dbms_output.put_line('POST: [' || l_post_segment || ']');
348 
349     l_post_pattern := '([[:space:]]*[^[:space:]]+){1,8}'; -- greedy
350     -- Refine the post segment
351     l_len := regexp_instr( l_post_segment,
352                            l_post_pattern,
353                            1,  -- start search at position 1
354                            1,  -- return the first occurence
355                            1   -- return the position of last character + 1
356                            );
357     IF(l_len > 0) THEN
358       l_post_segment := SUBSTR ( l_post_segment, 1, l_len - 1 );
359     ELSE
360       l_post_segment := '';
361     END IF;
362 
363 
364 --    dbms_output.put_line('NEW POST: [' || l_post_segment || ']');
365     -- combine the pre, mid, and post to form the KWIC segment
366     l_kwic_segment := l_pre_segment || l_best_segment || l_post_segment;
367 
368     -- surround keywords with the start and end tag, case insensitive
369     l_word := l_distinct_words_master.FIRST;
370     while(l_word IS NOT NULL) LOOP
371       --dbms_output.put_line(l_word || ' ==> ' || regexp_escape(l_word));
375                                        p_starttag || '\1' || p_endtag,
372       --cannot use regular replace(): need case insensitive match/replace
373       l_kwic_segment := regexp_replace(l_kwic_segment,
374                                        '(' || regexp_escape(l_word) || ')',
376                                        1,  --start index
377                                        0,  --occurences (0 means ALL)
378                                        'i' --case insensitive
379                                       );
380       l_word := l_distinct_words_master.NEXT(l_word);
381     END LOOP;
382 
383     return l_kwic_segment;
384   END get_segment_kwic;
385 
386   -- This function synthesizes a snippet from the SR notes of the given p_sr_id
387   -- and highlights it with the keywords (p_text_query)
388   -- using the given tags (p_starttag and p_endtag);
389   -- Note: this function does not include private notes of the SR
390   --
391   -- Parameters:
392   --   p_sr_id      NUMBER, service request id
393   --   p_text_query VARCHAR2, keywords used for highlighting
394   --   p_starttag   VARCHAR2, start highlighting tag
395   --   p_endtag     VARCHAR2, end highlighting tag
396   -- Returns:
397   --   Snippet with the keywords wrapped in tags
398   -- Since R12
399   FUNCTION get_sr_snippet
400   (
401   p_sr_id       IN NUMBER,
402   p_text_query  IN VARCHAR2,
403   p_starttag    IN VARCHAR2,
404   p_endtag      IN VARCHAR2
405   ) RETURN VARCHAR2
406   IS
407     l_document CLOB; -- the document from which the snippet will be calculated
408     l_data VARCHAR2(32767);
409     l_len NUMBER;
410 
411     CURSOR get_sr_notes(c_obj_id NUMBER) IS
412      SELECT notes, notes_detail
413       FROM jtf_notes_vl
414       WHERE source_object_code  = 'SR'
415       AND source_object_id = c_obj_id
416       AND note_status <> 'P'; -- IN ('E', 'I'); ignore private notes
417 
418   BEGIN
419     --Synthesize the document out of SR notes
420     dbms_lob.createtemporary(l_document, TRUE, dbms_lob.call);
421     FOR element IN get_sr_notes( p_sr_id ) LOOP
422       --append the note (VARCHAR)
423       l_data := element.notes;
424       IF(l_data IS NOT NULL AND
425         length (l_data) > 0) then
426         dbms_lob.writeappend(l_document, length(l_data)+3, '...' || l_data);
427       END IF;
428 
429       --append the note detail (CLOB)
430       IF(element.notes_detail IS NOT NULL AND
431          dbms_lob.getlength(element.notes_detail) > 0)
432       THEN
433         dbms_lob.writeappend(l_document, 3, '...');
434         dbms_lob.append(l_document, element.notes_detail);
435       END IF;
436 
437     END LOOP;
438 
439 --    dbms_output.put_line('DOC_LENGTH = ' || dbms_lob.getlength(l_document));
440 --    dbms_output.put_line(substr(l_document,1,255));
441 
442     return get_segment_kwic(p_text_query, p_starttag, p_endtag, l_document);
443 
444   END get_sr_snippet;
445 
446   --
447   -- This function synthesizes the solution statements of the given p_set_id
448   -- and highlights it with the keywords (p_text_query)
449   -- using the given tags (p_starttag and p_endtag);
450   -- Statements used in this function should respect solution security.
451   --
452   -- Parameters:
453   --   p_set_id     NUMBER, set id
454   --   p_text_query VARCHAR2, keywords used for highlighting
455   --   p_starttag   VARCHAR2, start highlighting tag
456   --   p_endtag     VARCHAR2, end highlighting tag
457   -- Returns:
458   --   Snippet with the keywords wrapped in tags
459   -- Since R12
460   --
461   FUNCTION get_set_snippet
462   (
463   p_set_id      IN NUMBER,
464   p_text_query  IN VARCHAR2,
465   p_starttag    IN VARCHAR2,
466   p_endtag      IN VARCHAR2
467   ) RETURN VARCHAR2
468   IS
469     l_document CLOB; -- the document from which the snippet will be calculated
470     l_data VARCHAR2(32767);
471     l_len NUMBER;
472 
473     CURSOR get_element_content(p_set_id NUMBER) IS
474       SELECT e.name, e.description
475 	   FROM cs_kb_set_eles b, cs_kb_elements_vl e
476         WHERE e.element_id = b.element_id
477 		 AND b.set_id = p_set_id
478 		 AND e.status = 'PUBLISHED'
479 		 AND e.access_level >= Cs_Kb_Security_Pvt.GET_STMT_VISIBILITY_POSITION;
480 
481   BEGIN
482     --Synthesize the document out of solution statements
483     dbms_lob.createtemporary(l_document, TRUE, dbms_lob.call);
484 
485     FOR element IN get_element_content( p_set_id ) LOOP
486       --append the statement
487       l_data := element.name;
488       IF(l_data IS NOT NULL AND
489         length (l_data) > 0) then
490         dbms_lob.writeappend(l_document, length(l_data)+3, '...' || l_data);
491       END IF;
492 
493       --append the description (CLOB)
494       IF(element.description IS NOT NULL AND
495          dbms_lob.getlength(element.description) > 0)
496       THEN
497         dbms_lob.writeappend(l_document, 3, '...');
498         dbms_lob.append(l_document, element.description);
499       END IF;
500 
501     END LOOP;
502 
503 --    dbms_output.put_line('DOC_LENGTH = ' || dbms_lob.getlength(l_document));
504 --    dbms_output.put_line(substr(l_document,1,255));
505 
506     return get_segment_kwic(p_text_query, p_starttag, p_endtag, l_document);
507 
508   END get_set_snippet;
509 
510 
511 end CS_KB_KWIC_UTIL  ;