1 PACKAGE BODY dbms_xdbt AS
2
3 -----------------------------------------------------------------------
4 -- CONFIGURATION SETTINGS
5 --
6 -- This section contains the default settings for the index. This
7 -- section should be changed as appropriate.
8 --
9 -----------------------------------------------------------------------
10
11 --
12 -- The following are the default values for the policy_filter
13 -- used to filter out HTML, from BLOB into CLOB.
14 -- 'Policy-based' procedures do not make use of the Ctx index.
15 --
16 PolicyFilterName VARCHAR2(32) := IndexName || '_POLICYFILTER';
17 BlobLanguage VARCHAR2(32) := NULL;
18 BlobFormat VARCHAR2(32) := 'BINARY';
19 BlobCharset VARCHAR2(32) := NULL;
20
21 --
22 -- The following are the default names for the various preferences
23 --
24 DatastorePref VARCHAR2(32) := IndexName || '_DATASTORE';
25 AutoFilterPref VARCHAR2(32) := IndexName || '_AUTO_FILTER';
26 FilterPref VARCHAR2(32) := IndexName || '_FILTER';
27 SectionGroupPref VARCHAR2(32) := IndexName || '_SECTIONGROUP';
28 MultiLexerPref VARCHAR2(32) := IndexName || '_LEXER';
29 DefaultLexerPref VARCHAR2(32) := IndexName || '_DEFAULT_LEXER';
30 WordlistPref VARCHAR2(32) := IndexName || '_WORDLIST';
31 StoplistPref VARCHAR2(32) := IndexName || '_STOPLIST';
32 StoragePref VARCHAR2(32) := IndexName || '_STORAGE';
33
34 LanguageColumn VARCHAR2(32) := '"XMLDATA"."LANGUAGE"';
35
36 -----------------------------------------------------------------------
37 -- PRIVATE ROUTINES
38 -----------------------------------------------------------------------
39
40 --
41 -- This function determines the filtering required by a document
42 -- based on its mime type.
43 --
44 FUNCTION getFilterOption(mimeType IN VARCHAR2) RETURN PLS_INTEGER;
45
46 --
47 -- This function determines the internal character setid given an
48 -- external character set name
49 --
50 FUNCTION getCharsetId(extCSName IN VARCHAR2) RETURN NUMBER;
51
52 -- This function determines if a sync is needed.
53 FUNCTION checkSync(myIndexName VARCHAR2, myMaxPendingCount NUMBER)
54 RETURN BOOLEAN;
55
56 --
57 -- This function creates the index
58 --
59 PROCEDURE createIndex IS
60 sqlstr VARCHAR2(4000);
61 BEGIN
62
63 -- Turn rowid logging on if requested
64 IF LogFile IS NOT NULL THEN
65 -- Turn rowid logging on during a create index
66 BEGIN
67 ctxsys.ctx_output.end_log;
68 EXCEPTION WHEN OTHERS THEN NULL;
69 END;
70 ctxsys.ctx_output.start_log(LogFile);
71 ctxsys.ctx_output.add_event(ctxsys.ctx_output.EVENT_INDEX_PRINT_ROWID);
72 END IF;
73
74 -- Actually create the index
75 sqlstr := 'create index ' || IndexName ||
76 ' on xdb.xdb$resource p(value(p))' ||
77 ' indextype is ctxsys.context' ||
78 ' parameters (''datastore ' || DatastorePref ||
79 ' storage ' || StoragePref ||
80 ' filter ' || FilterPref ||
81 ' section group ' || SectionGroupPref ;
82
83 if UseMultiLexer = true THEN
84 sqlstr := sqlstr || ' language column ' || LanguageColumn ||
85 ' lexer ' || MultiLexerPref;
86 ELSE
87 sqlstr := sqlstr || ' lexer ' || DefaultLexerPref;
88 END IF;
89 sqlstr := sqlstr || ' wordlist ' || WordlistPref ||
90 ' stoplist ' || StoplistPref ||
91 ' memory ' || IndexMemory ||
92 ''')';
93
94 execute immediate sqlstr;
95 END createIndex;
96
97 --
98 -- This function drops all preferences used by the context index on XDB
99 --
100 PROCEDURE dropPreferences IS
101 BEGIN
102 -- datastore preference
103 BEGIN
104 ctx_ddl.drop_policy(policy_name => PolicyFilterName);
105 EXCEPTION WHEN OTHERS THEN NULL; END;
106 -- policy preference
107 BEGIN
108 ctx_ddl.drop_preference(DatastorePref);
109 EXCEPTION WHEN OTHERS THEN NULL; END;
110 -- filter preference
111 BEGIN
112 ctx_ddl.drop_preference(FilterPref);
113 EXCEPTION WHEN OTHERS THEN NULL; END;
114 BEGIN
115 ctx_ddl.drop_preference(AutoFilterPref);
116 EXCEPTION WHEN OTHERS THEN NULL; END;
117 -- section group
118 BEGIN
119 ctx_ddl.drop_section_group(SectionGroupPref);
120 EXCEPTION WHEN OTHERS THEN NULL; END;
121 -- lexer preferences
122 BEGIN
123 ctx_ddl.drop_preference(MultiLexerPref);
124 EXCEPTION WHEN OTHERS THEN NULL; END;
125 BEGIN
126 ctx_ddl.drop_preference(DefaultLexerPref);
127 EXCEPTION WHEN OTHERS THEN NULL; END;
128 -- wordlist preferences
129 BEGIN
130 ctx_ddl.drop_preference(WordlistPref);
131 EXCEPTION WHEN OTHERS THEN NULL; END;
132 -- stoplist preferences
133 BEGIN
134 ctx_ddl.drop_stoplist(StoplistPref);
135 EXCEPTION WHEN OTHERS THEN NULL; END;
136 -- storage preference
137 BEGIN
138 ctx_ddl.drop_preference(StoragePref);
139 EXCEPTION WHEN OTHERS THEN NULL; END;
140
141 END dropPreferences;
142
143 --
144 -- This function creates all preferences required by the cntext index on XDB
145 --
146 PROCEDURE createPreferences IS
147 LexerPref VARCHAR2(32);
148 BEGIN
149 createDatastorePref;
150 createFilterPref;
151 createLexerPref;
152 createStoplistPref;
153 createWordlistPref;
154 createStoragePref;
155 createSectiongroupPref;
156
157 IF UseMultiLexer = true THEN
158 LexerPref := MultiLexerPref;
159 ELSE
160 LexerPref := DefaultLexerPref;
161 END IF;
162
163 ctx_ddl.create_policy(policy_name =>PolicyFilterName,
164 filter =>AutoFilterPref);
165 END createPreferences;
166
167 --
168 -- This function creates the datastore preference. A USER datastore
169 -- is used.
170 --
171 PROCEDURE createDatastorePref IS
172 BEGIN
173 ctx_ddl.create_preference(DatastorePref, 'USER_DATASTORE');
174 ctx_ddl.set_attribute(DatastorePref, 'PROCEDURE', 'XDB_DATASTORE_PROC');
175 ctx_ddl.set_attribute(DatastorePref, 'OUTPUT_TYPE', 'CLOB');
176 END createDatastorePref;
177
178 --
179 -- Creates the filter preference.
180 -- A NULL filter is used - the user datastore procedure calls the IFILTER
181 -- APIs to INSO when appropriate
182 --
183 PROCEDURE createFilterPref IS
184 BEGIN
185 ctx_ddl.create_preference(FilterPref, 'NULL_FILTER');
186 ctx_ddl.create_preference(AutoFilterPref, 'AUTO_FILTER');
187 END createFilterPref;
188
189 --
190 -- Creates the section group.
191 --
192 PROCEDURE createSectiongroupPref IS
193 BEGIN
194 ctx_ddl.create_section_group(SectionGroupPref, SectionGroup);
195 END createSectiongroupPref;
196
197 --
198 -- Creates the lexer preferences
199 -- Creates a default lexer with base letter transcriptions, and
200 -- adds this as a sub-lexer of a multi-lexer
201 -- Currently, the multi-lexer is not used.
202 --
203 PROCEDURE createLexerPref IS
204 BEGIN
205 ctx_ddl.create_preference(DefaultLexerPref, 'BASIC_LEXER');
206 -- Turn on base letter translations
207 ctx_ddl.set_attribute(DefaultLexerPref, 'base_letter', 'YES');
208
209 ctx_ddl.create_preference(MultiLexerPref, 'MULTI_LEXER');
210 ctx_ddl.add_sub_lexer(MultiLexerPref, 'DEFAULT', DefaultLexerPref);
211 END createLexerPref;
212
213 --
214 -- Creates the word-list preferences
215 --
216 PROCEDURE createWordlistPref IS
217 BEGIN
218 ctx_ddl.create_preference(WordlistPref, 'BASIC_WORDLIST');
219
220 -- default values for stemming and fuzzy match
221 ctx_ddl.set_attribute(WordlistPref, 'fuzzy_match', 'AUTO');
222 ctx_ddl.set_attribute(WordlistPref, 'stemmer', 'AUTO');
223 END createWordlistPref;
224
225 --
226 -- Creates the stoplist.
227 -- Imports the stopwords from CTXSYS.DEFAULT_STOPLIST
228 -- And adds the stopwords specified in StopWords
229 -- By default, NUMBERS are treated as a stopclass
230 --
231 PROCEDURE createStoplistPref IS
232 i PLS_INTEGER;
233 wordset varcharset := varcharset();
234 BEGIN
235 --
236 -- You can also create a MULTI_STOPLIST instead of a basic stoplist
237 -- The problem then is to specify the appropriate language for the
238 -- stopwords that are part of the default stoplist, and the stopwords
239 -- that the user provides
240 -- ctx_ddl.create_stoplist(StoplistPref, 'MULTI_STOPLIST');
241 --
242 IF UseMultiLexer = true THEN
243 ctx_ddl.create_stoplist(StoplistPref, 'MULTI_STOPLIST');
244 ELSE
245 ctx_ddl.create_stoplist(StoplistPref, 'BASIC_STOPLIST');
246 END IF;
247
248 -- don't index any numbers
249 ctx_ddl.add_stopclass(StoplistPref, 'NUMBERS');
250
251 -- extend the local stopwords array
252 wordset.extend(4096);
253 SELECT spw_word BULK COLLECT INTO wordset
254 FROM ctx_stopwords
255 WHERE spw_stoplist = 'DEFAULT_STOPLIST' and
256 spw_owner = 'CTXSYS';
257
258 -- add all stopwords for the default stoplist to our stoplist
259 FOR i in 1..wordset.COUNT LOOP
260 ctx_ddl.add_stopword(StoplistPref, wordset(i));
261 END LOOP;
262
263 -- Then add our own specific stop words
264 FOR i in 1..StopWords.COUNT LOOP
265 BEGIN
266 ctx_ddl.add_stopword(StoplistPref, StopWords(i));
267 EXCEPTION WHEN OTHERS THEN NULL; END;
268 END LOOP;
269
270 END createStoplistPref;
271
272 --
273 -- Storage preferences
274 -- Determine if the $I table and the $X index require a special tablespace
275 -- or any other properties
276 -- Note that prefix and substr indexing are turned off
277 --
278 PROCEDURE createStoragePref IS
279 -- The tablespace of the tables/indexes that constitute the context index.
280 IndexTableSpace VARCHAR2(32);
281 ts_clause varchar2(100);
282 BEGIN
283
284 -- Get the tablespace of xdb$resource, that can be considered as the
285 -- default tablespace. Only one row can be returned from the query.
286 IndexTableSpace := xdb.dbms_xdb.getxdb_tablespace();
287 ts_clause := 'tablespace ' || IndexTableSpace;
288
289 ctx_ddl.create_preference(StoragePref, 'BASIC_STORAGE');
290 ctx_ddl.set_attribute(StoragePref, 'I_TABLE_CLAUSE', ts_clause);
291 ctx_ddl.set_attribute(StoragePref, 'K_TABLE_CLAUSE', ts_clause);
292 ctx_ddl.set_attribute(StoragePref, 'R_TABLE_CLAUSE',
293 ts_clause || ' lob(data) store as (cache)');
294 ctx_ddl.set_attribute(StoragePref, 'N_TABLE_CLAUSE', ts_clause);
295 ctx_ddl.set_attribute(StoragePref, 'I_INDEX_CLAUSE',
296 ts_clause || ' compress 2');
297 ctx_ddl.set_attribute(StoragePref, 'P_TABLE_CLAUSE', ts_clause);
298 END createStoragePref;
299
300 --
301 -- Determines the filtering to use for a document
302 --
303 FUNCTION getFilterOption(mimeType IN VARCHAR2) RETURN PLS_INTEGER IS
304 filterOption PLS_INTEGER := USE_INSO_FILTER;
305 i PLS_INTEGER;
306 BEGIN
307 -- We have 3 choices.
308 -- (a) Skip the data completely - this is true for resources that
309 -- contain images, audios etc
310 -- (b) No filtering required - this can be used when the data
311 -- is a text type
312 -- (c) Use INSO filter - for everything else
313
314 FOR i IN 1..SkipFilter_Types.COUNT LOOP
315 IF mimeType LIKE SkipFilter_Types(i) THEN
316 filterOption := SKIP_DATA;
317 EXIT;
318 END IF;
319 END LOOP;
320
321 FOR i in 1..NullFilter_Types.COUNT LOOP
322 IF mimeType LIKE NullFilter_Types(i) THEN
323 filterOption := USE_NULL_FILTER;
324 EXIT;
325 END IF;
326 END LOOP;
327
328 RETURN filterOption;
329 END getFilterOption;
330
331 --
332 -- Determines the internal charset-id for an external charset name
333 -- Currently hardcoded to 2 (represents UTF-8)
334 --
335 FUNCTION getCharsetId(extCSName IN VARCHAR2) RETURN NUMBER IS
336 intCSName varchar2(255);
337 BEGIN
338 intCSName := utl_gdk.charset_map(extCSName, utl_gdk.IANA_TO_ORACLE);
339 IF intCSName is null THEN
340 RETURN NLS_CHARSET_ID(extCSName);
341 ELSE
342 RETURN NLS_CHARSET_ID(intCSName);
343 END IF;
344 END getCharsetId;
345
346 PROCEDURE annotate_doclob(author IN varchar2,
347 dispname IN varchar2, -- resource's display name
348 rescomment IN varchar2, -- resource creation comment
349 owner IN varchar2, -- owner user name
350 creator IN varchar2, -- creator user name
351 lastmod IN varchar2, -- last modifier user name
352 resextra IN clob, -- resource extra properties
353 xmlref_lob IN clob, -- xmldata
354 outlob IN OUT NOCOPY clob) IS
355 begin
356 -- Write the metadata fields into output lob
357 if author is not null then
358 dbms_lob.writeappend(outlob, length(author), author);
359 dbms_lob.writeappend(outlob, 1, ' ');
360 end if;
361 if dispname is not null then
362 dbms_lob.writeappend(outlob, length(dispname), dispname);
363 dbms_lob.writeappend(outlob, 1, ' ');
364 end if;
365 if rescomment is not null then
366 dbms_lob.writeappend(outlob, length(rescomment), rescomment);
367 dbms_lob.writeappend(outlob, 1, ' ');
368 end if;
369 if owner is not null then
370 dbms_lob.writeappend(outlob, length(owner), owner);
371 dbms_lob.writeappend(outlob, 1, ' ');
372 end if;
373 if creator is not null then
374 dbms_lob.writeappend(outlob, length(creator), creator);
375 dbms_lob.writeappend(outlob, 1, ' ');
376 end if;
377 if lastmod is not null then
378 dbms_lob.writeappend(outlob, length(lastmod), lastmod);
379 dbms_lob.writeappend(outlob, 1, ' ');
380 end if;
381
382 -- check if resextra holds data
383 if resextra is not null and dbms_lob.getlength(resextra) > 0 then
384 dbms_lob.append(outlob, resextra);
385 dbms_lob.writeappend(outlob, 1, ' ');
386 end if;
387
388 -- check if xmlref/xmllob holds data
389 if xmlref_lob is not null and dbms_lob.getlength(xmlref_lob) > 0 then
390 dbms_lob.append(outlob, xmlref_lob);
391 end if;
392 END annotate_doclob;
393
394 --
395 -- The datastore procedure
396 -- This reads the data from the underlying row
397 -- Some of the metadata of the document are also indexed - notably,
398 -- the author, the creation comment of the resource, the display name etc
399 --
400 PROCEDURE xdb_datastore_proc(rid IN ROWID, outlob IN OUT NOCOPY CLOB) IS
401 author varchar2(128); -- the author
402 dispname varchar2(128); -- resource's display name
403 rescomment varchar2(128); -- resource creation comment
404 contype varchar2(80); -- mimetype of the resource
405 xmllob blob; -- blob containing inline resource
406 xmlref_lob clob; -- xmldata
407 charset varchar2(128); -- character set of the resource
408 resextra clob; -- extra properties about the resource
409 owneridr raw(16); -- owner id in its raw form
410 creatoridr raw(16); -- creator id in its raw form
411 lastmodidr raw(16); -- last modifier id in its raw form
412 owner DBMS_QUOTED_ID; -- owner user name
413 creator DBMS_QUOTED_ID; -- creator user name
414 lastmod DBMS_QUOTED_ID; -- last modifier user name
415
416 begin
417 if (dbms_lob.getlength(outlob) <> 0) then
418 raise_application_error(-20010, 'non-zero lob');
419 end if;
420
421 -- Get the columns from the resource row
422 SELECT e.xmldata.author, e.xmldata.dispname,
423 e.xmldata.rescomment, e.xmldata.contype,
424 CASE WHEN e.xmldata.xmlref IS NOT NULL
425 THEN DEREF(e.xmldata.xmlref).getclobval()
426 ELSE NULL END,
427 e.xmldata.xmllob, e.xmldata.charset,e.xmldata.ownerid,
428 e.xmldata.creatorid, e.xmldata.lastmodifierid, e.xmldata.resextra
429 INTO
430 author, dispname, rescomment, contype, xmlref_lob, xmllob,
431 charset, owneridr, creatoridr, lastmodidr, resextra
432
433 FROM xdb.xdb$resource e
434 WHERE ROWID = rid;
435
436 -- get the user names for the owner, creator and the last modifier
437 xdb.dbms_xdbz0.get_username(owneridr, owner);
438 xdb.dbms_xdbz0.get_username(creatoridr, creator);
439 xdb.dbms_xdbz0.get_username(lastmodidr, lastmod);
440
441 annotate_doclob(author,
442 dispname,
443 rescomment,
444 owner,
445 creator,
446 lastmod,
447 resextra,
448 xmlref_lob,
449 outlob);
450
451 -- If resource data is stored in the blob, filter it
452 IF xmllob IS NOT NULL AND dbms_lob.getlength(xmllob) > 0 THEN
453 DECLARE
454 filterOption PLS_INTEGER; -- what kind of filter should I use
455 BEGIN
456 filterOption := getFilterOption(conType);
457 IF (filterOption = USE_NULL_FILTER) THEN
458 DECLARE
459 amount number := dbms_lob.getlength(xmllob);
460 dest_offset number := dbms_lob.getlength(outlob) + 1;
461 src_offset number := 1;
462 blob_csid number;
463 lang_context integer := 0;
464 warning integer := 0;
465 BEGIN
466 blob_csid := getCharsetId(charset);
467 dbms_lob.convertToClob(outlob, xmllob, amount, dest_offset,
468 src_offset, blob_csid, lang_context,
469 warning);
470 END;
471 ELSIF (filterOption = USE_INSO_FILTER) THEN
472 -- BUG 8915200: IFILTER deprecated since 9i.
473 -- ctx_doc.ifilter(xmllob, outlob);
474 -- This solution was suggested by mfaisal.
475 ctx_doc.policy_filter(policy_name=>PolicyFilterName,
476 document =>xmllob,
477 restab =>outlob,
478 plaintext =>FALSE, -- yes we want HTML / XML
479 language =>BlobLanguage,
480 format =>BlobFormat,
481 charset =>BlobCharset);
482 annotate_doclob(author,
483 dispname,
484 rescomment,
485 owner,
486 creator,
487 lastmod,
488 resextra,
489 xmlref_lob,
490 outlob);
491 END IF;
492 END;
493 END IF;
494
495 END xdb_datastore_proc;
496
497 --
498 -- Sync's up the index
499 --
500 PROCEDURE syncIndex(myIndexName VARCHAR2, myIndexMemory VARCHAR2) IS
501 BEGIN
502 ctx_ddl.sync_index(myIndexName, myIndexMemory, maxtime=>SyncTimeOut);
503 END syncIndex;
504
505 --
506 -- Set time limit for Sync Index
507 PROCEDURE setSyncTimeout(timeout IN INTEGER := NULL)
508 IS
509 BEGIN
510 -- Don't care about weird / negative values !
511 SyncTimeOut := timeout;
512 END setSyncTimeOut;
513
514 --
515 -- Optimizes the index
516 --
517 PROCEDURE optimizeIndex IS
518 BEGIN
519 null;
520 -- ctx_ddl.optimize_index(IndexName, lvl, OptimizeTime, NULL, NULL);
521 END optimizeIndex;
522
523 --
524 -- This function determines if a sync is needed.
525 -- This is really applicable ONLY when the sync policy is _BY_PENDING_COUNT
526 -- This function looks at the pending queue, and determines if the
527 -- queue is larger than the predefined threshold.
528 -- NOTE: This function is only to be used by the autoSyncJob procedure
529 --
530 FUNCTION checkSync(myIndexName VARCHAR2, myMaxPendingCount NUMBER)
531 RETURN BOOLEAN IS
532 doSync BOOLEAN := false;
533 pendingCount NUMBER;
534 pos NUMBER;
535 iname VARCHAR2(128);
536 BEGIN
537 -- lrg 9312645: myIndexName may be of format schema.indexname
538 pos := InStr(myIndexName, '.');
539 iname := SubStr(myIndexName, pos+1);
540 -- Check to see if there are many documents waiting to be indexed
541 SELECT COUNT(*) INTO pendingCount
542 FROM ctx_user_pending
543 WHERE pnd_index_name = DBMS_ASSERT.SIMPLE_SQL_NAME(iname);
544
545 IF pendingCount > myMaxPendingCount THEN
546 doSync := true;
547 END IF;
548
549 -- Is the index in a reasonable state to be sync'ed
550 RETURN doSync;
551 END checkSync;
552
553 --
554 -- procedure used by dbms_jobs to automatically sync the index
555 --
556 PROCEDURE autoSyncJobByCount(myIndexName VARCHAR2, myMaxPendingCount NUMBER,
557 myIndexMemory VARCHAR2) IS
558 BEGIN
559 IF checkSync(myIndexname, myMaxPendingCount) = true THEN
560 syncIndex(myIndexName, myIndexMemory);
561 END IF;
562 END autoSyncJobByCount;
563
564 --
565 -- procedure used by dbms_jobs to automatically sync the index
566 --
567 PROCEDURE autoSyncJobByTime(myIndexName VARCHAR2,
568 myIndexmemory VARCHAR2) IS
569 BEGIN
570 syncIndex(myIndexName, myIndexMemory);
571 END autoSyncJobByTime;
572
573 --
574 -- Configures auto-sync
575 -- The system must be configured for job queues
576 --
577 PROCEDURE configureAutoSync IS
578 job number;
579 what varchar2(255);
580 quote varchar2(1) := '''';
581 i PLS_INTEGER;
582 jobInterval PLS_INTEGER;
583 jInterval varchar(20);
584 BEGIN
585 IF (AutoSyncPolicy = SYNC_BY_TIME or
586 AutoSyncPolicy = SYNC_BY_PENDING_COUNT_AND_TIME) THEN
587 what := 'xdb.dbms_xdbt.autoSyncJobByTime(' || quote ||
588 IndexName || quote || ',' ||
589 quote || IndexMemory || quote || ');' ;
590 jobInterval := SyncInterval;
591 END IF;
592 IF (AutoSyncPolicy = SYNC_BY_PENDING_COUNT or
593 AutoSyncPolicy = SYNC_BY_PENDING_COUNT_AND_TIME) THEN
594 what := 'xdb.dbms_xdbt.autoSyncJobByCount(' || quote ||
595 IndexName || quote || ',' ||
596 MaxPendingCount || ',' ||
597 quote || IndexMemory || quote || ');';
598 jobInterval := CheckPendingCountInterval;
599 END IF;
600
601 i := (jobInterval * 1000000) / 1440;
602 jInterval := 'SYSDATE+' || to_char((i*1.0) / 1000000);
603 dbms_job.submit(job, what, interval=>jInterval);
604 --dbms_output.put_line('configureAutoSync =' || jInterval);
605 END configureAutoSync;
606
607 END dbms_xdbt;