1 PACKAGE dbms_xdbt
2 IS
3
4 ------------
5 -- OVERVIEW
6 --
7 -- This package provides utilities for creating and managing conText
8 -- indexes on the XDB repository.
9 --
10 -- The preferred mode of operation is as follows
11 -- (a) Drop any existing preferences (dbms_xdbt.dropPreferences)
12 -- (b) re-create preferences for the index
13 -- (dbms_xdbt.createPreferences)
14 -- (c) Create the index
15 -- (dbms_xdbt.createIndex)
16 -- Verify that things have gone smoothly using
17 -- "select * from ctx_user_index_errors"
18 -- (d) Setup automatic sync'ing of the index
19 -- (dbms_xdbt.configureAutoSync)
20 -- (e) Sit back and relax
21 --
22 -- The package spec contains a list of package variables that
23 -- describe the configuration settings. These are intended to
24 -- cover some of the more basic customizations that installations
25 -- might require, but are not intended to be a complete set.
26 --
27 -- There are 2 ways to customize this package.
28 -- (a) Use a PL/SQL procedure to set the appropriate package variables
29 -- that control the relevant configurations, and then execute
30 -- the package. Obviously, this only applies to the set of existing
31 -- package variables
32 -- (b) The more general approach is to modify (in place, or as a copy)
33 -- this package to introduce the appropriate customizations
34 --
35 -- For instance, to change the auto-sync policy or the rowid log file, use
36 -- option (a). Settings declared CONSTANT (e.g. IndexMemory) need option (b).
37 --
38 -- NOTES:
39 -- If you're using this package as is, please note the following
40 -- (a) Rowid logging is off by default ('LogFile' is NULL). To enable it,
41 -- set the LOG_DIRECTORY parameter using ctx_adm.set_parameter
42 -- and set the 'LogFile' package variable to the log file name.
43 -- Setting 'LogFile' back to NULL turns rowid logging off again.
44 -- (b) Make sure that the MAX_INDEX_MEMORY parameter is at least as large
45 -- as 'IndexMemory' (50M as shipped). Otherwise, change the
46 -- 'IndexMemory' constant in this package appropriately
47 --
48 ------------
49
50 TYPE varcharset IS TABLE OF VARCHAR2(100); -- string-list type of the SkipFilter_Types, NullFilter_Types and StopWords settings
51
52 -------------------------------------------------------------
53 -- CONSTANTS
54 -------------------------------------------------------------
55
56 ----------------------
57 -- FILTERING OPTIONS
58 --
59 -- The following constants describe the kinds of filtering that can be
60 -- applied to a document.
61 -- USE_NULL_FILTER simply sends the document over to the charset converter
62 -- USE_INSO_FILTER uses the IFILTER API of INSO to convert the document
63 -- into HTML
64 -- SKIP_DATA completely ignores the document's contents for
65 -- filtering. (The document metadata is indexed, however)
66 ----------------------
67
68 USE_NULL_FILTER CONSTANT PLS_INTEGER := 1;
69 USE_INSO_FILTER CONSTANT PLS_INTEGER := 2;
70 SKIP_DATA CONSTANT PLS_INTEGER := 3;
71
72 ----------------------
73 -- SYNC OPTIONS
74 --
75 -- Two mechanisms of automatic sync are provided here.
76 -- SYNC_BY_PENDING_COUNT indicates that when the number of entries in the
77 -- pending queue reaches a threshold (MaxPendingCount), it is time to sync
78 -- SYNC_BY_TIME indicates that the index should be synced up at regular
79 -- intervals (SyncInterval)
80 -- SYNC_BY_PENDING_COUNT_AND_TIME is a combination of both these strategies
81 --
82 ----------------------
83 SYNC_BY_PENDING_COUNT CONSTANT PLS_INTEGER := 1;
84 SYNC_BY_TIME CONSTANT PLS_INTEGER := 2;
85 SYNC_BY_PENDING_COUNT_AND_TIME CONSTANT PLS_INTEGER := 3;
86 SyncTimeOut NUMBER := NULL; -- NOTE(review): a variable, not a constant; presumably the limit stored by setSyncTimeout - confirm in the body
87
88 -----------------------------------------------------------------------
89 -- CONFIGURATION SETTINGS
90 --
91 -- This section contains the default settings for the index. This
92 -- section should be changed as appropriate.
93 -- NOTE: IndexName and IndexMemory are CONSTANTs; changing them means editing this package
94 -----------------------------------------------------------------------
95 -- The (schema-qualified) name of the context index; default for syncIndex's myIndexName
96 IndexName CONSTANT VARCHAR2(32) := 'XDB.XDB$CI';
97
98 -- The default memory to be used for index creation and sync
99 -- NOTE: This must be less than (or equal to) the MAX_INDEX_MEMORY
100 -- parameter. Also the default for syncIndex's myIndexMemory
101 IndexMemory CONSTANT VARCHAR2(32) := '50M';
102
103
104 -------------------------------------------------------------
105 -- SYNC OPTIONS
106 -------------------------------------------------------------
107 -- The following section describes the automatic sync policy.
108 -- By default, the auto-sync policy (once it has been configured)
109 -- is to sync based on the pending count.
110
111 -- Sync based on pending count, on time, or on both (one of the SYNC_BY_* constants)
112 AutoSyncPolicy PLS_INTEGER := SYNC_BY_PENDING_COUNT;
113
114 -- This parameter determines the maximum size of the pending queue
115 -- before the index is sync-ed. Applies only when the sync policy
116 -- is SYNC_BY_PENDING_COUNT or SYNC_BY_PENDING_COUNT_AND_TIME
117 -- (default: 2 pending entries)
118 MaxPendingCount PLS_INTEGER := 2;
119
120 -- This parameter determines the interval - in minutes - at
121 -- which the "regular" sync should be performed on the index.
122 -- Applies only to the SYNC_BY_TIME and the SYNC_BY_PENDING_COUNT_AND_TIME
123 -- policies (default: every 60 minutes)
124 SyncInterval PLS_INTEGER := 60;
125
126 --
127 -- This parameter determines how frequently - in minutes - the pending
128 -- queue is polled. Applies only to the SYNC_BY_PENDING_COUNT
129 -- (and the SYNC_BY_PENDING_COUNT_AND_TIME) policies
130 -- (default: every 10 minutes)
131 CheckPendingCountInterval PLS_INTEGER := 10;
132
133 ----------------------------------------------------
134 -- LOGGING OPTIONS
135 --
136 -- Please ensure that the LOG_DIRECTORY parameter is
137 -- already set if you need rowid logging
138 ----------------------------------------------------
139 --
140 -- Logging options. This parameter determines the logfile used for
141 -- rowid logging during index creation etc.
142 -- Set this parameter to NULL to avoid rowid logging.
143 -- bug 16733810: by default, logging should be disallowed;
144 -- however, in createIndex, empty-string LogFile does not turn off
145 -- the logging. The default value is therefore NULL.
146 -- A user has to set both the LogFile variable and the LOG_DIRECTORY
147 -- parameter to enable logging.
148 --
149 LogFile VARCHAR2(32) := NULL;
150
151 -------------------------------------
152 -- FILTER OPTIONS
153 --
154 -- The following classes determine the filtering options based on the
155 -- mime type of the document
156 --
157 -- The SkipFilter_Types list contains a list of regular expressions
158 -- that describe mime types for which the document is *not* to be
159 -- filtered/indexed. (The document metadata, however, is still indexed)
160 -- Use this for document types that cannot really be indexed. Good
161 -- examples of this class are images, audio files etc
162 --
163 -- The NullFilter_Types list contains a list of regular expressions
164 -- that describe mime types of documents for which no INSO filtering
165 -- is required - these documents should be basically text formats
166 -- and the only filtering required should be character set conversion
167 -- (if needed)
168 -- NOTE(review): the defaults below use '%' wildcards (LIKE-style), not regex syntax - confirm the matching in the body
169 -- For any given document, the SkipFilter_Types list is first scanned
170 -- to determine if any regular expression in that list matches the
171 -- document's content type. If it does, then the document content is
172 -- not indexed.
173 -- Failing this, the NullFilter_Types list is then scanned. If any
174 -- regular expression in this list matches the document's content type,
175 -- the document is sent through character-set conversion.
176 -- Failing this, the document is filtered using the INSO filter (using the
177 -- IFILTER interfaces)
178 -------------------------------------
179 SkipFilter_Types varcharset := varcharset('image/%', 'audio/%', 'video/%',
180 'model/%');
181 NullFilter_Types varcharset := varcharset('%text/plain', '%text/html',
182 '%text/xml');
183
184 -------------------------------------
185 -- STOPWORD Settings
186 --
187 -- This list describes the set of stopwords over and above those
188 -- specified by the CTXSYS.DEFAULT_STOPLIST stoplist
189 -- The defaults are the single digits and the single letters in both cases
190 -------------------------------------
191 StopWords varcharset := varcharset('0','1','2','3','4','5','6',
192 '7','8','9',
193 'a','b','c','d','e','f','g','h','i',
194 'j','k','l','m','n','o','p','q','r',
195 's','t','u','v','w','x','y','z',
196 'A','B','C','D','E','F','G','H','I',
197 'J','K','L','M','N','O','P','Q','R',
198 'S','T','U','V','W','X','Y','Z'
199 );
200
201 --------------------------------------
202 -- LEXER preferences
203 --
204 -- This parameter determines whether multi-language lexers can be
205 -- used.
206 -- Not currently supported: this parameter should always be FALSE
207 --------------------------------------
208 UseMultiLexer BOOLEAN := false;
209
210 --------------------------------------
211 -- SECTION GROUP
212 --
213 -- This parameter determines the sectioner to use.
214 -- By default, this is an HTML section group. No zone sections have been
215 -- created - i.e. WITHIN searches are not possible.
216 -- If the vast majority of documents are XML or XML-like, consider using
217 -- the AUTO_SECTION_GROUP or the PATH_SECTION_GROUP or even a
218 -- NULL_SECTION_GROUP
219 --------------------------------------
220 SectionGroup VARCHAR2(100) := 'HTML_SECTION_GROUP';
221
222 --------------------------------------
223 -- PUBLIC INTERFACES
224 --
225 -- The public APIs exposed by this package
226 --------------------------------------
227
228 --
229 -- This procedure drops all preferences required by the context index.
230 -- Run it before createPreferences, which fails on already-existing preferences
231 PROCEDURE dropPreferences;
232
233 --
234 -- This procedure creates all preferences required by the context index
235 -- on the XDB repository.
236 -- The set of preferences includes Datastore, Storage, Filter, Lexer,
237 -- SectionGroup, Stoplist and Wordlist preferences
238 -- NOTE: This will raise exceptions if any of the preferences already
239 -- exist; call dropPreferences first to remove them
240 --
241 PROCEDURE createPreferences;
242
243 --
244 -- Creates the datastore preference (one of those covered by createPreferences)
245 -- Will raise an exception if the datastore preference already exists
246 PROCEDURE createDatastorePref;
247
248 --
249 -- Creates the storage preference (one of those covered by createPreferences)
250 -- Will raise an exception if the preference already exists
251 --
252 PROCEDURE createStoragePref;
253
254 --
255 -- Creates the section group (one of those covered by createPreferences)
256 -- Will raise an exception if the section group already exists
257 -- NOTE(review): presumably driven by the SectionGroup setting - confirm in the body
258 PROCEDURE createSectiongroupPref;
259
260 --
261 -- Creates the filter preference (one of those covered by createPreferences)
262 -- Will raise an exception if the preference already exists
263 -- NOTE(review): presumably driven by SkipFilter_Types / NullFilter_Types - confirm in the body
264 PROCEDURE createFilterPref;
265
266 --
267 -- Creates the lexer preference (one of those covered by createPreferences)
268 -- Will raise an exception if the preference already exists
269 -- NOTE(review): presumably driven by the UseMultiLexer setting - confirm in the body
270 PROCEDURE createLexerPref;
271
272 --
273 -- Creates the stoplist (one of those covered by createPreferences)
274 -- Will raise an exception if the stoplist already exists
275 -- NOTE(review): presumably populated from the StopWords setting - confirm in the body
276 PROCEDURE createStoplistPref;
277
278 --
279 -- Creates the wordlist (one of those covered by createPreferences)
280 -- Will raise an exception if the wordlist already exists
281 --
282 PROCEDURE createWordlistPref;
283
284 --
285 -- Creates the index
286 -- This requires the above preferences to have already been created.
287 -- (a) If LogFile is not NULL, the LOG_DIRECTORY parameter must be set
288 -- (rowid logging during index creation is off while LogFile is NULL)
289 -- (b) Ensure that the memory size specified to index creation is no greater
290 -- than the MAX_INDEX_MEMORY parameter
291 -- Afterwards, check "select * from ctx_user_index_errors" for problems
292 PROCEDURE createIndex;
293
294 --
295 -- Syncs up the index
296 -- This can be used to explicitly sync up the index.
297 -- The preferred mechanism is to set up automatic sync'ing with
298 -- the "configureAutoSync" procedure
299 -- Both parameters default to the package settings IndexName and IndexMemory
300 PROCEDURE syncIndex(myIndexName VARCHAR2 := Indexname,
301 myIndexMemory VARCHAR2 := IndexMemory);
302
303 --
304 -- Set a suggested time limit on the SYNC operation, in minutes.
305 -- SYNC_INDEX will process as many documents in the queue as possible
306 -- within the time limit.
307 -- A timeout value of NULL is equivalent to CTX_DDL.MAXTIME_UNLIMITED.
308 -- timeout: the limit in minutes; defaults to NULL
309 PROCEDURE setSyncTimeout(timeout IN INTEGER := NULL);
310
311 --
312 -- Optimizes the index
313 -- NOTE(review): takes no parameters; presumably operates on IndexName - confirm in the body
314 PROCEDURE optimizeIndex;
315
316 --
317 -- Configures for automatic sync of the index
318 -- NOTE: The system must be configured for job queues. Also, the
319 -- number of job queue processes must be non-zero
320 -- NOTE(review): presumably applies the SYNC OPTIONS settings (AutoSyncPolicy etc.) - confirm in the body
321 PROCEDURE configureAutoSync;
322
323 --
324 -- Procedure used by dbms_job to automatically sync up the context
325 -- index
326 -- Don't use this directly
327 -- NOTE(review): presumably the job for the pending-count policies - confirm in the body
328 PROCEDURE autoSyncJobByCount(myIndexName VARCHAR2, myMaxPendingCount NUMBER,
329 myIndexMemory VARCHAR2);
330 --
331 -- Procedure used by dbms_job to automatically sync up the context
332 -- index
333 -- Don't use this directly
334 -- NOTE(review): presumably the job for the time-based policies - confirm in the body
335 PROCEDURE autoSyncJobByTime(myIndexName VARCHAR2, myIndexMemory VARCHAR2);
336
337 --
338 -- The user-datastore procedure
339 -- Do *not* call this directly
340 -- NOTE(review): presumably rid is the row being indexed and outlob receives its text (Oracle Text user-datastore convention) - confirm
341 PROCEDURE xdb_datastore_proc(rid IN ROWID, outlob IN OUT NOCOPY CLOB);
342
343 end dbms_xdbt;