DBA Data[Home] [Help]

PACKAGE: XDB.DBMS_XDBT

Source


1 PACKAGE     dbms_xdbt
2 IS
3 
4   ------------
5   --  OVERVIEW
6   --
7   --    This package provides utilities for creating and managing conText
8   --    indexes on the XDB repository.
9   --
10   --    The preferred mode of operation is as follows
11   --    (a) drop any existing preferences. dbms_xdbt.dropPreferences
12   --    (b) re-create preferences for the index
13   --        (dbms_xdbt.createPreferences)
14   --    (c) Create the index
15   --        (dbms_xdbt.createIndex)
16   --        Verify that things have gone smoothly using
17   --          "select * from ctx_user_index_errors"
18   --    (d) Setup automatic sync'ing of the index
19   --        (dbms_xdbt.configureAutoSync)
20   --    (e) Sit back and relax
21   --
22   --    The package spec contains a list of package variables that
23   --    describe the configuration settings. These are intended to
24   --    cover some of the more basic customizations that installations
25   --    might require, but are not intended to be a complete set.
26   --
27   --    There are 2 ways to customize this package.
28   --    (a) Use a PL/SQL procedure to set the appropriate package variables
29   --        that control the relevant configurations, and then execute
30   --        the package. Obviously, this only applies to the set of existing
31   --        package variables
32   --    (b) The more general approach is to modify (in place, or as a copy)
33   --        this package to introduce the appropriate customizations
34   --
35   --    For instance, to change the auto-sync policy you could use option
36   --    (a); IndexName and IndexMemory are CONSTANT and need option (b).
37   --
38   --    NOTES:
39   --      If you're using this package as is, please note the following
40   --    (a) Make sure that the LOG_DIRECTORY parameter is set using
41   --        ctx_adm.set_parameter
42   --        Alternately, turn off rowid logging by setting the
43   --        'LogFile' package variable to NULL.
44   --    (b) Make sure that the MAX_INDEX_MEMORY parameter is at least
45   --        as large as 'IndexMemory' (50M by default). Otherwise change
46   --        the 'IndexMemory' setting appropriately
47   --
48   ------------
49 
50 TYPE varcharset IS TABLE OF VARCHAR2(100);  -- list-of-strings type used by the filter-type and stopword settings below
51 
52 -------------------------------------------------------------
53 -- CONSTANTS
54 -------------------------------------------------------------
55 
56 ----------------------
57 -- FILTERING OPTIONS
58 --
59 -- The following constants describe the kinds of filtering we may want
60 -- to do.
61 -- USE_NULL_FILTER simply sends the document over to the charset converter
62 -- USE_INSO_FILTER uses the IFILTER api of INSO to convert the document
63 --    into HTML
64 -- SKIP_DATA is used to completely ignore the document's contents for
65 -- filtering. (The document metadata is indexed, however)
66 ----------------------
67 
68 USE_NULL_FILTER            CONSTANT PLS_INTEGER := 1;
69 USE_INSO_FILTER            CONSTANT PLS_INTEGER := 2;
70 SKIP_DATA                  CONSTANT PLS_INTEGER := 3;
71 
72 ----------------------
73 -- Sync options
74 --
75 -- There are basically two mechanisms of automatic sync provided here.
76 -- SYNC_BY_PENDING_COUNT indicates that when the number of entries in the
77 --   pending queue reaches a threshold, it is time to sync up the index
78 -- SYNC_BY_TIME indicates that the index should be synced up at regular
79 --   intervals
80 -- SYNC_BY_PENDING_COUNT_AND_TIME is a combination of both these strategies
81 --
82 ----------------------
83 SYNC_BY_PENDING_COUNT      CONSTANT PLS_INTEGER := 1;
84 SYNC_BY_TIME               CONSTANT PLS_INTEGER := 2;
85 SYNC_BY_PENDING_COUNT_AND_TIME CONSTANT PLS_INTEGER := 3;
86 SyncTimeOut                NUMBER := NULL;  -- not a constant; presumably the limit set by setSyncTimeout (NULL = none), TODO confirm
87 
88 -----------------------------------------------------------------------
89 -- CONFIGURATION SETTINGS
90 --
91 -- This section contains the default settings for the index. This
92 -- section should be changed as appropriate.
93 --
94 -----------------------------------------------------------------------
95 -- The name of the context index (CONSTANT: cannot be reassigned at run time)
96 IndexName         CONSTANT VARCHAR2(32) := 'XDB.XDB$CI';
97 
98 -- The default memory to be used for index creation and sync (CONSTANT)
99 -- NOTE: This must be less than (or equal to) the MAX_INDEX_MEMORY
100 --       parameter; syncIndex accepts a per-call value (myIndexMemory)
101 IndexMemory       CONSTANT VARCHAR2(32) := '50M';
102 
103 
104 -------------------------------------------------------------
105 -- SYNC OPTIONS
106 -------------------------------------------------------------
107 -- The following section describes the automatic sync policy.
108 -- By default, the auto-sync policy (once it has been configured)
109 -- is to sync based on the pending count.
110 
111 -- Should we sync up based on pending count, or time or both ?
112 AutoSyncPolicy             PLS_INTEGER := SYNC_BY_PENDING_COUNT;
113 
114 -- This parameter determines the maximum size of the pending queue
115 --   before the index is sync-ed. Applies only when the sync policy
116 --   is SYNC_BY_PENDING_COUNT or SYNC_BY_PENDING_COUNT_AND_TIME
117 --
118 MaxPendingCount            PLS_INTEGER := 2;
119 
120 -- This parameter determines the interval - in minutes - at
121 -- which the "regular" sync should be performed on the index.
122 -- Applies only to the SYNC_BY_TIME and the SYNC_BY_PENDING_COUNT_AND_TIME
123 -- policies
124 SyncInterval               PLS_INTEGER := 60;
125 
126 --
127 -- This parameter determines how frequently - in minutes - the pending
128 -- queue is polled. Applies only to the SYNC_BY_PENDING_COUNT
129 -- (and the SYNC_BY_PENDING_COUNT_AND_TIME) policies
130 --
131 CheckPendingCountInterval  PLS_INTEGER := 10;
132 
133 ----------------------------------------------------
134 -- LOGGING OPTIONS
135 --
136 -- Please ensure that the LOG_DIRECTORY parameter is
137 -- already set if you need rowid logging
138 ----------------------------------------------------
139 --
140 -- Logging options. This parameter determines the logfile used - for
141 -- rowid logging during index creation etc.
142 -- Set this parameter to NULL to avoid rowid-logging
143 -- bug 16733810: by default, logging should be disallowed;
144 --   however, in createIndex, empty-string LogFile does not turn off
145 --   the logging. We should set the default value to NULL.
146 --   A user has to set the right logFile variable and LOG_DIRECTORY
147 --   parameter to enable log.
148 --
149 LogFile                    VARCHAR2(32) := NULL;
150 
151 -------------------------------------
152 -- FILTER OPTIONS
153 --
154 -- The following classes determine the filtering options based on the
155 -- mime type of the document
156 --
157 -- The skipFilter_Types list contains a list of regular expressions
158 -- that describe mime types for which the document is *not* to be
159 -- filtered/indexed. (The document metadata, however, is still indexed)
160 -- Use this for document types that cannot really be indexed. Good
161 -- examples of this class are images, audio files etc
162 --
163 -- The NullFilter_Types list contains a list of regular expressions
164 -- that describe mime types of documents for which no INSO filtering
165 -- is required - these documents should be basically text formats
166 -- and the only filtering required should be character set conversion
167 -- (if needed)
168 --
169 -- For any given document, the skipFilter_Types list is first scanned
170 -- to determine if any regular expression in that list matches the
171 -- document's content type. If it does, then the document content is
172 -- not indexed.
173 -- Failing this, the NullFilter_Types list is then scanned. If any
174 -- regular expression in this list matches the document's content type,
175 -- the document is sent through character-set conversion.
176 -- Failing this, the document is filtered using the INSO filter (using the
177 -- IFILTER interfaces)
178 -------------------------------------
179 SkipFilter_Types varcharset := varcharset('image/%', 'audio/%', 'video/%',
180                                           'model/%');
181 NullFilter_Types varcharset := varcharset('%text/plain', '%text/html',
182                                           '%text/xml');
183 
184 -------------------------------------
185 -- STOPWORD Settings
186 --
187 -- This list describes the set of stopwords over and above that
188 -- specified by the CTXSYS.DEFAULT_STOPLIST stoplist
189 --
190 -------------------------------------
191 StopWords        varcharset := varcharset('0','1','2','3','4','5','6',
192                                           '7','8','9',
193                                           'a','b','c','d','e','f','g','h','i',
194                                           'j','k','l','m','n','o','p','q','r',
195                                           's','t','u','v','w','x','y','z',
196                                           'A','B','C','D','E','F','G','H','I',
197                                           'J','K','L','M','N','O','P','Q','R',
198                                           'S','T','U','V','W','X','Y','Z'
199                                          );
200 
201 --------------------------------------
202 -- LEXER preferences
203 --
204 -- This parameter determines if multi-language lexers can be
205 -- used.
206 -- Not supported currently, and this parameter should always be FALSE
207 --------------------------------------
208 UseMultiLexer   BOOLEAN := false;
209 
210 --------------------------------------
211 -- SECTION GROUP
212 --
213 -- This parameter determines the sectioner to use.
214 -- By default, this is an HTML section group. No zone sections have been
215 -- created - i.e. WITHIN searches are not possible
216 -- If the vast majority of documents are XML or XML-like, consider using
217 -- the AUTO_SECTION_GROUP or the PATH_SECTION_GROUP or even a
218 -- NULL_SECTION_GROUP
219 --------------------------------------
220 SectionGroup    VARCHAR2(100) := 'HTML_SECTION_GROUP';
221 
222 --------------------------------------
223 -- PUBLIC INTERFACES
224 --
225 -- The public APIs exposed by this package
226 --------------------------------------
227 
228   --
229   -- This procedure drops all preferences required by the context index
230   --
231   PROCEDURE dropPreferences;
232 
233   --
234   -- This procedure creates all preferences required by the context index
235   -- on the XDB repository.
236   -- The set of preferences include Datastore, Storage, Filter, Lexer,
237   -- SectionGroup, Stoplist and Wordlist preferences
238   -- NOTE: This will raise exceptions if any of the preferences already
239   --       exist
240   --
241   PROCEDURE createPreferences;
242 
243   --
244   -- Creates the datastore preference
245   -- Will raise an exception if the datastore already exists
246   PROCEDURE createDatastorePref;
247 
248   --
249   -- Creates the storage preferences
250   -- Will raise an exception if the preference already exists
251   --
252   PROCEDURE createStoragePref;
253 
254   --
255   -- Creates the section group
256   -- Will raise an exception if the preference already exists
257   --
258   PROCEDURE createSectiongroupPref;
259 
260   --
261   -- Creates the filter preference
262   -- Will raise an exception if the preference already exists
263   --
264   PROCEDURE createFilterPref;
265 
266   --
267   -- Creates the lexer preference
268   -- Will raise an exception if the preference already exists
269   --
270   PROCEDURE createLexerPref;
271 
272   --
273   -- Creates the stoplist
274   -- Will raise an exception if the preference already exists
275   --
276   PROCEDURE createStoplistPref;
277 
278   --
279   -- Creates the wordlist
280   -- Will raise an exception if the preference already exists
281   --
282   PROCEDURE createWordlistPref;
283 
284   --
285   -- Creates the index
286   -- This requires the above preferences to have already been created.
287   -- (a) If LogFile is non-NULL, the LOG_DIRECTORY parameter must be set
288   --     (rowid logging during index creation is off while LogFile is NULL)
289   -- (b) Ensure that the memory size specified to index creation (IndexMemory)
290   --     is no larger than the MAX_INDEX_MEMORY parameter
291   --
292   PROCEDURE createIndex;
293 
294   --
295   -- Syncs up the index
296   -- This can be used to explicitly sync up the index.
297   -- The preferred mechanism is to set up automatic sync'ing with
298   -- the "configureAutoSync" procedure
299   -- Both parameters default to the package constants IndexName / IndexMemory
300   PROCEDURE syncIndex(myIndexName VARCHAR2 := Indexname,
301                       myIndexMemory VARCHAR2 := IndexMemory);
302 
303   --
304   -- Set a suggested time limit on the SYNC operation, in minutes.
305   -- SYNC_INDEX will process as many documents in the queue as possible
306   -- within the time limit.
307   -- A timeout of NULL (the default) is equivalent to
308   -- CTX_DDL.MAXTIME_UNLIMITED, i.e. no time limit.
309   PROCEDURE setSyncTimeout(timeout IN INTEGER := NULL);
310 
311   --
312   -- Optimizes the index
313   --
314   PROCEDURE optimizeIndex;
315 
316   --
317   -- Configures for automatic sync of the index
318   -- NOTE: The system must be configured for job queues. Also, the
319   --   number of job queue processes must be non-zero
320   --
321   PROCEDURE configureAutoSync;
322 
323   --
324   -- Procedure used by dbms_job to automatically sync up the context
325   -- index
326   -- Don't use this directly
327   --
328   PROCEDURE autoSyncJobByCount(myIndexName VARCHAR2, myMaxPendingCount NUMBER,
329                                myIndexMemory VARCHAR2);
330   --
331   -- Procedure used by dbms_job to automatically sync up the context
332   -- index
333   -- Don't use this directly
334   --
335   PROCEDURE autoSyncJobByTime(myIndexName VARCHAR2, myIndexMemory VARCHAR2);
336 
337   --
338   -- The user-datastore procedure (presumably the Oracle Text
339   -- USER_DATASTORE callback: row ROWID in, document CLOB out - TODO confirm)
340   -- Do *not* call this directly
341   PROCEDURE xdb_datastore_proc(rid IN ROWID, outlob IN OUT NOCOPY CLOB);
342 
343 end dbms_xdbt;