DBA Data[Home] [Help]

PACKAGE: CTXSYS.CTX_CLS

Source


1 PACKAGE ctx_cls AUTHID current_user AS
2 
3 /*------------------------------- TYPE DEFINITIONS ----------------------------*/
4 /* One document-to-cluster assignment, and the in-memory table holding them */
5 TYPE doc_rec IS RECORD (
6   docid      NUMBER,  -- identifies the document
7   clusterid  NUMBER,  -- cluster this document was assigned to
8   score      NUMBER   -- similarity between the document and that cluster
9 );
10 TYPE doc_tab IS TABLE OF doc_rec INDEX BY BINARY_INTEGER;
11 
12 /* Description of a single cluster, and the in-memory table holding them */
13 TYPE cluster_rec IS RECORD (
14   clusterid      NUMBER,          -- identifies the cluster
15   descript       VARCHAR2(4000),  -- descriptive string for the cluster
16   label          VARCHAR2(200),   -- suggested label for the cluster
17   sze            NUMBER,          -- count of documents assigned to the cluster
18   quality_score  NUMBER,          -- quality score of the cluster
19   parent         NUMBER           -- parent cluster ID; a negative value means no parent
20 );
21 TYPE cluster_tab IS TABLE OF cluster_rec INDEX BY BINARY_INTEGER;
22 
23 TYPE docid_tab IS TABLE OF NUMBER INDEX BY BINARY_INTEGER;  -- in-memory list of document IDs
24 
25 /*------------------------------- train for ctx-rules --------------------------*/
26 /*
27    NAME
28      train - automatically generate ctx-rules from training examples
29 
30    DESCRIPTION
31      This procedure will generate the ctx-rules for a given set of training
32      examples. The training examples are contained in the following two tables
33 
34 	table1:  doctab must have the following columns:
35 		 docid		number primary key
36 		 text		doc. column which can be indexed by context index
37         table2:  category table must have the following columns:
38 		 docid		CONSTRAINT fk_id REFERENCES doctab(docid)
39 		 category_id	number
40 
41 	the foreign key in category table is recommended but not required.
42 
43      The rules will be written to the result table specified.
44      The result table must have the following columns:
45 
46        category_id		number         (the category_id)
47        query	              	varchar2(4000) (the rule)
48        confidence               number         (the confidence level
49 						(percentage) that a document
50 						is relevant if this rule is
51 						satisfied )
52 
53      The names of table and column are not necessarily the same as above.
54 
55    ARGUMENTS
56      index_name               - the name of the text index
57      docid                    - the name of docid column in document table
58      cattab		      - the name of category table
59      catdocid		      - the name of docid column in category table
60      catid		      - the name of category ID column in category table
61      restab		      - the name of result table
62      rescatid                 - the name of category ID column in result table
63      resquery                 - the name of query column in result table
64      resconfid                - the name of confidence column in result table
65      pref_name                - the name of preference
66 */
67 
68 PROCEDURE train (                       -- ctx-rules variant: writes rules to a result table
69   index_name  IN VARCHAR2,              -- text index name
70   docid       IN VARCHAR2,              -- docid column of the document table
71   cattab      IN VARCHAR2,              -- category table name
72   catdocid    IN VARCHAR2,              -- docid column of the category table
73   catid       IN VARCHAR2,              -- category ID column of the category table
74   restab      IN VARCHAR2,              -- result table name
75   rescatid    IN VARCHAR2,              -- category ID column of the result table
76   resquery    IN VARCHAR2,              -- query (rule) column of the result table
77   resconfid   IN VARCHAR2,              -- confidence column of the result table
78   pref_name   IN VARCHAR2 DEFAULT NULL  -- preference name (optional)
79 );
80 PRAGMA SUPPLEMENTAL_LOG_DATA(train, AUTO);
81 
82 /*------------------------------- generic train API ---------------------------*/
83 /*
84    NAME
85      train - automatically generate predictive model from examples for
86              classification
87 
88    DESCRIPTION
89      This procedure will generate the predictive model from a given set of
90      training examples. The training examples are contained in the following
91      two tables
92 
93 	table1:  doctab must have the following columns:
94 		 docid	  	number primary key
95 		 text	 	doc. column which can be indexed by context index
96         table2:  category table must have the following columns:
97 		 docid		number
98 		 category_id	number
99 
100      The names of table and column are not necessarily the same as above.
101 
102      The predictive model (classifier) will be written to the result table.
103      The result table is either created by users before calling this procedure or
104      created in this program with the specified table name and under the current
105      user (if the specified table does not exist).
106      If the user creates the result table (which can support table schema for
107      different users), the table should have the following three columns with the
108      exact column names:
109 		cat_id number
110 		type number(3) not null
111 		rule clob
112 
113    ARGUMENTS
114      index_name               - the name of the text index
115      docid                    - the name of docid column in document table
116      cattab		      - the name of category table
117      catdocid		      - the name of docid column in category table
118      catid		      - the name of category ID column in category table
119      restab                   - the name of generated result table
120      pref_name                - the name of preference
121 */
122 PROCEDURE train (           -- generic variant: writes a classifier model to restab
123   index_name  IN VARCHAR2,  -- text index name
124   docid       IN VARCHAR2,  -- docid column of the document table
125   cattab      IN VARCHAR2,  -- category table name
126   catdocid    IN VARCHAR2,  -- docid column of the category table
127   catid       IN VARCHAR2,  -- category ID column of the category table
128   restab      IN VARCHAR2,  -- generated result table name
129   pref_name   IN VARCHAR2   -- preference name (no default in this overload)
130 );
131 PRAGMA SUPPLEMENTAL_LOG_DATA(train, AUTO);
132 
133 
134 /* ---------- clustering API for permanent table result------------------------*/
135 /*
136   NAME
137     clustering - clustering a collection
138 
139   DESCRIPTION
140     This procedure will generate a set of sub-groups (clusters) from a provided
141     collection of documents. The collection is given by a table which has a
142     context index built with or without population. The collection table has at
143     least the following two columns, whose names may not be exactly the same.
144 	 docid	  	number primary key
145 	 text	 	doc. column which can be indexed by context index
146 
147     The output of clustering is represented by two tables:
148 
149 	table 1: document membership table having the following columns with the
150 		 exact same names
151 	 docid     number -- document ID to identify a document
152 	 clusterid number -- the ID of the cluster the document is assigned to
153 	 score     number -- the similarity score between document and cluster
154 
155 	table 2: cluster description table having the following columns with the
156 		 exact same names
157 	 clusterid number         -- cluster ID to identify a cluster
158 	 descript  varchar2(4000) -- a string to describe the cluster
159          label     varchar2(200)  -- a suggested label for the cluster
160          sze       number         -- number of documents assigned to the cluster
161 	 quality_score number     -- the quality score of the cluster
162 	 parent    number         -- parent cluster id. negative means no parent
163 
164      The output tables can either be created by users before calling this
165      procedure or created in this program with the specified table name and
166      under the current user (if the specified table does not exist).
167 
168    ARGUMENTS:
169      index_name               - the name of the text index
170      docid                    - the name of docid column in document table
171      doctab_name              - the name of document membership table
172      clstab_name              - the name of cluster description table
173      pref_name                - the name of the preference
174 */
175 PROCEDURE clustering (                   -- permanent-table variant
176   index_name   IN VARCHAR2,              -- text index name
177   docid        IN VARCHAR2,              -- docid column of the document table
178   doctab_name  IN VARCHAR2,              -- document membership table name
179   clstab_name  IN VARCHAR2,              -- cluster description table name
180   pref_name    IN VARCHAR2 DEFAULT NULL  -- preference name (optional)
181 );
182 PRAGMA SUPPLEMENTAL_LOG_DATA(clustering, AUTO);
183 
184 /* ---------- clustering API for in-memory table result------------------------*/
185 /*
186   NAME
187     clustering - clustering a collection
188 
189   DESCRIPTION
190     This procedure will generate a set of sub-groups (clusters) from a provided
191     collection of documents. The collection is given by a table which has a
192     context index built with or without population. The collection table has at
193     least the following two columns, whose names may not be exactly the same.
194 	 docid	  	number primary key
195 	 text	 	doc. column which can be indexed by context index
196 
197     The output of clustering is represented by two in-memory tables:
198 
199 	table 1: document membership table ctx_cls.doc_tab
200 	table 2: cluster description table ctx_cls.cluster_tab
201 
202    ARGUMENTS:
203      index_name               - the name of the text index
204      docid                    - the name of docid column in document table
205      dids                     - docid list to be clustered
206      doctab_name              - in-memory document membership table (ctx_cls.doc_tab)
207      clstab_name              - in-memory cluster description table (ctx_cls.cluster_tab)
208      pref_name                - the name of the preference
209 */
210 PROCEDURE clustering (                          -- in-memory variant
211   index_name   IN VARCHAR2,                     -- text index name
212   docid        IN VARCHAR2,                     -- docid column of the document table
213   dids         IN docid_tab,                    -- list of docids to be clustered
214   doctab_name  IN OUT NOCOPY doc_tab,           -- in-memory document membership table
215   clstab_name  IN OUT NOCOPY cluster_tab,       -- in-memory cluster description table
216   pref_name    IN VARCHAR2 DEFAULT NULL         -- preference name (optional)
217 );
218 --PRAGMA SUPPLEMENTAL_LOG_DATA(clustering, AUTO);  -- NOTE(review): disabled here, unlike the other overloads; confirm intent
219 
220 END ctx_cls;