[Home] [Help]
PACKAGE: SYS.DBMS_DATA_MINING
Source
1 PACKAGE dbms_data_mining AUTHID CURRENT_USER AS
2
3 ------------
4 -- OVERVIEW
5 --
6 -- This package provides general purpose routines for Data Mining
7 -- operations viz.
8 -- . CREATE a MODEL against build data.
9 -- . DROP an existing MODEL.
10 -- . RENAME an existing MODEL.
11 -- . COMPUTE various metrics to test a model against the APPLY
12 -- results on test data, with cost inputs
13 -- . APPLY a model to (production) mining data
14 -- . RANK the APPLY results based on cost and other factors
15 -- . GET the MODEL SIGNATURE - i.e. retrieve the attributes
16 -- that constitute the model and their relevant characteristics.
17 -- . GET the MODEL DETAILS - i.e. retrieve the contents of
18 -- the model - the specific patterns and rules that were used
19 -- in making the prediction (in the case of predictive models),
20 -- and/or the declarative rules (in the case of declarative models).
21 --
22
23 ------------------------
24 -- RULES AND LIMITATIONS
25 --
26 -- The following rules apply in the specification of functions and
27 -- procedures in this package.
28 --
29 -- A function/procedure will raise an INVALID_ARGVAL exception if the
30 -- the following restrictions are not followed in specifying values
31 -- for parameters (unless otherwise specified):
32 --
33 -- 1. Every BUILD operation MUST have the mining function
34 -- name specified at the minimum.
35 -- 2. All schema object names, except models, should be maximum
36 -- 30 bytes in size.
37 -- 3. All model names should be maximum 25 bytes in size.
38 -- 4. The SETTINGS discussed below under CONSTANTS represent the name
39 -- tags and values that act as column values in a user-created
40 -- Settings Table, with a fixed schema and column types:
41 --
42 -- SETTING_NAME SETTING_VALUE
43 -- varchar2(30) varchar2(30)
44 --
45 -- 5. For numerical settings, use TO_CHAR() to store them in the
46 -- SETTING_VALUE column - the API will interpret the values.
47 --
48 --
49
50 -----------
51 -- SECURITY
52 --
53 -- Privileges are associated with the the caller of the procedures/
54 -- functions in this package as follows:
55 -- If the caller is an anonymous PL/SQL block, the procedures/functions
56 -- are run with the privilege of the current user.
57 -- If the caller is a stored procedure, the procedures/functions are run
58 -- using the privileges of the owner of the stored procedure.
59 --
60
61 ------------
62 -- CONSTANTS
63 --
64 -- General Settings - Begin ------------------------------------------------
65
66 -- Data Prep: Setting Names
67 prep_auto CONSTANT VARCHAR2(30) := 'PREP_AUTO';
68
69 -- Data Prep: Setting Values for prep_auto
70 prep_auto_off CONSTANT VARCHAR2(30) := 'OFF';
71 prep_auto_on CONSTANT VARCHAR2(30) := 'ON';
72
73 -- Score Criterion Type: Setting Values for score_criterion_type
74 score_criterion_probability CONSTANT VARCHAR2(30) := 'PROBABILITY';
75 score_criterion_cost CONSTANT VARCHAR2(30) := 'COST';
76
77 -- Row Weights - Setting Name
78 odms_row_weight_column_name CONSTANT VARCHAR2(30) :=
79 'ODMS_ROW_WEIGHT_COLUMN_NAME';
80
81 -- Cost Matrix
82 cost_matrix_type_score CONSTANT VARCHAR2(30) := 'SCORE';
83 cost_matrix_type_create CONSTANT VARCHAR2(30) := 'CREATE';
84
85 -- Missing Value Treatment - Setting Name
86 odms_missing_value_treatment CONSTANT VARCHAR2(30) :=
87 'ODMS_MISSING_VALUE_TREATMENT';
88
89 -- Missing Value Treatment: Setting Values for ODMS_MISSING_VALUE_TREATMENT
90 odms_missing_value_mean_mode CONSTANT VARCHAR2(30) :=
91 'ODMS_MISSING_VALUE_MEAN_MODE';
92 odms_missing_value_delete_row CONSTANT VARCHAR2(30) :=
93 'ODMS_MISSING_VALUE_DELETE_ROW';
94
95 -- Transactional training data format: Setting Names
96 odms_item_id_column_name CONSTANT VARCHAR2(30) :=
97 'ODMS_ITEM_ID_COLUMN_NAME';
98 odms_item_value_column_name CONSTANT VARCHAR2(30) :=
99 'ODMS_ITEM_VALUE_COLUMN_NAME';
100
101 -- Unstructured Text Setting Names
102 odms_text_policy_name CONSTANT VARCHAR2(30) :=
103 'ODMS_TEXT_POLICY_NAME';
104 odms_text_max_features CONSTANT VARCHAR2(30) :=
105 'ODMS_TEXT_MAX_FEATURES';
106
107 -- Approximate computation
108 odms_approximate_computation CONSTANT VARCHAR2(30) :=
109 'ODMS_APPROXIMATE_COMPUTATION';
110 -- Setting values for odms_approximate_computation
111 odms_appr_comp_enable CONSTANT VARCHAR2(30) := 'ODMS_APPR_COMP_ENABLE';
112 odms_appr_comp_disable CONSTANT VARCHAR2(30) := 'ODMS_APPR_COMP_DISABLE';
113
114 -- General Settings - End -------------------------------------------------
115
116 ----------- Function and Algorithm Settings - Begin ---------------------
117
118 -- FUNCTION NAME (input as CREATE_MODEL parameter)
119 --
120 classification CONSTANT VARCHAR2(30) := 'CLASSIFICATION';
121 regression CONSTANT VARCHAR2(30) := 'REGRESSION';
122 clustering CONSTANT VARCHAR2(30) := 'CLUSTERING';
123 association CONSTANT VARCHAR2(30) := 'ASSOCIATION';
124 feature_extraction CONSTANT VARCHAR2(30) := 'FEATURE_EXTRACTION';
125 attribute_importance CONSTANT VARCHAR2(30) := 'ATTRIBUTE_IMPORTANCE';
126
127 -- FUNCTION: Setting Names (input to settings_name column in settings table)
128 clas_priors_table_name CONSTANT VARCHAR2(30) := 'CLAS_PRIORS_TABLE_NAME';
129 clas_weights_table_name CONSTANT VARCHAR2(30) := 'CLAS_WEIGHTS_TABLE_NAME';
130 clas_cost_table_name CONSTANT VARCHAR2(30) := 'CLAS_COST_TABLE_NAME';
131 asso_max_rule_length CONSTANT VARCHAR2(30) := 'ASSO_MAX_RULE_LENGTH';
132 asso_min_confidence CONSTANT VARCHAR2(30) := 'ASSO_MIN_CONFIDENCE';
133 asso_min_support CONSTANT VARCHAR2(30) := 'ASSO_MIN_SUPPORT';
134 feat_num_features CONSTANT VARCHAR2(30) := 'FEAT_NUM_FEATURES';
135 clus_num_clusters CONSTANT VARCHAR2(30) := 'CLUS_NUM_CLUSTERS';
136
137 -- ALGORITHM Setting Name (input to settings_name column in settings table)
138 --
139 algo_name CONSTANT VARCHAR2(30) := 'ALGO_NAME';
140
141 -- ALGORITHM: Setting Values for algo_name
142 algo_naive_bayes CONSTANT VARCHAR2(30) :=
143 'ALGO_NAIVE_BAYES';
144 algo_adaptive_bayes_network CONSTANT VARCHAR2(30) :=
145 'ALGO_ADAPTIVE_BAYES_NETWORK';
146 algo_support_vector_machines CONSTANT VARCHAR2(30) :=
147 'ALGO_SUPPORT_VECTOR_MACHINES';
148 algo_nonnegative_matrix_factor CONSTANT VARCHAR2(30) :=
149 'ALGO_NONNEGATIVE_MATRIX_FACTOR';
150 algo_apriori_association_rules CONSTANT VARCHAR2(30) :=
151 'ALGO_APRIORI_ASSOCIATION_RULES';
152 algo_kmeans CONSTANT VARCHAR2(30) :=
153 'ALGO_KMEANS';
154 algo_ocluster CONSTANT VARCHAR2(30) :=
155 'ALGO_O_CLUSTER';
156 algo_ai_mdl CONSTANT VARCHAR2(30) :=
157 'ALGO_AI_MDL';
158 algo_decision_tree CONSTANT VARCHAR2(30) :=
159 'ALGO_DECISION_TREE';
160 algo_generalized_linear_model CONSTANT VARCHAR2(30) :=
161 'ALGO_GENERALIZED_LINEAR_MODEL';
162 algo_singular_value_decomp CONSTANT VARCHAR2(30) :=
163 'ALGO_SINGULAR_VALUE_DECOMP';
164 algo_expectation_maximization CONSTANT VARCHAR2(30) :=
165 'ALGO_EXPECTATION_MAXIMIZATION';
166
167 -- ALGORITHM SETTINGS AND VALUES
168 --
169 -- ABN: Setting Names
170 abns_model_type CONSTANT VARCHAR2(30) := 'ABNS_MODEL_TYPE';
171 abns_max_build_minutes CONSTANT VARCHAR2(30) := 'ABNS_MAX_BUILD_MINUTES';
172 abns_max_predictors CONSTANT VARCHAR2(30) := 'ABNS_MAX_PREDICTORS';
173 abns_max_nb_predictors CONSTANT VARCHAR2(30) := 'ABNS_MAX_NB_PREDICTORS';
174
175 -- ABN: Setting Values for abns_model_type
176 abns_multi_feature CONSTANT VARCHAR2(30) := 'ABNS_MULTI_FEATURE';
177 abns_single_feature CONSTANT VARCHAR2(30) := 'ABNS_SINGLE_FEATURE';
178 abns_naive_bayes CONSTANT VARCHAR2(30) := 'ABNS_NAIVE_BAYES';
179
180 -- NB: Setting Names
181 nabs_pairwise_threshold CONSTANT VARCHAR2(30) := 'NABS_PAIRWISE_THRESHOLD';
182 nabs_singleton_threshold CONSTANT VARCHAR2(30) := 'NABS_SINGLETON_THRESHOLD';
183
184 -- SVM: Setting Names
185 -- NOTE: svms_epsilon applies only for SVM Regression
186 -- svms_complexity_factor applies to both
187 -- svms_std_dev applies only for Gaussian Kernels
188 -- kernel_cache_size to Gaussian kernels only
189 svms_conv_tolerance CONSTANT VARCHAR2(30) := 'SVMS_CONV_TOLERANCE';
190 svms_std_dev CONSTANT VARCHAR2(30) := 'SVMS_STD_DEV';
191 svms_complexity_factor CONSTANT VARCHAR2(30) := 'SVMS_COMPLEXITY_FACTOR';
192 svms_kernel_cache_size CONSTANT VARCHAR2(30) := 'SVMS_KERNEL_CACHE_SIZE';
193 svms_epsilon CONSTANT VARCHAR2(30) := 'SVMS_EPSILON';
194 svms_kernel_function CONSTANT VARCHAR2(30) := 'SVMS_KERNEL_FUNCTION';
195 svms_active_learning CONSTANT VARCHAR2(30) := 'SVMS_ACTIVE_LEARNING';
196 svms_outlier_rate CONSTANT VARCHAR2(30) := 'SVMS_OUTLIER_RATE';
197
198 -- SVM: Setting Values for svms_kernel_function
199 svms_linear CONSTANT VARCHAR2(30) := 'SVMS_LINEAR';
200 svms_gaussian CONSTANT VARCHAR2(30) := 'SVMS_GAUSSIAN';
201
202 -- SVM: Setting Values for svms_active_learning
203 svms_al_enable CONSTANT VARCHAR2(30) := 'SVMS_AL_ENABLE';
204 svms_al_disable CONSTANT VARCHAR2(30) := 'SVMS_AL_DISABLE';
205
206 -- KMNS: Setting Names
207 kmns_distance CONSTANT VARCHAR2(30) := 'KMNS_DISTANCE';
208 kmns_iterations CONSTANT VARCHAR2(30) := 'KMNS_ITERATIONS';
209 kmns_conv_tolerance CONSTANT VARCHAR2(30) := 'KMNS_CONV_TOLERANCE';
210 kmns_split_criterion CONSTANT VARCHAR2(30) := 'KMNS_SPLIT_CRITERION';
211 kmns_min_pct_attr_support CONSTANT VARCHAR2(30):= 'KMNS_MIN_PCT_ATTR_SUPPORT';
212 kmns_block_growth CONSTANT VARCHAR2(30) := 'KMNS_BLOCK_GROWTH';
213 kmns_num_bins CONSTANT VARCHAR2(30) := 'KMNS_NUM_BINS';
214
215 -- KMNS: Setting Values for kmns_distance
216 kmns_euclidean CONSTANT VARCHAR2(30) := 'KMNS_EUCLIDEAN';
217 kmns_cosine CONSTANT VARCHAR2(30) := 'KMNS_COSINE';
218 kmns_fast_cosine CONSTANT VARCHAR2(30) := 'KMNS_FAST_COSINE';
219
220 -- KMNS: Setting Values for kmns_split_criterion
221 kmns_size CONSTANT VARCHAR2(30) := 'KMNS_SIZE';
222 kmns_variance CONSTANT VARCHAR2(30) := 'KMNS_VARIANCE';
223
224 -- NMF: Setting Names
225 nmfs_num_iterations CONSTANT VARCHAR2(30) := 'NMFS_NUM_ITERATIONS';
226 nmfs_conv_tolerance CONSTANT VARCHAR2(30) := 'NMFS_CONV_TOLERANCE';
227 nmfs_random_seed CONSTANT VARCHAR2(30) := 'NMFS_RANDOM_SEED';
228 nmfs_nonnegative_scoring CONSTANT VARCHAR2(30) :=
229 'NMFS_NONNEGATIVE_SCORING';
230 -- Setting values for NMFS_NONNEGATIVE_SCORING
231 nmfs_nonneg_scoring_enable CONSTANT VARCHAR2(30) :=
232 'NMFS_NONNEG_SCORING_ENABLE';
233 nmfs_nonneg_scoring_disable CONSTANT VARCHAR2(30) :=
234 'NMFS_NONNEG_SCORING_DISABLE';
235
236 -- OCLT: Setting Names for O-Cluster
237 oclt_sensitivity CONSTANT VARCHAR2(30) := 'OCLT_SENSITIVITY';
238 oclt_max_buffer CONSTANT VARCHAR2(30) := 'OCLT_MAX_BUFFER';
239
240 -- TREE: Setting Names
241 tree_impurity_metric CONSTANT VARCHAR2(30) := 'TREE_IMPURITY_METRIC';
242 tree_term_max_depth CONSTANT VARCHAR2(30) := 'TREE_TERM_MAX_DEPTH';
243 tree_term_minrec_split CONSTANT VARCHAR2(30) := 'TREE_TERM_MINREC_SPLIT';
244 tree_term_minpct_split CONSTANT VARCHAR2(30) := 'TREE_TERM_MINPCT_SPLIT';
245 tree_term_minrec_node CONSTANT VARCHAR2(30) := 'TREE_TERM_MINREC_NODE';
246 tree_term_minpct_node CONSTANT VARCHAR2(30) := 'TREE_TERM_MINPCT_NODE';
247
248 -- TREE: Setting Values for tree_impurity_metric
249 tree_impurity_gini CONSTANT VARCHAR2(30) := 'TREE_IMPURITY_GINI';
250 tree_impurity_entropy CONSTANT VARCHAR2(30) := 'TREE_IMPURITY_ENTROPY';
251
252 -- GLM: Setting Names
253 glms_ridge_regression CONSTANT VARCHAR2(30) := 'GLMS_RIDGE_REGRESSION';
254 glms_diagnostics_table_name CONSTANT VARCHAR2(30) :=
255 'GLMS_DIAGNOSTICS_TABLE_NAME';
256 glms_reference_class_name CONSTANT VARCHAR2(30) :=
257 'GLMS_REFERENCE_CLASS_NAME';
258 glms_ridge_value CONSTANT VARCHAR2(30) := 'GLMS_RIDGE_VALUE';
259 glms_conf_level CONSTANT VARCHAR2(30) := 'GLMS_CONF_LEVEL';
260 glms_vif_for_ridge CONSTANT VARCHAR2(30) := 'GLMS_VIF_FOR_RIDGE';
261
262 -- GLM: Setting Values for glms_ridge_regression
263 glms_ridge_reg_enable CONSTANT VARCHAR2(30) := 'GLMS_RIDGE_REG_ENABLE';
264 glms_ridge_reg_disable CONSTANT VARCHAR2(30) := 'GLMS_RIDGE_REG_DISABLE';
265
266 -- GLM: Setting Values for glms_vif_for_ridge
267 glms_vif_ridge_enable CONSTANT VARCHAR2(30) := 'GLMS_VIF_RIDGE_ENABLE';
268 glms_vif_ridge_disable CONSTANT VARCHAR2(30) := 'GLMS_VIF_RIDGE_DISABLE';
269
270 -- GLM: Setting Values for glms_ftr_selection
271 glms_ftr_selection CONSTANT VARCHAR2(30) := 'GLMS_FTR_SELECTION';
272 glms_ftr_selection_enable CONSTANT VARCHAR2(30) :=
273 'GLMS_FTR_SELECTION_ENABLE';
274 glms_ftr_selection_disable CONSTANT VARCHAR2(30) :=
275 'GLMS_FTR_SELECTION_DISABLE';
276
277 -- GLM: Setting Values for glms_ftr_sel_crit
278 glms_ftr_sel_crit CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_CRIT';
279 glms_ftr_sel_aic CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_AIC';
280 glms_ftr_sel_sbic CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_SBIC';
281 glms_ftr_sel_ric CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_RIC';
282 glms_ftr_sel_alpha_inv CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_ALPHA_INV';
283
284 -- GLM: Setting Values for glms_feature_generation
285 glms_ftr_generation CONSTANT VARCHAR2(30) := 'GLMS_FTR_GENERATION';
286 glms_ftr_generation_enable CONSTANT VARCHAR2(30) :=
287 'GLMS_FTR_GENERATION_ENABLE';
288 glms_ftr_generation_disable CONSTANT VARCHAR2(30) :=
289 'GLMS_FTR_GENERATION_DISABLE';
290
291 -- GLM: Setting Values for glms_feature_gen
292 glms_ftr_gen_method CONSTANT VARCHAR2(30) := 'GLMS_FTR_GEN_METHOD';
293 glms_ftr_gen_quadratic CONSTANT VARCHAR2(30) := 'GLMS_FTR_GEN_QUADRATIC';
294 glms_ftr_gen_cubic CONSTANT VARCHAR2(30) := 'GLMS_FTR_GEN_CUBIC';
295
296 -- GLM: feature selection categorical value handling
297 glms_select_block CONSTANT VARCHAR2(30) := 'GLMS_SELECT_BLOCK';
298 glms_select_block_disable CONSTANT VARCHAR2(30) := 'GLMS_SELECT_BLOCK_DISABLE';
299 glms_select_block_enable CONSTANT VARCHAR2(30) := 'GLMS_SELECT_BLOCK_ENABLE';
300
301 -- GLM: feature selection - max features selected
302 glms_max_features CONSTANT VARCHAR2(30) := 'GLMS_MAX_FEATURES';
303
304 -- GLM: feature identification - whether row sampling is used in the
305 -- selection of feature
306 glms_ftr_identification CONSTANT VARCHAR2(30) := 'GLMS_FTR_IDENTIFICATION';
307 glms_ftr_ident_quick CONSTANT VARCHAR2(30) :=
308 'GLMS_FTR_IDENT_QUICK';
309 glms_ftr_ident_complete CONSTANT VARCHAR2(30) :=
310 'GLMS_FTR_IDENT_COMPLETE';
311
312 -- GLM: model pruning - whether the final model features will be
313 -- pruned using t-statistics
314 glms_prune_model CONSTANT VARCHAR2(30) := 'GLMS_PRUNE_MODEL';
315 glms_prune_model_enable CONSTANT VARCHAR2(30) := 'GLMS_PRUNE_MODEL_ENABLE';
316 glms_prune_model_disable CONSTANT VARCHAR2(30) := 'GLMS_PRUNE_MODEL_DISABLE';
317
318 -- GLM: feature acceptance - whether partitioning the data into feature
319 -- ordering and feature selection sets will be used (strict) or
320 -- not (relaxed
321 glms_ftr_acceptance CONSTANT VARCHAR2(30) := 'GLMS_FTR_ACCEPTANCE';
322 glms_ftr_acceptance_strict CONSTANT VARCHAR2(30) :=
323 'GLMS_FTR_ACCEPTANCE_STRICT';
324 glms_ftr_acceptance_relaxed CONSTANT VARCHAR2(30) :=
325 'GLMS_FTR_ACCEPTANCE_RELAXED';
326
327
328 -- SVD
329 -- max number of features allowed
330 svds_max_num_features CONSTANT NUMBER := 2500;
331
332 svds_scoring_mode CONSTANT VARCHAR2(30) := 'SVDS_SCORING_MODE';
333 -- SVD: Setting values for svds_scoring_mode
334 svds_scoring_svd CONSTANT VARCHAR2(30) := 'SVDS_SCORING_SVD';
338 -- SVD: Setting values for svds_u_matrix_output
335 svds_scoring_pca CONSTANT VARCHAR2(30) := 'SVDS_SCORING_PCA';
336
337 svds_u_matrix_output CONSTANT VARCHAR2(30) := 'SVDS_U_MATRIX_OUTPUT';
339 svds_u_matrix_enable CONSTANT VARCHAR2(30) := 'SVDS_U_MATRIX_ENABLE';
340 svds_u_matrix_disable CONSTANT VARCHAR2(30) := 'SVDS_U_MATRIX_DISABLE';
341
342 -- EM
343 -- number of components
344 emcs_num_components CONSTANT VARCHAR2(30) := 'EMCS_NUM_COMPONENTS';
345
346 -- high-level component clustering
347 emcs_cluster_components CONSTANT VARCHAR2(30) :=
348 'EMCS_CLUSTER_COMPONENTS';
349 -- values for emcs_cluster_components
350 emcs_cluster_comp_enable CONSTANT VARCHAR2(30) :=
351 'EMCS_CLUSTER_COMP_ENABLE';
352 emcs_cluster_comp_disable CONSTANT VARCHAR2(30) :=
353 'EMCS_CLUSTER_COMP_DISABLE';
354
355 -- high-level cluster threshold
356 emcs_cluster_thresh CONSTANT VARCHAR2(30) := 'EMCS_CLUSTER_THRESH';
357
358 -- max number of 2D attributes
359 emcs_max_num_attr_2d CONSTANT VARCHAR2(30) := 'EMCS_MAX_NUM_ATTR_2D';
360
361 -- number of projections
362 emcs_num_projections CONSTANT VARCHAR2(30) := 'EMCS_NUM_PROJECTIONS';
363
364 -- number of quantile bins
365 emcs_num_quantile_bins CONSTANT VARCHAR2(30) := 'EMCS_NUM_QUANTILE_BINS';
366
367 -- number of topN bins
368 emcs_num_topn_bins CONSTANT VARCHAR2(30) := 'EMCS_NUM_TOPN_BINS';
369
370 -- number of equi-width bins
371 emcs_num_equiwidth_bins CONSTANT VARCHAR2(30) :=
372 'EMCS_NUM_EQUIWIDTH_BINS';
373
374 -- minimum percentage attribute support
375 emcs_min_pct_attr_support CONSTANT VARCHAR2(30) :=
376 'EMCS_MIN_PCT_ATTR_SUPPORT';
377 -- full covariance (next release)
378 -- emcs_full_covariance CONSTANT VARCHAR2(30) := 'EMCS_FULL_COVARIANCE';
379 -- values for emcs_full_covariance
380 -- emcs_full_cov_enable CONSTANT VARCHAR2(30) := 'EMCS_FULL_COV_ENABLE';
381 -- emcs_full_cov_disable CONSTANT VARCHAR2(30) := 'EMCS_FULL_COV_DISABLE';
382
383 -- cluster statistics
384 emcs_cluster_statistics CONSTANT VARCHAR2(30) := 'EMCS_CLUSTER_STATISTICS';
385 -- values for emcs_cluster_statistics
386 emcs_clus_stats_enable CONSTANT VARCHAR2(30) := 'EMCS_CLUS_STATS_ENABLE';
387 emcs_clus_stats_disable CONSTANT VARCHAR2(30) := 'EMCS_CLUS_STATS_DISABLE';
388
389 -- distribution for modeling numerical attributes
390 emcs_num_distribution CONSTANT VARCHAR2(30) := 'EMCS_NUM_DISTRIBUTION';
391 -- values for emcs_num_distribution
392 emcs_num_distr_bernoulli CONSTANT VARCHAR2(30) :=
393 'EMCS_NUM_DISTR_BERNOULLI';
394 emcs_num_distr_gaussian CONSTANT VARCHAR2(30) :=
395 'EMCS_NUM_DISTR_GAUSSIAN';
396 emcs_num_distr_system CONSTANT VARCHAR2(30) := 'EMCS_NUM_DISTR_SYSTEM';
397
398 -- number of iterations
399 emcs_num_iterations CONSTANT VARCHAR2(30) := 'EMCS_NUM_ITERATIONS';
400
401 -- required log likelihood improvement
402 emcs_loglike_improvement CONSTANT VARCHAR2(30) :=
403 'EMCS_LOGLIKE_IMPROVEMENT';
404
405 -- linkage function
406 emcs_linkage_function CONSTANT VARCHAR2(30) := 'EMCS_LINKAGE_FUNCTION';
407 -- values for linkage function
408 emcs_linkage_single CONSTANT VARCHAR2(30) := 'EMCS_LINKAGE_SINGLE';
409 emcs_linkage_average CONSTANT VARCHAR2(30) := 'EMCS_LINKAGE_AVERAGE';
410 emcs_linkage_complete CONSTANT VARCHAR2(30) := 'EMCS_LINKAGE_COMPLETE';
411
412 -- attribute filtering
413 emcs_attribute_filter CONSTANT VARCHAR2(30) := 'EMCS_ATTRIBUTE_FILTER';
414 -- values for attribute filtering
415 emcs_attr_filter_enable CONSTANT VARCHAR2(30) :=
416 'EMCS_ATTR_FILTER_ENABLE';
417 emcs_attr_filter_disable CONSTANT VARCHAR2(30) :=
418 'EMCS_ATTR_FILTER_DISABLE';
419 -- convergence criterion
420 emcs_convergence_criterion CONSTANT VARCHAR2(30) :=
421 'EMCS_CONVERGENCE_CRITERION';
422 -- values for convergence criterion
423 emcs_conv_crit_heldaside CONSTANT VARCHAR2(30) :=
424 'EMCS_CONV_CRIT_HELDASIDE';
425 emcs_conv_crit_bic CONSTANT VARCHAR2(30) :=
426 'EMCS_CONV_CRIT_BIC';
427
428 ----------- Function and Algorithm Settings - End ------------------------
429
430 --------------
431 -- LOCAL TYPES
432 --
433 SUBTYPE TRANSFORM_LIST IS dbms_data_mining_transform.TRANSFORM_LIST;
434
435 -- Default values for model build settings
436 TYPE default_settings_type IS TABLE OF VARCHAR2(30) INDEX BY VARCHAR2(30);
437 external_default_settings default_settings_type;
438 internal_default_settings default_settings_type;
439
440 ---------------------------
441 -- PROCEDURES AND FUNCTIONS
442 --
443 PROCEDURE apply(model_name IN VARCHAR2,
444 data_table_name IN VARCHAR2,
445 case_id_column_name IN VARCHAR2,
449 PROCEDURE compute_confusion_matrix(
446 result_table_name IN VARCHAR2,
447 data_schema_name IN VARCHAR2 DEFAULT NULL);
448
450 accuracy OUT NUMBER,
451 apply_result_table_name IN VARCHAR2,
452 target_table_name IN VARCHAR2,
453 case_id_column_name IN VARCHAR2,
454 target_column_name IN VARCHAR2,
455 confusion_matrix_table_name IN VARCHAR2,
456 score_column_name IN VARCHAR2 DEFAULT
457 'PREDICTION',
458 score_criterion_column_name IN VARCHAR2 DEFAULT
459 'PROBABILITY',
460 cost_matrix_table_name IN VARCHAR2 DEFAULT NULL,
461 apply_result_schema_name IN VARCHAR2 DEFAULT NULL,
462 target_schema_name IN VARCHAR2 DEFAULT NULL,
463 cost_matrix_schema_name IN VARCHAR2 DEFAULT NULL,
464 score_criterion_type IN VARCHAR2 DEFAULT NULL);
465
466 PROCEDURE compute_lift(
467 apply_result_table_name IN VARCHAR2,
468 target_table_name IN VARCHAR2,
469 case_id_column_name IN VARCHAR2,
470 target_column_name IN VARCHAR2,
471 lift_table_name IN VARCHAR2,
472 positive_target_value IN VARCHAR2,
473 score_column_name IN VARCHAR2 DEFAULT
474 'PREDICTION',
475 score_criterion_column_name IN VARCHAR2 DEFAULT
476 'PROBABILITY',
477 num_quantiles IN NUMBER DEFAULT 10,
478 cost_matrix_table_name IN VARCHAR2 DEFAULT NULL,
479 apply_result_schema_name IN VARCHAR2 DEFAULT NULL,
483
480 target_schema_name IN VARCHAR2 DEFAULT NULL,
481 cost_matrix_schema_name IN VARCHAR2 DEFAULT NULL,
482 score_criterion_type IN VARCHAR2 DEFAULT NULL);
484 PROCEDURE compute_roc(
485 roc_area_under_curve OUT NUMBER,
486 apply_result_table_name IN VARCHAR2,
487 target_table_name IN VARCHAR2,
488 case_id_column_name IN VARCHAR2,
489 target_column_name IN VARCHAR2,
490 roc_table_name IN VARCHAR2,
491 positive_target_value IN VARCHAR2,
492 score_column_name IN VARCHAR2 DEFAULT
493 'PREDICTION',
494 score_criterion_column_name IN VARCHAR2 DEFAULT
495 'PROBABILITY',
496 apply_result_schema_name IN VARCHAR2 DEFAULT NULL,
497 target_schema_name IN VARCHAR2 DEFAULT NULL);
498
499 PROCEDURE create_model(
500 model_name IN VARCHAR2,
501 mining_function IN VARCHAR2,
502 data_table_name IN VARCHAR2,
503 case_id_column_name IN VARCHAR2,
504 target_column_name IN VARCHAR2 DEFAULT NULL,
505 settings_table_name IN VARCHAR2 DEFAULT NULL,
506 data_schema_name IN VARCHAR2 DEFAULT NULL,
507 settings_schema_name IN VARCHAR2 DEFAULT NULL,
508 xform_list IN TRANSFORM_LIST DEFAULT NULL);
509
510 PROCEDURE drop_model(model_name IN VARCHAR2,
511 force IN BOOLEAN DEFAULT FALSE);
512
513 PROCEDURE export_model (filename IN VARCHAR2,
514 directory IN VARCHAR2,
515 model_filter IN VARCHAR2 DEFAULT NULL,
516 filesize IN VARCHAR2 DEFAULT NULL,
517 operation IN VARCHAR2 DEFAULT NULL,
518 remote_link IN VARCHAR2 DEFAULT NULL,
519 jobname IN VARCHAR2 DEFAULT NULL);
520
521 -- XML (PMML) versions of get model details
522 FUNCTION get_model_details_xml(model_name IN VARCHAR2)
523 RETURN XMLType;
524
525 -- Specifying topn orders by confidence DESC, support DESC
526 -- otherwise by rule_id
527 FUNCTION get_association_rules(model_name IN VARCHAR2,
528 topn IN NUMBER DEFAULT NULL,
529 rule_id IN INTEGER DEFAULT NULL,
530 min_confidence IN NUMBER DEFAULT NULL,
531 min_support IN NUMBER DEFAULT NULL,
532 max_rule_length IN INTEGER DEFAULT NULL,
533 min_rule_length IN INTEGER DEFAULT NULL,
534 sort_order IN ORA_MINING_VARCHAR2_NT DEFAULT NULL,
535 antecedent_items IN DM_ITEMS DEFAULT NULL,
536 consequent_items IN DM_ITEMS DEFAULT NULL,
537 min_lift IN NUMBER DEFAULT NULL)
538 RETURN DM_Rules PIPELINED;
539
540 -- Specifying topn orders by support DESC otherwise there
541 -- is no ordering
542 FUNCTION get_frequent_itemsets(model_name IN VARCHAR2,
543 topn IN NUMBER DEFAULT NULL,
544 max_itemset_length IN NUMBER DEFAULT NULL)
545 RETURN DM_ItemSets PIPELINED;
546
547 FUNCTION get_model_details_ai(model_name IN VARCHAR2)
548 RETURN dm_ranked_attributes pipelined;
549
550 FUNCTION get_model_details_glm(model_name IN VARCHAR2)
551 RETURN DM_GLM_Coeff_Set PIPELINED;
552
553 FUNCTION get_model_details_svd(model_name IN VARCHAR2,
554 matrix_type IN VARCHAR2 DEFAULT NULL)
555 RETURN DM_SVD_MATRIX_Set PIPELINED;
556
557 FUNCTION get_model_details_km(model_name VARCHAR2,
558 cluster_id NUMBER DEFAULT NULL,
559 attribute VARCHAR2 DEFAULT NULL,
560 centroid NUMBER DEFAULT 1,
561 histogram NUMBER DEFAULT 1,
562 rules NUMBER DEFAULT 2,
563 attribute_subname VARCHAR2 DEFAULT NULL,
564 topn_attributes NUMBER DEFAULT NULL)
565
566 RETURN dm_clusters PIPELINED;
567
568 FUNCTION get_model_details_nb(model_name IN VARCHAR2)
569 RETURN DM_NB_Details PIPELINED;
570
571 FUNCTION get_model_details_nmf(model_name IN VARCHAR2)
572 RETURN DM_NMF_Feature_Set PIPELINED;
573
574 FUNCTION get_model_details_oc(model_name VARCHAR2,
575 cluster_id NUMBER DEFAULT NULL,
576 attribute VARCHAR2 DEFAULT NULL,
577 centroid NUMBER DEFAULT 1,
578 histogram NUMBER DEFAULT 1,
579 rules NUMBER DEFAULT 2,
580 topn_attributes NUMBER DEFAULT NULL)
581 RETURN dm_clusters PIPELINED;
582
583 FUNCTION get_model_details_svm(model_name VARCHAR2,
584 reverse_coef NUMBER DEFAULT 0)
585 RETURN DM_SVM_Linear_Coeff_Set PIPELINED;
586
587 FUNCTION get_model_details_em(model_name VARCHAR2,
588 cluster_id NUMBER DEFAULT NULL,
589 attribute VARCHAR2 DEFAULT NULL,
590 centroid NUMBER DEFAULT 1,
594 topn_attributes NUMBER DEFAULT NULL)
591 histogram NUMBER DEFAULT 1,
592 rules NUMBER DEFAULT 2,
593 attribute_subname VARCHAR2 DEFAULT NULL,
595 RETURN dm_clusters PIPELINED;
596
597 FUNCTION get_model_details_em_comp(model_name IN VARCHAR2)
598 RETURN DM_EM_COMPONENT_SET PIPELINED;
599
600 FUNCTION get_model_details_em_proj(model_name IN VARCHAR2)
601 RETURN DM_EM_PROJECTION_SET PIPELINED;
602
603 FUNCTION get_model_details_global(model_name IN VARCHAR2)
604 RETURN DM_model_global_details PIPELINED;
605
606 FUNCTION get_model_settings(model_name IN VARCHAR2)
607 RETURN DM_Model_Settings PIPELINED;
608
609 FUNCTION get_default_settings
610 RETURN DM_Model_Settings PIPELINED;
611
612 FUNCTION get_model_signature(model_name IN VARCHAR2)
613 RETURN DM_Model_Signature PIPELINED;
614
615 FUNCTION get_model_transformations(model_name IN VARCHAR2)
616 RETURN DM_Transforms PIPELINED;
617
618 PROCEDURE get_transform_list(xform_list OUT NOCOPY TRANSFORM_LIST,
619 model_xforms IN DM_TRANSFORMS);
620
621 PROCEDURE import_model (filename IN VARCHAR2,
622 directory IN VARCHAR2,
623 model_filter IN VARCHAR2 DEFAULT NULL,
624 operation IN VARCHAR2 DEFAULT NULL,
625 remote_link IN VARCHAR2 DEFAULT NULL,
626 jobname IN VARCHAR2 DEFAULT NULL,
627 schema_remap IN VARCHAR2 DEFAULT NULL,
628 tablespace_remap IN VARCHAR2 DEFAULT NULL);
629
630 PROCEDURE import_model (model_name IN VARCHAR2,
631 pmmldoc IN XMLTYPE,
632 strict_check IN BOOLEAN DEFAULT FALSE);
633
634 PROCEDURE rank_apply(apply_result_table_name IN VARCHAR2,
635 case_id_column_name IN VARCHAR2,
636 score_column_name IN VARCHAR2,
637 score_criterion_column_name IN VARCHAR2,
638 ranked_apply_table_name IN VARCHAR2,
639 top_n IN INTEGER DEFAULT 1,
640 cost_matrix_table_name IN VARCHAR2 DEFAULT NULL,
641 apply_result_schema_name IN VARCHAR2 DEFAULT NULL,
642 cost_matrix_schema_name IN VARCHAR2 DEFAULT NULL);
643
644 PROCEDURE rename_model(model_name IN VARCHAR2,
645 new_model_name IN VARCHAR2,
646 versioned_model_name IN VARCHAR2 DEFAULT NULL);
647
648 PROCEDURE add_cost_matrix(model_name IN VARCHAR2,
649 cost_matrix_table_name IN VARCHAR2,
650 cost_matrix_schema_name IN VARCHAR2 DEFAULT NULL);
651
652 PROCEDURE remove_cost_matrix(model_name IN VARCHAR2);
653
654 FUNCTION get_model_cost_matrix(model_name IN VARCHAR2,
655 matrix_type IN VARCHAR2
656 DEFAULT cost_matrix_type_score)
657 RETURN DM_COST_MATRIX PIPELINED;
658
659 PROCEDURE alter_reverse_expression(
660 model_name VARCHAR2,
661 expression CLOB,
662 attribute_name VARCHAR2 DEFAULT NULL,
663 attribute_subname VARCHAR2 DEFAULT NULL);
664
665 END dbms_data_mining;