DBA Data[Home] [Help]

PACKAGE: SYS.DBMS_DATA_MINING

Source


1 PACKAGE dbms_data_mining AUTHID CURRENT_USER AS
2 
3   ------------
4   --  OVERVIEW
5   --
6   --     This package provides general purpose routines for Data Mining
7   --     operations viz.
8   --     . CREATE a MODEL against build data.
9   --     . DROP an existing MODEL.
10   --     . RENAME an existing MODEL.
11   --     . COMPUTE various metrics to test a model against the APPLY
12   --       results on test data, with cost inputs
13   --     . APPLY a model to (production) mining data
14   --     . RANK the APPLY results based on cost and other factors
15   --     . GET the MODEL SIGNATURE - i.e. retrieve the attributes
16   --       that constitute the model and their relevant characteristics.
17   --     . GET the MODEL DETAILS - i.e. retrieve the contents of
18   --       the model - the specific patterns and rules that were used
19   --       in making the prediction (in the case of predictive models),
20   --       and/or the declarative rules (in the case of declarative models).
21   --
22 
23   ------------------------
24   -- RULES AND LIMITATIONS
25   --
26   --     The following rules apply in the specification of functions and
27   --     procedures in this package.
28   --
29   --     A function/procedure will raise an INVALID_ARGVAL exception if the
30   --     the following restrictions are not followed in specifying values
31   --     for parameters (unless otherwise specified):
32   --
33   --     1. Every BUILD operation MUST have the mining function
34   --        name specified at the minimum.
35   --     2. All schema object names, except models, should be maximum
36   --        30 bytes in size.
37   --     3. All model names should be maximum 25 bytes in size.
38   --     4. The SETTINGS discussed below under CONSTANTS represent the name
39   --        tags and values that act as column values in a user-created
40   --        Settings Table, with a fixed schema and column types:
41   --
42   --        SETTING_NAME  SETTING_VALUE
43   --        varchar2(30)  varchar2(30)
44   --
45   --     5. For numerical settings, use TO_CHAR() to store them in the
46   --        SETTING_VALUE column - the API will interpret the values.
47   --
48   --
49 
50   -----------
51   -- SECURITY
52   --
53   --     Privileges are associated with the the caller of the procedures/
54   --     functions in this package as follows:
55   --     If the caller is an anonymous PL/SQL block, the procedures/functions
56   --     are run with the privilege of the current user.
57   --     If the caller is a stored procedure, the procedures/functions are run
58   --     using the privileges of the owner of the stored procedure.
59   --
60 
61   ------------
62   -- CONSTANTS
63   --
64   -- General Settings - Begin ------------------------------------------------
65 
66   -- Data Prep: Setting Names
67   prep_auto                CONSTANT VARCHAR2(30) := 'PREP_AUTO';
68 
69   -- Data Prep: Setting Values for prep_auto
70   prep_auto_off            CONSTANT VARCHAR2(30) := 'OFF';
71   prep_auto_on             CONSTANT VARCHAR2(30) := 'ON';
72 
73   -- Score Criterion Type: Setting Values for score_criterion_type
74   score_criterion_probability CONSTANT VARCHAR2(30) := 'PROBABILITY';
75   score_criterion_cost        CONSTANT VARCHAR2(30) := 'COST';
76 
77   -- Row Weights - Setting Name
78   odms_row_weight_column_name    CONSTANT VARCHAR2(30) :=
79     'ODMS_ROW_WEIGHT_COLUMN_NAME';
80 
81   -- Cost Matrix
82   cost_matrix_type_score   CONSTANT VARCHAR2(30) := 'SCORE';
83   cost_matrix_type_create  CONSTANT VARCHAR2(30) := 'CREATE';
84 
85   -- Missing Value Treatment - Setting Name
86   odms_missing_value_treatment   CONSTANT VARCHAR2(30) :=
87     'ODMS_MISSING_VALUE_TREATMENT';
88 
89   -- Missing Value Treatment: Setting Values for ODMS_MISSING_VALUE_TREATMENT
90   odms_missing_value_mean_mode   CONSTANT VARCHAR2(30) :=
91     'ODMS_MISSING_VALUE_MEAN_MODE';
92   odms_missing_value_delete_row  CONSTANT VARCHAR2(30) :=
93     'ODMS_MISSING_VALUE_DELETE_ROW';
94 
95   -- Transactional training data format: Setting Names
96   odms_item_id_column_name       CONSTANT VARCHAR2(30) :=
97     'ODMS_ITEM_ID_COLUMN_NAME';
98   odms_item_value_column_name    CONSTANT VARCHAR2(30) :=
99     'ODMS_ITEM_VALUE_COLUMN_NAME';
100 
101   -- Unstructured Text Setting Names
102   odms_text_policy_name          CONSTANT VARCHAR2(30) :=
103     'ODMS_TEXT_POLICY_NAME';
104   odms_text_max_features         CONSTANT VARCHAR2(30) :=
105     'ODMS_TEXT_MAX_FEATURES';
106 
107   -- Approximate computation
108   odms_approximate_computation CONSTANT VARCHAR2(30) :=
109                                              'ODMS_APPROXIMATE_COMPUTATION';
110   -- Setting values for odms_approximate_computation
111   odms_appr_comp_enable     CONSTANT VARCHAR2(30) := 'ODMS_APPR_COMP_ENABLE';
112   odms_appr_comp_disable    CONSTANT VARCHAR2(30) := 'ODMS_APPR_COMP_DISABLE';
113 
114   -- General Settings - End -------------------------------------------------
115 
116   -----------   Function and Algorithm Settings - Begin ---------------------
117 
118   -- FUNCTION NAME (input as CREATE_MODEL parameter)
119   --
120   classification           CONSTANT VARCHAR2(30) := 'CLASSIFICATION';
121   regression               CONSTANT VARCHAR2(30) := 'REGRESSION';
122   clustering               CONSTANT VARCHAR2(30) := 'CLUSTERING';
123   association              CONSTANT VARCHAR2(30) := 'ASSOCIATION';
124   feature_extraction       CONSTANT VARCHAR2(30) := 'FEATURE_EXTRACTION';
125   attribute_importance     CONSTANT VARCHAR2(30) := 'ATTRIBUTE_IMPORTANCE';
126 
127   -- FUNCTION: Setting Names (input to settings_name column in settings table)
128   clas_priors_table_name   CONSTANT VARCHAR2(30) := 'CLAS_PRIORS_TABLE_NAME';
129   clas_weights_table_name  CONSTANT VARCHAR2(30) := 'CLAS_WEIGHTS_TABLE_NAME';
130   clas_cost_table_name     CONSTANT VARCHAR2(30) := 'CLAS_COST_TABLE_NAME';
131   asso_max_rule_length     CONSTANT VARCHAR2(30) := 'ASSO_MAX_RULE_LENGTH';
132   asso_min_confidence      CONSTANT VARCHAR2(30) := 'ASSO_MIN_CONFIDENCE';
133   asso_min_support         CONSTANT VARCHAR2(30) := 'ASSO_MIN_SUPPORT';
134   feat_num_features        CONSTANT VARCHAR2(30) := 'FEAT_NUM_FEATURES';
135   clus_num_clusters        CONSTANT VARCHAR2(30) := 'CLUS_NUM_CLUSTERS';
136 
137   -- ALGORITHM Setting Name (input to settings_name column in settings table)
138   --
139   algo_name CONSTANT VARCHAR2(30) := 'ALGO_NAME';
140 
141   -- ALGORITHM: Setting Values for algo_name
142   algo_naive_bayes               CONSTANT VARCHAR2(30) :=
143     'ALGO_NAIVE_BAYES';
144   algo_adaptive_bayes_network    CONSTANT VARCHAR2(30) :=
145     'ALGO_ADAPTIVE_BAYES_NETWORK';
146   algo_support_vector_machines   CONSTANT VARCHAR2(30) :=
147     'ALGO_SUPPORT_VECTOR_MACHINES';
148   algo_nonnegative_matrix_factor CONSTANT VARCHAR2(30) :=
149     'ALGO_NONNEGATIVE_MATRIX_FACTOR';
150   algo_apriori_association_rules CONSTANT VARCHAR2(30) :=
151      'ALGO_APRIORI_ASSOCIATION_RULES';
152   algo_kmeans                    CONSTANT VARCHAR2(30) :=
153     'ALGO_KMEANS';
154   algo_ocluster                  CONSTANT VARCHAR2(30) :=
155     'ALGO_O_CLUSTER';
156   algo_ai_mdl                    CONSTANT VARCHAR2(30) :=
157     'ALGO_AI_MDL';
158   algo_decision_tree             CONSTANT VARCHAR2(30) :=
159     'ALGO_DECISION_TREE';
160   algo_generalized_linear_model  CONSTANT VARCHAR2(30) :=
161     'ALGO_GENERALIZED_LINEAR_MODEL';
162   algo_singular_value_decomp     CONSTANT VARCHAR2(30) :=
163     'ALGO_SINGULAR_VALUE_DECOMP';
164   algo_expectation_maximization  CONSTANT VARCHAR2(30) :=
165     'ALGO_EXPECTATION_MAXIMIZATION';
166 
167   -- ALGORITHM SETTINGS AND VALUES
168   --
169   -- ABN: Setting Names
170   abns_model_type          CONSTANT VARCHAR2(30) := 'ABNS_MODEL_TYPE';
171   abns_max_build_minutes   CONSTANT VARCHAR2(30) := 'ABNS_MAX_BUILD_MINUTES';
172   abns_max_predictors      CONSTANT VARCHAR2(30) := 'ABNS_MAX_PREDICTORS';
173   abns_max_nb_predictors   CONSTANT VARCHAR2(30) := 'ABNS_MAX_NB_PREDICTORS';
174 
175   -- ABN: Setting Values for abns_model_type
176   abns_multi_feature       CONSTANT VARCHAR2(30) := 'ABNS_MULTI_FEATURE';
177   abns_single_feature      CONSTANT VARCHAR2(30) := 'ABNS_SINGLE_FEATURE';
178   abns_naive_bayes         CONSTANT VARCHAR2(30) := 'ABNS_NAIVE_BAYES';
179 
180   -- NB: Setting Names
181   nabs_pairwise_threshold  CONSTANT VARCHAR2(30) := 'NABS_PAIRWISE_THRESHOLD';
182   nabs_singleton_threshold CONSTANT VARCHAR2(30) := 'NABS_SINGLETON_THRESHOLD';
183 
184   -- SVM: Setting Names
185   -- NOTE: svms_epsilon applies only for SVM Regression
186   --       svms_complexity_factor applies to both
187   --       svms_std_dev applies only for Gaussian Kernels
188   --       kernel_cache_size to Gaussian kernels only
189   svms_conv_tolerance      CONSTANT VARCHAR2(30) := 'SVMS_CONV_TOLERANCE';
190   svms_std_dev             CONSTANT VARCHAR2(30) := 'SVMS_STD_DEV';
191   svms_complexity_factor   CONSTANT VARCHAR2(30) := 'SVMS_COMPLEXITY_FACTOR';
192   svms_kernel_cache_size   CONSTANT VARCHAR2(30) := 'SVMS_KERNEL_CACHE_SIZE';
193   svms_epsilon             CONSTANT VARCHAR2(30) := 'SVMS_EPSILON';
194   svms_kernel_function     CONSTANT VARCHAR2(30) := 'SVMS_KERNEL_FUNCTION';
195   svms_active_learning     CONSTANT VARCHAR2(30) := 'SVMS_ACTIVE_LEARNING';
196   svms_outlier_rate        CONSTANT VARCHAR2(30) := 'SVMS_OUTLIER_RATE';
197 
198   -- SVM: Setting Values for svms_kernel_function
199   svms_linear              CONSTANT VARCHAR2(30) := 'SVMS_LINEAR';
200   svms_gaussian            CONSTANT VARCHAR2(30) := 'SVMS_GAUSSIAN';
201 
202   -- SVM: Setting Values for svms_active_learning
203   svms_al_enable           CONSTANT VARCHAR2(30) := 'SVMS_AL_ENABLE';
204   svms_al_disable          CONSTANT VARCHAR2(30) := 'SVMS_AL_DISABLE';
205 
206   -- KMNS: Setting Names
207   kmns_distance            CONSTANT VARCHAR2(30) := 'KMNS_DISTANCE';
208   kmns_iterations          CONSTANT VARCHAR2(30) := 'KMNS_ITERATIONS';
209   kmns_conv_tolerance      CONSTANT VARCHAR2(30) := 'KMNS_CONV_TOLERANCE';
210   kmns_split_criterion     CONSTANT VARCHAR2(30) := 'KMNS_SPLIT_CRITERION';
211   kmns_min_pct_attr_support CONSTANT VARCHAR2(30):= 'KMNS_MIN_PCT_ATTR_SUPPORT';
212   kmns_block_growth        CONSTANT VARCHAR2(30) := 'KMNS_BLOCK_GROWTH';
213   kmns_num_bins            CONSTANT VARCHAR2(30) := 'KMNS_NUM_BINS';
214 
215   -- KMNS: Setting Values for kmns_distance
216   kmns_euclidean           CONSTANT VARCHAR2(30) := 'KMNS_EUCLIDEAN';
217   kmns_cosine              CONSTANT VARCHAR2(30) := 'KMNS_COSINE';
218   kmns_fast_cosine         CONSTANT VARCHAR2(30) := 'KMNS_FAST_COSINE';
219 
220   -- KMNS: Setting Values for kmns_split_criterion
221   kmns_size                CONSTANT VARCHAR2(30) := 'KMNS_SIZE';
222   kmns_variance            CONSTANT VARCHAR2(30) := 'KMNS_VARIANCE';
223 
224   -- NMF: Setting Names
225   nmfs_num_iterations      CONSTANT VARCHAR2(30) := 'NMFS_NUM_ITERATIONS';
226   nmfs_conv_tolerance      CONSTANT VARCHAR2(30) := 'NMFS_CONV_TOLERANCE';
227   nmfs_random_seed         CONSTANT VARCHAR2(30) := 'NMFS_RANDOM_SEED';
228   nmfs_nonnegative_scoring CONSTANT VARCHAR2(30) :=
229                                           'NMFS_NONNEGATIVE_SCORING';
230   -- Setting values for NMFS_NONNEGATIVE_SCORING
231   nmfs_nonneg_scoring_enable CONSTANT VARCHAR2(30) :=
232                                           'NMFS_NONNEG_SCORING_ENABLE';
233   nmfs_nonneg_scoring_disable CONSTANT VARCHAR2(30) :=
234                                           'NMFS_NONNEG_SCORING_DISABLE';
235 
236   -- OCLT: Setting Names for O-Cluster
237   oclt_sensitivity         CONSTANT VARCHAR2(30) := 'OCLT_SENSITIVITY';
238   oclt_max_buffer          CONSTANT VARCHAR2(30) := 'OCLT_MAX_BUFFER';
239 
240   -- TREE: Setting Names
241   tree_impurity_metric     CONSTANT VARCHAR2(30) := 'TREE_IMPURITY_METRIC';
242   tree_term_max_depth      CONSTANT VARCHAR2(30) := 'TREE_TERM_MAX_DEPTH';
243   tree_term_minrec_split   CONSTANT VARCHAR2(30) := 'TREE_TERM_MINREC_SPLIT';
244   tree_term_minpct_split   CONSTANT VARCHAR2(30) := 'TREE_TERM_MINPCT_SPLIT';
245   tree_term_minrec_node    CONSTANT VARCHAR2(30) := 'TREE_TERM_MINREC_NODE';
246   tree_term_minpct_node    CONSTANT VARCHAR2(30) := 'TREE_TERM_MINPCT_NODE';
247 
248   -- TREE: Setting Values for tree_impurity_metric
249   tree_impurity_gini       CONSTANT VARCHAR2(30) := 'TREE_IMPURITY_GINI';
250   tree_impurity_entropy    CONSTANT VARCHAR2(30) := 'TREE_IMPURITY_ENTROPY';
251 
252   -- GLM: Setting Names
253   glms_ridge_regression    CONSTANT VARCHAR2(30) := 'GLMS_RIDGE_REGRESSION';
254   glms_diagnostics_table_name    CONSTANT VARCHAR2(30) :=
255     'GLMS_DIAGNOSTICS_TABLE_NAME';
256   glms_reference_class_name      CONSTANT VARCHAR2(30) :=
257     'GLMS_REFERENCE_CLASS_NAME';
258   glms_ridge_value     CONSTANT VARCHAR2(30) := 'GLMS_RIDGE_VALUE';
259   glms_conf_level     CONSTANT VARCHAR2(30) := 'GLMS_CONF_LEVEL';
260   glms_vif_for_ridge     CONSTANT VARCHAR2(30) := 'GLMS_VIF_FOR_RIDGE';
261 
262   -- GLM: Setting Values for glms_ridge_regression
263   glms_ridge_reg_enable    CONSTANT VARCHAR2(30) := 'GLMS_RIDGE_REG_ENABLE';
264   glms_ridge_reg_disable   CONSTANT VARCHAR2(30) := 'GLMS_RIDGE_REG_DISABLE';
265 
266   -- GLM: Setting Values for glms_vif_for_ridge
267   glms_vif_ridge_enable    CONSTANT VARCHAR2(30) := 'GLMS_VIF_RIDGE_ENABLE';
268   glms_vif_ridge_disable   CONSTANT VARCHAR2(30) := 'GLMS_VIF_RIDGE_DISABLE';
269 
270   -- GLM: Setting Values for glms_ftr_selection
271   glms_ftr_selection         CONSTANT VARCHAR2(30) := 'GLMS_FTR_SELECTION';
272   glms_ftr_selection_enable  CONSTANT VARCHAR2(30) :=
273     'GLMS_FTR_SELECTION_ENABLE';
274   glms_ftr_selection_disable  CONSTANT VARCHAR2(30) :=
275     'GLMS_FTR_SELECTION_DISABLE';
276 
277   -- GLM: Setting Values for glms_ftr_sel_crit
278   glms_ftr_sel_crit CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_CRIT';
279   glms_ftr_sel_aic  CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_AIC';
280   glms_ftr_sel_sbic  CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_SBIC';
281   glms_ftr_sel_ric  CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_RIC';
282   glms_ftr_sel_alpha_inv  CONSTANT VARCHAR2(30) := 'GLMS_FTR_SEL_ALPHA_INV';
283 
284   -- GLM: Setting Values for glms_feature_generation
285   glms_ftr_generation CONSTANT VARCHAR2(30) := 'GLMS_FTR_GENERATION';
286   glms_ftr_generation_enable  CONSTANT VARCHAR2(30) :=
287     'GLMS_FTR_GENERATION_ENABLE';
288   glms_ftr_generation_disable  CONSTANT VARCHAR2(30) :=
289     'GLMS_FTR_GENERATION_DISABLE';
290 
291   -- GLM: Setting Values for glms_feature_gen
292   glms_ftr_gen_method CONSTANT VARCHAR2(30) := 'GLMS_FTR_GEN_METHOD';
293   glms_ftr_gen_quadratic CONSTANT VARCHAR2(30) := 'GLMS_FTR_GEN_QUADRATIC';
294   glms_ftr_gen_cubic CONSTANT VARCHAR2(30) := 'GLMS_FTR_GEN_CUBIC';
295 
296   -- GLM: feature selection categorical value handling
297   glms_select_block CONSTANT VARCHAR2(30) := 'GLMS_SELECT_BLOCK';
298   glms_select_block_disable CONSTANT VARCHAR2(30) := 'GLMS_SELECT_BLOCK_DISABLE';
299   glms_select_block_enable CONSTANT VARCHAR2(30) := 'GLMS_SELECT_BLOCK_ENABLE';
300 
301   -- GLM: feature selection - max features selected
302   glms_max_features CONSTANT VARCHAR2(30) := 'GLMS_MAX_FEATURES';
303 
304   -- GLM: feature identification - whether row sampling is used in the
305   --      selection of feature
306   glms_ftr_identification CONSTANT VARCHAR2(30) := 'GLMS_FTR_IDENTIFICATION';
307   glms_ftr_ident_quick  CONSTANT VARCHAR2(30) :=
308     'GLMS_FTR_IDENT_QUICK';
309   glms_ftr_ident_complete  CONSTANT VARCHAR2(30) :=
310     'GLMS_FTR_IDENT_COMPLETE';
311 
312   -- GLM: model pruning - whether the final model features will be
313   --      pruned using t-statistics
314   glms_prune_model CONSTANT VARCHAR2(30) := 'GLMS_PRUNE_MODEL';
315   glms_prune_model_enable CONSTANT VARCHAR2(30) := 'GLMS_PRUNE_MODEL_ENABLE';
316   glms_prune_model_disable CONSTANT VARCHAR2(30) := 'GLMS_PRUNE_MODEL_DISABLE';
317 
318   -- GLM: feature acceptance - whether partitioning the data into feature
319   --      ordering and feature selection sets will be used (strict) or
320   --      not (relaxed
321   glms_ftr_acceptance CONSTANT VARCHAR2(30) := 'GLMS_FTR_ACCEPTANCE';
322   glms_ftr_acceptance_strict CONSTANT VARCHAR2(30) :=
323     'GLMS_FTR_ACCEPTANCE_STRICT';
324   glms_ftr_acceptance_relaxed CONSTANT VARCHAR2(30) :=
325     'GLMS_FTR_ACCEPTANCE_RELAXED';
326 
327 
328   -- SVD
329   -- max number of features allowed
330   svds_max_num_features    CONSTANT NUMBER       := 2500;
331 
332   svds_scoring_mode        CONSTANT VARCHAR2(30) := 'SVDS_SCORING_MODE';
333   -- SVD: Setting values for svds_scoring_mode
334   svds_scoring_svd         CONSTANT VARCHAR2(30) := 'SVDS_SCORING_SVD';
338   -- SVD: Setting values for svds_u_matrix_output
335   svds_scoring_pca         CONSTANT VARCHAR2(30) := 'SVDS_SCORING_PCA';
336 
337   svds_u_matrix_output     CONSTANT VARCHAR2(30) := 'SVDS_U_MATRIX_OUTPUT';
339   svds_u_matrix_enable     CONSTANT VARCHAR2(30) := 'SVDS_U_MATRIX_ENABLE';
340   svds_u_matrix_disable    CONSTANT VARCHAR2(30) := 'SVDS_U_MATRIX_DISABLE';
341 
342   -- EM
343   -- number of components
344   emcs_num_components        CONSTANT VARCHAR2(30) := 'EMCS_NUM_COMPONENTS';
345 
346   -- high-level component clustering
347   emcs_cluster_components    CONSTANT VARCHAR2(30) :=
348                                                   'EMCS_CLUSTER_COMPONENTS';
349   -- values for emcs_cluster_components
350   emcs_cluster_comp_enable   CONSTANT VARCHAR2(30) :=
351                                                   'EMCS_CLUSTER_COMP_ENABLE';
352   emcs_cluster_comp_disable  CONSTANT VARCHAR2(30) :=
353                                                   'EMCS_CLUSTER_COMP_DISABLE';
354 
355   -- high-level cluster threshold
356   emcs_cluster_thresh        CONSTANT VARCHAR2(30) := 'EMCS_CLUSTER_THRESH';
357 
358   -- max number of 2D attributes
359   emcs_max_num_attr_2d       CONSTANT VARCHAR2(30) := 'EMCS_MAX_NUM_ATTR_2D';
360 
361   -- number of projections
362   emcs_num_projections       CONSTANT VARCHAR2(30) := 'EMCS_NUM_PROJECTIONS';
363 
364   -- number of quantile bins
365   emcs_num_quantile_bins     CONSTANT VARCHAR2(30) := 'EMCS_NUM_QUANTILE_BINS';
366 
367   -- number of topN bins
368   emcs_num_topn_bins         CONSTANT VARCHAR2(30) := 'EMCS_NUM_TOPN_BINS';
369 
370   -- number of equi-width bins
371   emcs_num_equiwidth_bins    CONSTANT VARCHAR2(30) :=
372                                                   'EMCS_NUM_EQUIWIDTH_BINS';
373 
374   -- minimum percentage attribute support
375   emcs_min_pct_attr_support  CONSTANT VARCHAR2(30) :=
376                                                   'EMCS_MIN_PCT_ATTR_SUPPORT';
377   -- full covariance (next release)
378 --  emcs_full_covariance       CONSTANT VARCHAR2(30) := 'EMCS_FULL_COVARIANCE';
379   -- values for emcs_full_covariance
380 --  emcs_full_cov_enable       CONSTANT VARCHAR2(30) := 'EMCS_FULL_COV_ENABLE';
381 --  emcs_full_cov_disable      CONSTANT VARCHAR2(30) := 'EMCS_FULL_COV_DISABLE';
382 
383   -- cluster statistics
384   emcs_cluster_statistics       CONSTANT VARCHAR2(30) := 'EMCS_CLUSTER_STATISTICS';
385   -- values for emcs_cluster_statistics
386   emcs_clus_stats_enable       CONSTANT VARCHAR2(30) := 'EMCS_CLUS_STATS_ENABLE';
387   emcs_clus_stats_disable      CONSTANT VARCHAR2(30) := 'EMCS_CLUS_STATS_DISABLE';
388 
389   -- distribution for modeling numerical attributes
390   emcs_num_distribution      CONSTANT VARCHAR2(30) := 'EMCS_NUM_DISTRIBUTION';
391   -- values for emcs_num_distribution
392   emcs_num_distr_bernoulli   CONSTANT VARCHAR2(30) :=
393                                                    'EMCS_NUM_DISTR_BERNOULLI';
394   emcs_num_distr_gaussian    CONSTANT VARCHAR2(30) :=
395                                                    'EMCS_NUM_DISTR_GAUSSIAN';
396   emcs_num_distr_system      CONSTANT VARCHAR2(30) := 'EMCS_NUM_DISTR_SYSTEM';
397 
398   -- number of iterations
399   emcs_num_iterations        CONSTANT VARCHAR2(30) := 'EMCS_NUM_ITERATIONS';
400 
401   -- required log likelihood improvement
402   emcs_loglike_improvement   CONSTANT VARCHAR2(30) :=
403                                                    'EMCS_LOGLIKE_IMPROVEMENT';
404 
405   -- linkage function
406   emcs_linkage_function       CONSTANT VARCHAR2(30) := 'EMCS_LINKAGE_FUNCTION';
407   -- values for linkage function
408   emcs_linkage_single        CONSTANT VARCHAR2(30) := 'EMCS_LINKAGE_SINGLE';
409   emcs_linkage_average       CONSTANT VARCHAR2(30) := 'EMCS_LINKAGE_AVERAGE';
410   emcs_linkage_complete      CONSTANT VARCHAR2(30) := 'EMCS_LINKAGE_COMPLETE';
411 
412   -- attribute filtering
413   emcs_attribute_filter      CONSTANT VARCHAR2(30) := 'EMCS_ATTRIBUTE_FILTER';
414   -- values for attribute filtering
415   emcs_attr_filter_enable    CONSTANT VARCHAR2(30) :=
416                                                   'EMCS_ATTR_FILTER_ENABLE';
417   emcs_attr_filter_disable   CONSTANT VARCHAR2(30) :=
418                                                   'EMCS_ATTR_FILTER_DISABLE';
419   -- convergence criterion
420   emcs_convergence_criterion CONSTANT VARCHAR2(30) :=
421                                                   'EMCS_CONVERGENCE_CRITERION';
422   -- values for convergence criterion
423   emcs_conv_crit_heldaside   CONSTANT VARCHAR2(30) :=
424                                                   'EMCS_CONV_CRIT_HELDASIDE';
425   emcs_conv_crit_bic         CONSTANT VARCHAR2(30) :=
426                                                   'EMCS_CONV_CRIT_BIC';
427 
428   -----------   Function and Algorithm Settings - End ------------------------
429 
430   --------------
431   -- LOCAL TYPES
432   --
433   SUBTYPE TRANSFORM_LIST IS dbms_data_mining_transform.TRANSFORM_LIST;
434 
435   -- Default values for model build settings
436   TYPE default_settings_type IS TABLE OF VARCHAR2(30) INDEX BY VARCHAR2(30);
437   external_default_settings default_settings_type;
438   internal_default_settings default_settings_type;
439 
440   ---------------------------
441   -- PROCEDURES AND FUNCTIONS
442   --
443   PROCEDURE apply(model_name          IN VARCHAR2,
444                   data_table_name     IN VARCHAR2,
445                   case_id_column_name IN VARCHAR2,
449   PROCEDURE compute_confusion_matrix(
446                   result_table_name   IN VARCHAR2,
447                   data_schema_name    IN VARCHAR2 DEFAULT NULL);
448 
450                   accuracy                    OUT NUMBER,
451                   apply_result_table_name     IN  VARCHAR2,
452                   target_table_name           IN  VARCHAR2,
453                   case_id_column_name         IN  VARCHAR2,
454                   target_column_name          IN  VARCHAR2,
455                   confusion_matrix_table_name IN  VARCHAR2,
456                   score_column_name           IN  VARCHAR2 DEFAULT
457                                                              'PREDICTION',
458                   score_criterion_column_name IN  VARCHAR2 DEFAULT
459                                                              'PROBABILITY',
460                   cost_matrix_table_name      IN  VARCHAR2 DEFAULT NULL,
461                   apply_result_schema_name    IN  VARCHAR2 DEFAULT NULL,
462                   target_schema_name          IN  VARCHAR2 DEFAULT NULL,
463                   cost_matrix_schema_name     IN  VARCHAR2 DEFAULT NULL,
464                   score_criterion_type        IN  VARCHAR2 DEFAULT NULL);
465 
466   PROCEDURE compute_lift(
467                   apply_result_table_name     IN VARCHAR2,
468                   target_table_name           IN VARCHAR2,
469                   case_id_column_name         IN VARCHAR2,
470                   target_column_name          IN VARCHAR2,
471                   lift_table_name             IN VARCHAR2,
472                   positive_target_value       IN VARCHAR2,
473                   score_column_name           IN VARCHAR2 DEFAULT
474                                                             'PREDICTION',
475                   score_criterion_column_name IN VARCHAR2 DEFAULT
476                                                             'PROBABILITY',
477                   num_quantiles               IN NUMBER   DEFAULT 10,
478                   cost_matrix_table_name      IN VARCHAR2 DEFAULT NULL,
479                   apply_result_schema_name    IN VARCHAR2 DEFAULT NULL,
483 
480                   target_schema_name          IN VARCHAR2 DEFAULT NULL,
481                   cost_matrix_schema_name     IN VARCHAR2 DEFAULT NULL,
482                   score_criterion_type        IN VARCHAR2 DEFAULT NULL);
484   PROCEDURE compute_roc(
485                   roc_area_under_curve        OUT NUMBER,
486                   apply_result_table_name     IN  VARCHAR2,
487                   target_table_name           IN  VARCHAR2,
488                   case_id_column_name         IN  VARCHAR2,
489                   target_column_name          IN  VARCHAR2,
490                   roc_table_name              IN  VARCHAR2,
491                   positive_target_value       IN  VARCHAR2,
492                   score_column_name           IN  VARCHAR2 DEFAULT
493                                                              'PREDICTION',
494                   score_criterion_column_name IN  VARCHAR2 DEFAULT
495                                                              'PROBABILITY',
496                   apply_result_schema_name    IN  VARCHAR2 DEFAULT NULL,
497                   target_schema_name          IN  VARCHAR2 DEFAULT NULL);
498 
499   PROCEDURE create_model(
500                   model_name            IN VARCHAR2,
501                   mining_function       IN VARCHAR2,
502                   data_table_name       IN VARCHAR2,
503                   case_id_column_name   IN VARCHAR2,
504                   target_column_name    IN VARCHAR2 DEFAULT NULL,
505                   settings_table_name   IN VARCHAR2 DEFAULT NULL,
506                   data_schema_name      IN VARCHAR2 DEFAULT NULL,
507                   settings_schema_name  IN VARCHAR2 DEFAULT NULL,
508                   xform_list            IN TRANSFORM_LIST DEFAULT NULL);
509 
510   PROCEDURE drop_model(model_name IN VARCHAR2,
511                        force      IN BOOLEAN DEFAULT FALSE);
512 
513   PROCEDURE export_model (filename      IN VARCHAR2,
514                           directory     IN VARCHAR2,
515                           model_filter  IN VARCHAR2 DEFAULT NULL,
516                           filesize      IN VARCHAR2 DEFAULT NULL,
517                           operation     IN VARCHAR2 DEFAULT NULL,
518                           remote_link   IN VARCHAR2 DEFAULT NULL,
519                           jobname       IN VARCHAR2 DEFAULT NULL);
520 
521   -- XML (PMML) versions of get model details
522   FUNCTION get_model_details_xml(model_name IN VARCHAR2)
523   RETURN XMLType;
524 
525   -- Specifying topn orders by confidence DESC, support DESC
526   --   otherwise by rule_id
527   FUNCTION get_association_rules(model_name       IN VARCHAR2,
528                                  topn             IN NUMBER DEFAULT NULL,
529                                  rule_id          IN INTEGER DEFAULT NULL,
530                                  min_confidence   IN NUMBER DEFAULT NULL,
531                                  min_support      IN NUMBER DEFAULT NULL,
532                                  max_rule_length  IN INTEGER DEFAULT NULL,
533                                  min_rule_length  IN INTEGER DEFAULT NULL,
534                                  sort_order       IN ORA_MINING_VARCHAR2_NT DEFAULT NULL,
535                                  antecedent_items IN DM_ITEMS DEFAULT NULL,
536                                  consequent_items IN DM_ITEMS DEFAULT NULL,
537                                  min_lift         IN NUMBER DEFAULT NULL)
538   RETURN DM_Rules PIPELINED;
539 
540   -- Specifying topn orders by support DESC otherwise there
541   --   is no ordering
542   FUNCTION get_frequent_itemsets(model_name IN VARCHAR2,
543                                  topn IN NUMBER DEFAULT NULL,
544                                  max_itemset_length IN NUMBER DEFAULT NULL)
545   RETURN DM_ItemSets PIPELINED;
546 
547   FUNCTION get_model_details_ai(model_name IN VARCHAR2)
548   RETURN dm_ranked_attributes pipelined;
549 
550   FUNCTION get_model_details_glm(model_name IN VARCHAR2)
551   RETURN DM_GLM_Coeff_Set PIPELINED;
552 
553   FUNCTION get_model_details_svd(model_name IN VARCHAR2,
554                                  matrix_type IN VARCHAR2 DEFAULT NULL)
555   RETURN DM_SVD_MATRIX_Set PIPELINED;
556 
557   FUNCTION get_model_details_km(model_name VARCHAR2,
558                                 cluster_id NUMBER   DEFAULT NULL,
559                                 attribute  VARCHAR2 DEFAULT NULL,
560                                 centroid   NUMBER   DEFAULT 1,
561                                 histogram  NUMBER   DEFAULT 1,
562                                 rules      NUMBER   DEFAULT 2,
563                                 attribute_subname  VARCHAR2 DEFAULT NULL,
564                                 topn_attributes NUMBER DEFAULT NULL)
565 
566   RETURN dm_clusters PIPELINED;
567 
568   FUNCTION get_model_details_nb(model_name IN VARCHAR2)
569   RETURN DM_NB_Details PIPELINED;
570 
571   FUNCTION get_model_details_nmf(model_name IN VARCHAR2)
572   RETURN DM_NMF_Feature_Set PIPELINED;
573 
574   FUNCTION get_model_details_oc(model_name VARCHAR2,
575                                 cluster_id NUMBER   DEFAULT NULL,
576                                 attribute  VARCHAR2 DEFAULT NULL,
577                                 centroid   NUMBER   DEFAULT 1,
578                                 histogram  NUMBER   DEFAULT 1,
579                                 rules      NUMBER   DEFAULT 2,
580                                 topn_attributes NUMBER DEFAULT NULL)
581   RETURN dm_clusters PIPELINED;
582 
583   FUNCTION get_model_details_svm(model_name   VARCHAR2,
584                                  reverse_coef NUMBER DEFAULT 0)
585   RETURN DM_SVM_Linear_Coeff_Set PIPELINED;
586 
587   FUNCTION get_model_details_em(model_name VARCHAR2,
588                                 cluster_id NUMBER   DEFAULT NULL,
589                                 attribute  VARCHAR2 DEFAULT NULL,
590                                 centroid   NUMBER   DEFAULT 1,
594                                 topn_attributes NUMBER DEFAULT NULL)
591                                 histogram  NUMBER   DEFAULT 1,
592                                 rules      NUMBER   DEFAULT 2,
593                                 attribute_subname  VARCHAR2 DEFAULT NULL,
595   RETURN dm_clusters PIPELINED;
596 
597   FUNCTION get_model_details_em_comp(model_name IN VARCHAR2)
598   RETURN DM_EM_COMPONENT_SET PIPELINED;
599 
600   FUNCTION get_model_details_em_proj(model_name IN VARCHAR2)
601   RETURN DM_EM_PROJECTION_SET PIPELINED;
602 
603   FUNCTION get_model_details_global(model_name IN VARCHAR2)
604   RETURN DM_model_global_details PIPELINED;
605 
606   FUNCTION get_model_settings(model_name IN VARCHAR2)
607   RETURN DM_Model_Settings PIPELINED;
608 
609   FUNCTION get_default_settings
610   RETURN DM_Model_Settings PIPELINED;
611 
612   FUNCTION get_model_signature(model_name IN VARCHAR2)
613   RETURN DM_Model_Signature PIPELINED;
614 
615   FUNCTION get_model_transformations(model_name IN VARCHAR2)
616   RETURN DM_Transforms PIPELINED;
617 
618   PROCEDURE get_transform_list(xform_list   OUT NOCOPY TRANSFORM_LIST,
619                                model_xforms IN DM_TRANSFORMS);
620 
621   PROCEDURE import_model (filename         IN VARCHAR2,
622                           directory        IN VARCHAR2,
623                           model_filter     IN VARCHAR2 DEFAULT NULL,
624                           operation        IN VARCHAR2 DEFAULT NULL,
625                           remote_link      IN VARCHAR2 DEFAULT NULL,
626                           jobname          IN VARCHAR2 DEFAULT NULL,
627                           schema_remap     IN VARCHAR2 DEFAULT NULL,
628                           tablespace_remap IN VARCHAR2 DEFAULT NULL);
629 
630   PROCEDURE import_model (model_name      IN VARCHAR2,
631                           pmmldoc         IN XMLTYPE,
632                           strict_check    IN BOOLEAN DEFAULT FALSE);
633 
634   PROCEDURE rank_apply(apply_result_table_name     IN VARCHAR2,
635                        case_id_column_name         IN VARCHAR2,
636                        score_column_name           IN VARCHAR2,
637                        score_criterion_column_name IN VARCHAR2,
638                        ranked_apply_table_name     IN VARCHAR2,
639                        top_n                       IN INTEGER  DEFAULT 1,
640                        cost_matrix_table_name      IN VARCHAR2 DEFAULT NULL,
641                        apply_result_schema_name    IN VARCHAR2 DEFAULT NULL,
642                        cost_matrix_schema_name     IN VARCHAR2 DEFAULT NULL);
643 
644   PROCEDURE rename_model(model_name     IN VARCHAR2,
645                          new_model_name IN VARCHAR2,
646                          versioned_model_name IN VARCHAR2 DEFAULT NULL);
647 
648   PROCEDURE add_cost_matrix(model_name              IN VARCHAR2,
649                             cost_matrix_table_name  IN VARCHAR2,
650                             cost_matrix_schema_name IN VARCHAR2 DEFAULT NULL);
651 
652   PROCEDURE remove_cost_matrix(model_name IN VARCHAR2);
653 
654   FUNCTION get_model_cost_matrix(model_name  IN VARCHAR2,
655                                  matrix_type IN VARCHAR2
656                                                 DEFAULT cost_matrix_type_score)
657   RETURN DM_COST_MATRIX PIPELINED;
658 
659   PROCEDURE alter_reverse_expression(
660     model_name                     VARCHAR2,
661     expression                     CLOB,
662     attribute_name                 VARCHAR2 DEFAULT NULL,
663     attribute_subname              VARCHAR2 DEFAULT NULL);
664 
665 END dbms_data_mining;