1 PACKAGE dbms_predictive_analytics AUTHID CURRENT_USER AS -- AUTHID CURRENT_USER = invoker's rights
2 -- Declares PREDICT, EXPLAIN and PROFILE; SEGMENT and DETECT are commented out.
3 -- PUBLIC PROCEDURES AND FUNCTIONS
4 --
5
6 -- Procedure: PREDICT
7 -- The purpose of this procedure is to produce predictions for unknown
8 -- targets. The input data table should contain records where the target
9 -- value is known (not null). The known cases will be used to train and test
10 -- a model. Any cases where the target is unknown, i.e. where the target
11 -- value is null, will not be considered during model training. Once a
12 -- mining model is built internally, it will be used to score all the
13 -- records from the input data (both known and unknown), and a table will be
14 -- persisted containing the results. In the case of binary classification,
15 -- an ROC analysis of the results will be performed, and the predictions
16 -- will be adjusted to support the optimal probability threshold resulting
17 -- in the highest True Positive Rate (TPR) versus False Positive Rate (FPR).
18 PROCEDURE predict(
19 accuracy OUT NUMBER, -- output; presumably a measure of prediction quality - TODO confirm
20 data_table_name IN VARCHAR2, -- input data table (known and unknown targets)
21 case_id_column_name IN VARCHAR2, -- presumably the column identifying each record - TODO confirm
22 target_column_name IN VARCHAR2, -- column holding the target to predict
23 result_table_name IN VARCHAR2, -- table persisted with the scoring results
24 data_schema_name IN VARCHAR2 DEFAULT NULL); -- presumably the schema of the input table - TODO confirm
25
26 -- Procedure: EXPLAIN
27 -- This procedure is used for identifying attributes that are important or
28 -- useful for explaining the variation in an attribute of interest (e.g. a
29 -- measure of an OLAP fact table). Only known cases (i.e. cases where the
30 -- value of the explain column is not null) will be taken into consideration
31 -- when assessing the importance of the input attributes upon the dependent
32 -- attribute. The resulting table will contain one row for each of the input
33 -- attributes.
34 PROCEDURE explain(
35 data_table_name IN VARCHAR2, -- presumably the table supplying the input attributes - TODO confirm
36 explain_column_name IN VARCHAR2, -- the explain column (attribute of interest)
37 result_table_name IN VARCHAR2, -- resulting table: one row per input attribute
38 data_schema_name IN VARCHAR2 DEFAULT NULL); -- presumably the schema of the input table - TODO confirm
39
40 -- Procedure: SEGMENT (declaration below is commented out: not exposed here)
41 -- This procedure is used to segment similar records together. It uses
42 -- segmentation analysis to identify groups embedded in the data, where a
43 -- group is a collection of data objects that are similar to one another. The
44 -- SEGMENT task can be applied to a wide range of business problems such as:
45 -- customer segmentation, gene and protein analysis, product grouping,
46 -- finding numerical taxonomies, and text mining.
47 -- PROCEDURE segment(
48 -- data_table_name IN VARCHAR2,
49 -- case_id_column_name IN VARCHAR2,
50 -- segment_result_table_name IN VARCHAR2,
51 -- details_result_table_name IN VARCHAR2,
52 -- number_of_segments IN NUMBER DEFAULT 10,
53 -- max_descriptive_attributes IN NUMBER DEFAULT 5,
54 -- data_schema_name IN VARCHAR2 DEFAULT NULL);
55
56 -- Procedure: DETECT (declaration below is commented out: not exposed here)
57 -- This procedure is used to find anomalies or atypical records within sets of
58 -- data. It can be described as an indicator of strange behavior. Identifying
59 -- such anomalies or outliers can be useful in problems such as fraud
60 -- detection (insurance, tax, credit card, etc.) and computer network
61 -- intrusion detection. Anomaly detection estimates whether a data point is
62 -- typical for a given distribution or not. An atypical data point can be
63 -- either an outlier or an instance of a previously unseen class.
64 -- PROCEDURE detect(
65 -- data_table_name IN VARCHAR2,
66 -- case_id_column_name IN VARCHAR2,
67 -- result_table_name IN VARCHAR2,
68 -- detect_column_name IN VARCHAR2 DEFAULT NULL,
69 -- detection_rate IN NUMBER DEFAULT 0.01,
70 -- max_descriptive_attributes IN NUMBER DEFAULT 5,
71 -- data_schema_name IN VARCHAR2 DEFAULT NULL);
72
73 -- Procedure: PROFILE
74 -- This procedure is used to segment data based on some target attribute and
75 -- value. It will create profiles or rules for records where the specific
76 -- attribute and value exist; in some sense it can be seen as directed or
77 -- supervised segmentation.
78 PROCEDURE profile(
79 data_table_name IN VARCHAR2, -- presumably the table holding the data to profile - TODO confirm
80 target_column_name IN VARCHAR2, -- target attribute the profiles are built around
81 result_table_name IN VARCHAR2, -- presumably receives the generated profiles/rules - TODO confirm
82 data_schema_name IN VARCHAR2 DEFAULT NULL); -- presumably the schema of the input table - TODO confirm
83
84 END dbms_predictive_analytics;