eTRM uses javascript that opens and focuses a separate window to display reports. Your browser does not appear to support javascript. This should not prevent you from using eTRM but it does mean the window focus feature will not work. Please check the contents of other windows on your desktop if you click on a link that does not appear to do anything.
PACKAGE: SYS.DBMS_DATA_MINING_TRANSFORM

Source


1 PACKAGE dbms_data_mining_transform AUTHID CURRENT_USER AS
2   -----------------------------------------------------------------------------
3   -- COLUMN LIST collection type
4   -----------------------------------------------------------------------------
5   -- This type is used to store both quoted and non-quoted identifiers for
6   -- column names of a table.
7   TYPE
8     COLUMN_LIST                    IS VARRAY(1000) OF VARCHAR2(32);
9 
10   -----------------------------------------------------------------------------
11   -- DESCribe LIST collection TYPE
12   -----------------------------------------------------------------------------
13   -- This type holds the results of describe operations. Note that col_name
14   -- field of DESC_TAB2 is VARCHAR2(32767). This is applicable to cases when a
15   -- SELECT expression is not given an alias and thus the expression itself is
16   -- used as name (with whitespaces removed) and thus can be larger than 30
17   -- bytes. However when those names get larger than 30 bytes the corresponding
18   -- SELECT cannot be used as an inline view.
19   SUBTYPE
20     DESCRIBE_LIST                  IS dbms_sql.DESC_TAB2;
21 
22   SUBTYPE
23     DESCRIBE_LIST3                 IS dbms_sql.DESC_TAB3;
24 
25   -----------------------------------------------------------------------------
26   -- EXPRESSION RECord type
27   -----------------------------------------------------------------------------
28   -- This type is used for storing transformation expressions. Unlike VARCHAR2
29   -- it can be used for expression that can grow larger than 32K. Field lb is
30   -- the smallest index and ub is the largest index of the elements of the
31   -- VARCHAR2A array.
32   TYPE
33     EXPRESSION_REC                 IS RECORD (
34       lstmt                          dbms_sql.VARCHAR2A,
35       lb                             BINARY_INTEGER DEFAULT 1,
36       ub                             BINARY_INTEGER DEFAULT 0);
37 
38   -----------------------------------------------------------------------------
39   -- TRAMSFORMation RECord type
40   -----------------------------------------------------------------------------
41   -- This type is used for storing in-memory transformation stack definition
42   -- for a single attribute.
43   -- Attribute specification field is used to specify additional information
44   -- and actions for an attribute:
45   --   NOPREP - disables data prep for an attribute.
46   --            This is applicable to both stack_* interfaces in this package
47   --            as well as dbms_data_mining.create_model. Stack_* methods do
48   --            not do any stacking for attributes with NOPREP specified, while
49   --            create_model does not do any auto data prep even when the
50   --            global PREP_AUTO setting is set to ON.
51   TYPE
52     TRANSFORM_REC                  IS RECORD (
53       attribute_name                 VARCHAR2(30),
54       attribute_subname              VARCHAR2(4000),
55       expression                     EXPRESSION_REC,
56       reverse_expression             EXPRESSION_REC,
57       attribute_spec                 VARCHAR2(4000));
58 
59   -----------------------------------------------------------------------------
60   -- TRAMSFORMation LIST collection type
61   -----------------------------------------------------------------------------
62   -- This type is used for storing in-memory transformation stack definition.
63   TYPE
64     TRANSFORM_LIST                 IS TABLE OF TRANSFORM_REC;
65 
66   -----------------------------------------------------------------------------
67   -- NESTed COLumn TYPEs
68   -----------------------------------------------------------------------------
69   nest_num_col_type              CONSTANT NUMBER := 100001;
70   nest_cat_col_type              CONSTANT NUMBER := 100002;
71   nest_bd_col_type               CONSTANT NUMBER := 100003;
72   nest_bf_col_type               CONSTANT NUMBER := 100004;
73 
74   nest_nums_col_name       CONSTANT VARCHAR2(30) := 'DM_NESTED_NUMERICALS';
75   nest_cats_col_name       CONSTANT VARCHAR2(30) := 'DM_NESTED_CATEGORICALS';
76   nest_bds_col_name        CONSTANT VARCHAR2(30) := 'DM_NESTED_BINARY_DOUBLES';
77   nest_bfs_col_name        CONSTANT VARCHAR2(30) := 'DM_NESTED_BINARY_FLOATS';
78 
79   -----------------------------------------------------------------------------
80   --                            create_col_rem                               --
81   -----------------------------------------------------------------------------
82   -- NAME
83   --   create_col_rem - CREATE COLumn REMoval definition table
84   -- DESCRIPTION
85   --   Creates column removal definition table:
86   --       CREATE TABLE <col_rem>(
87   --         col   VARCHAR2(30),
88   --         att   VARCHAR2(4000))
89   --   This table is used to guide query generation process that removes
90   --   specified columns.
91   -- PARAMETERS
92   --   rem_table_name                 - column removal definition table
93   --   rem_schema_name                - definition table schema name
94   -- RETURNS
95   --   None
96   -- EXCEPTIONS
97   --   None
98   -- NOTES
99   --   Note, that {col} is case sensitive since it generates quoted
100   --   identifiers. It is allowed to have multiple entries in the xform
101   --   defintion table for the same {col}.
102   -----------------------------------------------------------------------------
103   PROCEDURE create_col_rem(
104     rem_table_name                 VARCHAR2,
105     rem_schema_name                VARCHAR2 DEFAULT NULL);
106 
107   -----------------------------------------------------------------------------
108   --                              xform_col_rem                              --
109   -----------------------------------------------------------------------------
110   -- NAME
111   --   xform_col_rem - XFORMation COLumn REMoval
112   -- DESCRIPTION
113   --   Creates a view that perfoms column removal from the data table. Only the
114   --   columns that are specified in the xform definition are removed, the
115   --   remaining columns do not change.
116   -- PARAMETERS
117   --   rem_table_name                 - xform definition table
118   --   data_table_name                - data table
119   --   xform_view_name                - xform view
120   --   rem_schema_name                - xform definition table schema name
121   --   data_schema_name               - data table schema name
122   --   xform_schema_name              - xform view schema name
123   -- RETURNS
124   --   None
125   -- EXCEPTIONS
126   --   None
127   -- NOTES
128   --   None
129   -----------------------------------------------------------------------------
130   PROCEDURE xform_col_rem(
131     rem_table_name                 VARCHAR2,
132     data_table_name                VARCHAR2,
133     xform_view_name                VARCHAR2,
134     rem_schema_name                VARCHAR2 DEFAULT NULL,
135     data_schema_name               VARCHAR2 DEFAULT NULL,
136     xform_schema_name              VARCHAR2 DEFAULT NULL);
137 
138   -----------------------------------------------------------------------------
139   --                              stack_col_rem                              --
140   -----------------------------------------------------------------------------
141   -- NAME
142   --   stack_col_rem - STACK COLumn REMoval
143   -- DESCRIPTION
144   --   For every column in the stack definition that has a matching entry in
145   --   the transformation definition, stacks column removal expression (NULL
146   --   expression) instead of the existing expression and updates the stack
147   --   definition. Columns in the stack definition that have no matching
148   --   entries in the transformation definition or have NULL expression are not
149   --   changed. Columns in the transformation definition that have no matching
150   --   entries in the stack definition are added to the stack definition (with
151   --   NULL expression). Reverse expressions in the stack definition are
152   --   updated accordingly, that is if an expression is updated, added or
153   --   remains unchanged then a corresponding reverse expression is also
154   --   updated, added or remains unchanged. When reverse expression is NULL it
155   --   is treated as "{attr}" (identity function) or "VALUE" for nested
156   --   columns. Given an entry ({attr}, {expr}, {rexp}) in the stack definition
157   --   and a matching entry in the transformation definition, after stacking
158   --   {expr} and {rexp} are both changed to NULL.
159   --   Example 1. Given transformation definition:
160   --       {col = COL1, att = NULL}
161   --       {col = COL2, att = NULL}
162   --   and stack definition:
163   --       {attr = COL1, expr = log(10, COL1), rexp = power(10, COL1)}
164   --       {attr = COL3, expr = ln(COL3),      rexp = exp(COL3)}
165   --   the following updated stack definition is generated:
166   --       {attr = COL1, expr = NULL,     rexp = NULL}
167   --       {attr = COL3, expr = ln(COL3), rexp = exp(COL3)}
168   --       {attr = COL2, expr = NULL,     rexp = NULL}
169   -- PARAMETERS
170   --   rem_table_name                 - xform definition table
171   --   xform_list                     - stack definition
172   --   rem_schema_name                - xform definition table schema name
173   -- RETURNS
174   --   xform_list                     - updated stack definition
175   -- EXCEPTIONS
176   --   None
177   -- NOTES
178   --   None
179   -----------------------------------------------------------------------------
180   PROCEDURE stack_col_rem(
181     rem_table_name                 VARCHAR2,
182     xform_list                     IN OUT NOCOPY TRANSFORM_LIST,
183     rem_schema_name                VARCHAR2 DEFAULT NULL);
184 
185   -----------------------------------------------------------------------------
186   --                            create_norm_lin                              --
187   -----------------------------------------------------------------------------
188   -- NAME
189   --   create_norm_lin - CREATE LINear NORMalization definition table
190   -- DESCRIPTION
191   --   Creates linear normalization definition table:
192   --       CREATE TABLE <norm_lin>(
193   --         col   VARCHAR2(30),
194   --         att   VARCHAR2(4000),
195   --         shift NUMBER,
196   --         scale NUMBER)
197   --   This table is used to guide query generation process to construct
198   --   linear normalization expressions of the following form:
199   --       ("{col}" - {shift})/{scale} "{col}"
200   --   For example when col = 'my_col', shift = -1.5 and scale = 20 the
201   --   following expression is generated:
202   --       ("my_col" - (-1.5))/20 "my_col"
203   -- PARAMETERS
204   --   norm_table_name                - linear normalization definition table
205   --   norm_schema_name               - definition table schema name
206   -- RETURNS
207   --   None
208   -- EXCEPTIONS
209   --   None
210   -- NOTES
211   --   Note, that {col} is case sensitive since it generates quoted
212   --   identifiers.  When there are multiple entries in the xform defintion
213   --   table for the same {col} the behavior is undefined.  Any one of the
214   --   definitions may be used in query generation. NULL values remain
215   --   unchanged.
216   -----------------------------------------------------------------------------
217   PROCEDURE create_norm_lin(
218     norm_table_name                VARCHAR2,
219     norm_schema_name               VARCHAR2 DEFAULT NULL);
220 
221   -----------------------------------------------------------------------------
222   --                          insert_norm_lin_minmax                         --
223   -----------------------------------------------------------------------------
224   -- NAME
225   --   insert_norm_lin_minmax - INSERT into LINear NORMalization MINMAX
226   -- DESCRIPTION
227   --   For every NUMBER column in the data table that is not in the exclusion
228   --   list finds normalization definition and inserts it into the definition
229   --   table. Definition for each relevant column is computed based on the min
230   --   and max values that are computed from the data table:
231   --       shift = min
232   --       scale = max - min
233   --   The values of shift and scale are rounded to round_num significant
234   --   digits prior to storing them in the definition table.
235   -- PARAMETERS
236   --   norm_table_name                - linear normalization definition table
237   --   data_table_name                - data table
238   --   exclude_list                   - column exclusion list
239   --   round_num                      - number of significant digits
240   --   norm_schema_name               - definition table schema name
241   --   data_schema_name               - data table schema name
242   -- RETURNS
243   --   None
244   -- EXCEPTIONS
245   --   None
246   -- NOTES
247   --   Columns with all NULLs or only one unique value are ignored.
248   -----------------------------------------------------------------------------
249   PROCEDURE insert_norm_lin_minmax(
250     norm_table_name                VARCHAR2,
251     data_table_name                VARCHAR2,
252     exclude_list                   COLUMN_LIST DEFAULT NULL,
253     round_num                      PLS_INTEGER DEFAULT 6,
254     norm_schema_name               VARCHAR2 DEFAULT NULL,
255     data_schema_name               VARCHAR2 DEFAULT NULL);
256 
257   -----------------------------------------------------------------------------
258   --                          insert_norm_lin_scale                          --
259   -----------------------------------------------------------------------------
260   -- NAME
261   --   insert_norm_lin_scale - INSERT into LINear NORMalization SCALE
262   -- DESCRIPTION
263   --   For every NUMBER column in the data table that is not in the exclusion
264   --   list finds normalization definition and inserts it into the definition
265   --   table. Definition for each relevant column is computed based on the min
266   --   and max values that are computed from the data table:
267   --       shift = 0
268   --       scale = greatest(abs(max), abs(min))
269   --   The value of scale is rounded to round_num significant digits prior to
270   --   storing it in the definition table.
271   -- PARAMETERS
272   --   norm_table_name                - linear normalization definition table
273   --   data_table_name                - data table
274   --   exclude_list                   - column exclusion list
275   --   round_num                      - number of significant digits
276   --   norm_schema_name               - definition table schema name
277   --   data_schema_name               - data table schema name
278   -- RETURNS
279   --   None
280   -- EXCEPTIONS
281   --   None
282   -- NOTES
283   --   Columns with all NULLs or all zeros are ignored.
284   -----------------------------------------------------------------------------
285   PROCEDURE insert_norm_lin_scale(
286     norm_table_name                VARCHAR2,
287     data_table_name                VARCHAR2,
288     exclude_list                   COLUMN_LIST DEFAULT NULL,
289     round_num                      PLS_INTEGER DEFAULT 6,
290     norm_schema_name               VARCHAR2 DEFAULT NULL,
291     data_schema_name               VARCHAR2 DEFAULT NULL);
292 
293   -----------------------------------------------------------------------------
294   --                          insert_norm_lin_zscore                         --
295   -----------------------------------------------------------------------------
296   -- NAME
297   --   insert_norm_lin_zscore - INSERT into LINear NORMalization Z-SCORE
298   -- DESCRIPTION
299   --   For every NUMBER column in the data table that is not in the exclusion
300   --   list finds normalization definition and inserts it into the definition
301   --   table. Definition for each relevant column is computed based on the
302   --   mean and standard deviation that are estimated from the data table:
303   --       shift = mean
304   --       scale = stddev
305   --   The values of shift and scale are rounded to round_num significant
306   --   digits prior to storing them in the definition table.
307   -- PARAMETERS
308   --   norm_table_name                - linear normalization definition table
309   --   data_table_name                - data table
310   --   exclude_list                   - column exclusion list
311   --   round_num                      - number of significant digits
312   --   norm_schema_name               - definition table schema name
313   --   data_schema_name               - data table schema name
314   -- RETURNS
315   --   None
316   -- EXCEPTIONS
317   --   None
318   -- NOTES
319   --   Columns with all NULLs or only one unique value are ignored.
320   -----------------------------------------------------------------------------
321   PROCEDURE insert_norm_lin_zscore(
322     norm_table_name                VARCHAR2,
323     data_table_name                VARCHAR2,
327     data_schema_name               VARCHAR2 DEFAULT NULL);
324     exclude_list                   COLUMN_LIST DEFAULT NULL,
325     round_num                      PLS_INTEGER DEFAULT 6,
326     norm_schema_name               VARCHAR2 DEFAULT NULL,
328 
329   -----------------------------------------------------------------------------
330   --                              xform_norm_lin                             --
331   -----------------------------------------------------------------------------
332   -- NAME
333   --   xform_norm_lin - XFORMation LINear NORMalization
334   -- DESCRIPTION
335   --   Creates a view that perfoms linear normalization of the data table
336   --   Only the columns that are specified in the xform definition are
337   --   normalized, the remaining columns do not change.
338   -- PARAMETERS
339   --   norm_table_name                - xform definition table
340   --   data_table_name                - data table
341   --   xform_view_name                - xform view
342   --   norm_schema_name               - xform definition table schema name
343   --   data_schema_name               - data table schema name
344   --   xform_schema_name              - xform view schema name
345   -- RETURNS
346   --   None
347   -- EXCEPTIONS
348   --   None
349   -- NOTES
350   --   None
351   -----------------------------------------------------------------------------
352   PROCEDURE xform_norm_lin(
353     norm_table_name                VARCHAR2,
354     data_table_name                VARCHAR2,
355     xform_view_name                VARCHAR2,
356     norm_schema_name               VARCHAR2 DEFAULT NULL,
357     data_schema_name               VARCHAR2 DEFAULT NULL,
358     xform_schema_name              VARCHAR2 DEFAULT NULL);
359 
360   -----------------------------------------------------------------------------
361   --                              stack_norm_lin                             --
362   -----------------------------------------------------------------------------
363   -- NAME
364   --   stack_norm_lin - STACK LINear NORMalization
365   -- DESCRIPTION
366   --   For every column in the stack definition that has a matching entry in
367   --   the transformation definition, stacks linear normalization expression on
368   --   top of the existing expression and updates the stack definition.
369   --   Columns in the stack definition that have no matching entries in the
370   --   transformation definition or have NULL expression are not changed.
371   --   Columns in the transformation definition that have no matching entries
372   --   in the stack definition are added to the stack definition (using "{col}"
373   --   in place of the original expression or "VALUE" for nested attributes).
374   --   Reverse expressions in the stack definition are updated accordingly,
375   --   that is if an expression is updated, added or remains unchanged then a
376   --   corresponding reverse expression is also updated, added or remains
377   --   unchanged. When reverse expression is NULL it is treated as identity
378   --   function ("{attr}" or "VALUE" for nested attributes).
379   --   Given an entry ({attr}, {expr}, {rexp}) in the stack definition and a
380   --   matching entry in the transformation definition, after stacking {expr}
381   --   has the following form:
382   --       ({expr} - {shift})/{scale}
383   --   and {rexp} maintains the following form with every occurance of {attr}
384   --   replaced with:
385   --       {attr}*{scale} + {shift}
386   --   Example 1. Given transformation definition:
387   --       {col = COL1, shift = -1.5, scale = 20}
388   --       {col = COL2, shift = 0,    scale = 10}
389   --   and stack definition:
390   --       {attr = COL1, expr = log(10, COL1), rexp = power(10, COL1)}
391   --       {attr = COL3, expr = ln(COL3),      rexp = exp(COL3)}
392   --   the following updated stack definition is generated:
393   --       {attr = COL1,
394   --        expr = (log(10, COL1) - (-1.5)) / 20,
395   --        rexp = power(10, COL1*20 + (-1.5))}
396   --       {attr = COL3,
397   --        expr = ln(COL3),
398   --        rexp = exp(COL3)}
399   --       {attr = COL2,
400   --        expr = (COL2 - 0) / 10,
401   --        rexp = COL2*10 + 0}
402   -- PARAMETERS
403   --   norm_table_name                - xform definition table
404   --   xform_list                     - stack definition
405   --   norm_schema_name               - xform definition table schema name
406   -- RETURNS
407   --   xform_list                     - updated stack definition
408   -- EXCEPTIONS
409   --   None
410   -- NOTES
411   --   None
412   -----------------------------------------------------------------------------
413   PROCEDURE stack_norm_lin(
414     norm_table_name                VARCHAR2,
415     xform_list                     IN OUT NOCOPY TRANSFORM_LIST,
416     norm_schema_name               VARCHAR2 DEFAULT NULL);
417 
418   -----------------------------------------------------------------------------
419   --                            create_bin_num                               --
420   -----------------------------------------------------------------------------
421   -- NAME
422   --   create_bin_num - CREATE NUMerical BINning definition table
423   -- DESCRIPTION
424   --   Creates numerical binning definition table:
425   --       CREATE TABLE <bin_num>(
426   --         col   VARCHAR2(30),
427   --         att   VARCHAR2(4000),
428   --         val   NUMBER,
429   --         bin   VARCHAR2(4000))
430   --   This table is used to guide query generation process to construct
431   --   numerical binning expressions of the following form:
432   --       CASE WHEN "{col}" <  {val0}   THEN '{bin0}'
436   --            WHEN "{col}" IS NOT NULL THEN '{bin(N+1)}'
433   --            WHEN "{col}" <= {val1}   THEN '{bin1}'
434   --            ...
435   --            WHEN "{col}" <= {valN}   THEN '{binN}'
437   --       END "{col}"
438   --   This expression maps values in the range [{val0};{valN}] into N bins
439   --   {bin1}, ..., {binN}, values outside of this range into {bin0} or
440   --   {bin(N+1)}, such that
441   --       (-inf; {val0})       -> {bin0}
442   --       [{val0}; {val1}]     -> {bin1}
443   --       ...
444   --       ({val(N-1)}; {valN}] -> {binN}
445   --       ({valN}; +inf)       -> {bin(N+1)}.
446   --   NULL values remain unchanged. {bin(N+1)} is optional. If it is not
447   --   specified the values ("{col}" > {valN}) are mapped to NULL. To specify
448   --   {bin(N+1)} provide a row with {val} = NULL. The order of the WHEN ..
449   --   THEN pairs is based on the ascending order of {val} for a given {col}.
450   --   Example 1. <bin_num> contains four rows with {col} = 'mycol':
451   --       {col = 'mycol', val = 15.5, bin = 'small'}
452   --       {col = 'mycol', val = 10,   bin = 'tiny'}
453   --       {col = 'mycol', val = 20,   bin = 'large'}
454   --       {col = 'mycol', val = NULL, bin = 'huge'}
455   --   the following expression is generated:
456   --       CASE WHEN "mycol" <  10       THEN 'tiny'
457   --            WHEN "mycol" <= 15.5     THEN 'small'
458   --            WHEN "mycol" <= 20       THEN 'large'
459   --            WHEN "mycol" IS NOT NULL THEN 'huge'
460   --       END "mycol"
461   --   Example 2. <bin_num> contains three rows with {col} = 'mycol':
462   --       {col = 'mycol', val = 15.5, bin = NULL}
463   --       {col = 'mycol', val = 10,   bin = 'tiny'}
464   --       {col = 'mycol', val = 20,   bin = 'large'}
465   --   the following expression is generated:
466   --       CASE WHEN "mycol" <  10   THEN NULL
467   --            WHEN "mycol" <= 15.5 THEN 'small'
468   --            WHEN "mycol" <= 20   THEN 'large'
469   --       END "mycol"
470   -- PARAMETERS
471   --   bin_table_name                 - numerical binning definition table
472   --   bin_schema_name                - definition table schema name
473   -- RETURNS
474   --   None
475   -- EXCEPTIONS
476   --   None
477   -- NOTES
478   --   Note, that {col} is case sensitive since it generates quoted
479   --   identifiers. In cases when there are multiple entries with the same
480   --   {col}, {val} combiniation with different {bin} the behavior is
481   --   undefined. Any one of the {bin} might be used. The maximum number of
482   --   arguments in a CASE expression is 255, and each WHEN ... THEN pair
483   --   counts as two arguments.
484   -----------------------------------------------------------------------------
485   PROCEDURE create_bin_num(
486     bin_table_name                 VARCHAR2,
487     bin_schema_name                VARCHAR2 DEFAULT NULL);
488 
489   -----------------------------------------------------------------------------
490   --                          insert_bin_num_eqwidth                         --
491   -----------------------------------------------------------------------------
492   -- NAME
493   --   insert_bin_num_eqwidth - INSERT into NUMerical BINning EQual-WIDTH
494   -- DESCRIPTION
495   --   For every NUMBER column in the data table that is not in the exclusion
496   --   list finds numerical binning definition and inserts it into the
497   --   definition table. Definition for each relevant column is computed based
498   --   on the min and max values that are computed from the data table. Each
499   --   of the N (p_bin_num) bins {bin1}, ..., {binN} span ranges of equal
500   --   width
501   --       inc = (max - min) / N
502   --   where {binI} = I when N > 0 or {binI} = N+1-I when N < 0, and
503   --   {bin0} = {bin(N+1)} = NULL. For example, when N=2, col='mycol', min=10,
504   --   and max = 21, the following three rows are inserted into the
505   --   definition table (inc = 5.5):
506   --       COL     VAL BIN
507   --       ----- ----- -----
508   --       mycol    10 NULL
509   --       mycol  15.5 1
510   --       mycol    21 2
511   --   The values of {val} are rounded to round_num significant digits prior
512   --   to storing them in the definition table.
513   -- PARAMETERS
514   --   bin_table_name                 - numerical binning definition table
515   --   data_table_name                - data table
516   --   bin_num                        - number of bins
517   --   exclude_list                   - column exclusion list
518   --   round_num                      - number of significant digits
519   --   bin_schema_name                - definition table schema name
520   --   data_schema_name               - data table schema name
521   -- RETURNS
522   --   None
523   -- EXCEPTIONS
524   --   None
525   -- NOTES
526   --   Columns with all NULLs or only one unique value are ignored. Nothing
527   --   is done when bin_num IS NULL or bin_num = 0.
528   -----------------------------------------------------------------------------
529   PROCEDURE insert_bin_num_eqwidth(
530     bin_table_name                 VARCHAR2,
531     data_table_name                VARCHAR2,
532     bin_num                        PLS_INTEGER DEFAULT 10,
533     exclude_list                   COLUMN_LIST DEFAULT NULL,
534     round_num                      PLS_INTEGER DEFAULT 6,
535     bin_schema_name                VARCHAR2 DEFAULT NULL,
536     data_schema_name               VARCHAR2 DEFAULT NULL);
537 
538   -----------------------------------------------------------------------------
542   --   insert_bin_num_qtile - INSERT into NUMerical BINning QuanTILE
539   --                          insert_bin_num_qtile                           --
540   -----------------------------------------------------------------------------
541   -- NAME
543   -- DESCRIPTION
544   --   For every NUMBER column in the data table that is not in the exclusion
545   --   list finds numerical binning definition and inserts it into the
546   --   definition table. Definition for each relevant column is computed based
547   --   on the min values per quantile, where quantiles are computed from the
548   --   data using NTILE function. Bin {bin1} spans range [min(1), min(2)],
549   --   bins {bin2}, ..., {bin(N-1)} span ranges (min(I), min(I+1)] and {binN}
550   --   range (min(N), max(N)] with {binI} = I when N > 0 or {binI} = N+1-I
551   --   when N < 0, and {bin0}={bin(N+1)} = NULL. Bins with equal left and
552   --   right boundaries are collapsed. For example, when N=4, col='mycol',
553   --   and data is {1,2,2,2,2,3,4}, the following three rows are inserted into
554   --   the definition table:
555   --       COL     VAL BIN
556   --       ----- ----- -----
557   --       mycol     1 NULL
558   --       mycol     2 1
559   --       mycol     4 2
560   --   Here quantiles are {1,2}, {2,2}, {2,3}, {4} and min(1) = 1, min(2) = 2,
561   --   min(3) = 2, min(4) = 4, max(4) = 4, and ranges are [1,2], (2,2], (2,4],
562   --   (4,4]. After collapsing [1,2], (2,4].
563   -- PARAMETERS
564   --   bin_table_name                 - numerical binning definition table
565   --   data_table_name                - data table
566   --   bin_num                        - number of bins
567   --   exclude_list                   - column exclusion list
568   --   bin_schema_name                - definition table schema name
569   --   data_schema_name               - data table schema name
570   -- RETURNS
571   --   None
572   -- EXCEPTIONS
573   --   None
574   -- NOTES
575   --   Columns with all NULLs or only one unique value are ignored. Nothing
576   --   is done when bin_num IS NULL or bin_num = 0.
577   -----------------------------------------------------------------------------
578   PROCEDURE insert_bin_num_qtile(
579     bin_table_name                 VARCHAR2,
580     data_table_name                VARCHAR2,
581     bin_num                        PLS_INTEGER DEFAULT 10,
582     exclude_list                   COLUMN_LIST DEFAULT NULL,
583     bin_schema_name                VARCHAR2 DEFAULT NULL,
584     data_schema_name               VARCHAR2 DEFAULT NULL);
585 
586   -----------------------------------------------------------------------------
587   --                      insert_autobin_num_eqwidth                         --
588   -----------------------------------------------------------------------------
589   -- NAME
590   --   insert_autobin_num_eqwidth - INSERT into NUMerical BINning AUTOmated
591   --                                EQual-WIDTH
592   -- DESCRIPTION
593   --   For every NUMBER column in the data table that is not in the exclusion
594   --   list finds numerical binning definition and inserts it into the
595   --   definition table. Definition for each relevant column is computed using
596   --   equal-width method (see description for insert_bin_nume_eqwidth). The
597   --   number of bins (N) is computed for each column separately and is based
598   --   on the number of non-NULL values (cnt), min and max values, and the
599   --   standard deviation (dev)
600   --       N = floor(power(cnt, 1/3)*(max - min)/(C*dev))
601   --   where C = 3.49/0.9. Parameter bin_num is used to adjust N to be at
602   --   least bin_num. No adjustment is done when bin_num is NULL or zero.
603   --   Parameter max_bin_num is used to adjust N to be at most max_bin_num.
604   --   No adjustment is done when max_bin_num is NULL or zero. For columns
605   --   with all integer values (discrete columns) N is adjusted to be at most
606   --   the maximum number of distinct values in the obseved range
607   --       max - min + 1
608   --   Parameter sample_size is used to adjust cnt to be at most sample_size.
609   --   No adjustment is done when sample_size is NULL or zero.
610   -- PARAMETERS
611   --   bin_table_name                 - numerical binning definition table
612   --   data_table_name                - data table
613   --   bin_num                        - minimum number of bins
614   --   max_bin_num                    - maximum number of bins
615   --   exclude_list                   - column exclusion list
616   --   round_num                      - number of significant digits
617   --   sample_size                    - maximum size of the sample
618   --   bin_schema_name                - definition table schema name
619   --   data_schema_name               - data table schema name
620   --   rem_table_name                 - column removal definition table
621   --   rem_schema_name                - removal definition table schema
622   -- RETURNS
623   --   None
624   -- EXCEPTIONS
625   --   None
626   -- NOTES
627   --   Columns with all NULLs or only one unique value are ignored. The sign
628   --   of bin_num, max_bin_num, sample_size has no effect on the result, only
629   --   the absolute values are being used. The value adjustment for N is done
630   --   in the following order: first, bin_num, then max_bin_num, and then
631   --   discrete column adjustment.
632   --   Column removal definition table is optional. If specified columns with
633   --   all NULLs or only one unique value will be inserted into the column
634   --   removal definition table.
635   -----------------------------------------------------------------------------
639     bin_num                        PLS_INTEGER DEFAULT 3,
636   PROCEDURE insert_autobin_num_eqwidth(
637     bin_table_name                 VARCHAR2,
638     data_table_name                VARCHAR2,
640     max_bin_num                    PLS_INTEGER DEFAULT 100,
641     exclude_list                   COLUMN_LIST DEFAULT NULL,
642     round_num                      PLS_INTEGER DEFAULT 6,
643     sample_size                    PLS_INTEGER DEFAULT 50000,
644     bin_schema_name                VARCHAR2 DEFAULT NULL,
645     data_schema_name               VARCHAR2 DEFAULT NULL,
646     rem_table_name                 VARCHAR2 DEFAULT NULL,
647     rem_schema_name                VARCHAR2 DEFAULT NULL);
648 
649   -----------------------------------------------------------------------------
650   --                              xform_bin_num                              --
651   -----------------------------------------------------------------------------
652   -- NAME
653   --   xform_bin_num - XFORMation NUMerical BINning
654   -- DESCRIPTION
655   --   Creates a view that perfoms numerical binning of the data table. Only
656   --   the columns that are specified in the xform definition are binned, the
657   --   remaining columns do not change.
658   -- PARAMETERS
659   --   bin_table_name                 - xform definition table
660   --   data_table_name                - data table
661   --   xform_view_name                - xform view
662   --   literal_flag                   - literal flag
663   --   bin_schema_name                - xform definition table schema name
664   --   data_schema_name               - data table schema name
665   --   xform_schema_name              - xform view schema name
666   -- RETURNS
667   --   None
668   -- EXCEPTIONS
669   --   None
670   -- NOTES
671   --   Literal flag indicates whether the values in {bin} are valid SQL
672   --   literals. When the the flag is set to TRUE the value of {bin} is used
673   --   as is in query generation, otherwise it is converted into a valid text
674   --   literal (surround by quotes and double the quotes inside). By default
675   --   the flag is set to FALSE. One example when it can be set to TRUE is in
676   --   cases when all {bin} are numbers. In that case the xformed column will
677   --   remain numeric as opposed to textual (default behavior). For example,
678   --   for the following xfrom definition:
679   --       COL     VAL BIN
680   --       ----- ----- -----
681   --       mycol    10 NULL
682   --       mycol  15.5 1
683   --       mycol    21 2
684   --   the following expression is generated when the flag is set to FALSE:
685   --       CASE WHEN "mycol" <  10   THEN NULL
686   --            WHEN "mycol" <= 15.5 THEN '1'
687   --            WHEN "mycol" <= 20   THEN '2'
688   --       END "mycol"
689   --   and when the flag is set to TRUE:
690   --       CASE WHEN "mycol" <  10   THEN NULL
691   --            WHEN "mycol" <= 15.5 THEN 1
692   --            WHEN "mycol" <= 20   THEN 2
693   --       END "mycol"
694   -----------------------------------------------------------------------------
695   PROCEDURE xform_bin_num(
696     bin_table_name                 VARCHAR2,
697     data_table_name                VARCHAR2,
698     xform_view_name                VARCHAR2,
699     literal_flag                   BOOLEAN DEFAULT FALSE,
700     bin_schema_name                VARCHAR2 DEFAULT NULL,
701     data_schema_name               VARCHAR2 DEFAULT NULL,
702     xform_schema_name              VARCHAR2 DEFAULT NULL);
703 
704   -----------------------------------------------------------------------------
705   --                              stack_bin_num                              --
706   -----------------------------------------------------------------------------
707   -- NAME
708   --   stack_bin_num - STACK NUMerical BINning
709   -- DESCRIPTION
710   --   For every column in the stack definition that has a matching entry in
711   --   the transformation definition, stacks numerical binning expression on
712   --   top of the existing expression and updates the stack definition.
713   --   Columns in the stack definition that have no matching entries in the
714   --   transformation definition or have NULL expression are not changed.
715   --   Columns in the transformation definition that have no matching entries
716   --   in the stack definition are added to the stack definition (using "{col}"
717   --   in place of the original expression or "VALUE" for nested attributes).
718   --   Reverse expressions in the stack definition are updated accordingly,
719   --   that is if an expression is updated, added or remains unchanged then a
720   --   corresponding reverse expression is also updated, added or remains
721   --   unchanged. When reverse expression is NULL it is treated as identity
722   --   function ("{attr}" or "VALUE" for nested attributes).
723   --   Given an entry ({attr}, {expr}, {rexp}) in the stack definition and a
724   --   matching entry in the transformation definition, after stacking {expr}
725   --   has the following form:
726   --       CASE WHEN ({expr}) <  {val0}   THEN '{bin0}'
727   --            WHEN ({expr}) <= {val1}   THEN '{bin1}'
728   --            ...
729   --            WHEN ({expr}) <= {valN}   THEN '{binN}'
730   --            WHEN ({expr}) IS NOT NULL THEN '{bin(N+1)}'
731   --       END
732   --   and {rexp} has the following form:
733   --       DECODE("{attr}", '{bin0}',     '( ; {rev0})',
734   --                        '{bin1}',     '[{rev0}; {rev1})',
735   --                        ...
736   --                        '{binN}',     '[{rev(N-1)}; {revN}]',
740   --   If {binI} and {binJ} are equal then the corresponding entries of the
737   --                        '{bin(N+1)}', '({revN}; )',
738   --                        NULL,         'NULL')
739   --   where {revI} is the result of applying reverese expression to {valI}.
741   --   DECODE function above are merged into:
742   --       '{binI}', '[{rev(I-1)}; {revI}), [{rev(J-1)}; {revJ})'
743   --   Note that reverse expressions implicitly maps invalid bins to NULL.
744   --   Example 1. Given transformation definition:
745   --       {col = COL1, val = 0,   bin = NULL}
746   --       {col = COL1, val = 1,   bin = A}
747   --       {col = COL1, val = 2,   bin = B}
748   --       {col = COL1, val = 3,   bin = A}
749   --       {col = COL2, val = 10,  bin = NULL}
750   --       {col = COL2, val = 15,  bin = 1}
751   --       {col = COL2, val = 20,  bin = 2}
752   --   and stack definition:
753   --       {attr = COL1, expr = log(10, COL1), rexp = power(10, COL1)}
754   --       {attr = COL3, expr = ln(COL3),      rexp = exp(COL3)}
755   --   the following updated stack definition is generated:
756   --       {attr = COL1,
757   --        expr = CASE WHEN (log(10, COL1)) <  0 THEN NULL
758   --                    WHEN (log(10, COL1)) <= 1 THEN 'A'
759   --                    WHEN (log(10, COL1)) <= 2 THEN 'B'
760   --                    WHEN (log(10, COL1)) <= 3 THEN 'A' END,
761   --        rexp = DECODE("COL1", 'A',   '[1; 10), [100; 1000]',
762   --                              'B',   '[10; 100)',
763   --                               NULL, '( ; 1), (1000; ), NULL')}
764   --       {attr = COL3,
765   --        expr = ln(COL3),
766   --        rexp = exp(COL3)}
767   --       {attr = COL2,
768   --        expr = CASE WHEN "COL2" <  10 THEN NULL
769   --                    WHEN "COL2" <= 15 THEN '1'
770   --                    WHEN "COL2" <= 20 THEN '2' END
771   --        rexp = DECODE("COL2", '1',  '[10; 15)',
772   --                              '2',  '[15; 20]',
773   --                              NULL, '( ; 10) OR (20; ) OR NULL')}
774   -- PARAMETERS
775   --   bin_table_name                 - xform definition table
776   --   xform_list                     - stack definition
777   --   literal_flag                   - literal flag
778   --   bin_schema_name                - xform definition table schema name
779   -- RETURNS
780   --   xform_list                     - updated stack definition
781   -- EXCEPTIONS
782   --   None
783   -- NOTES
784   --   Literal flag indicates whether the values in {bin} are valid SQL
785   --   literals. When the the flag is set to TRUE the value of {bin} is used
786   --   "as is" in the expression generation, otherwise it is converted into a
787   --   valid text literal (surround by quotes and double the quotes inside). By
788   --   default the flag is set to FALSE. One example when it can be set to TRUE
789   --   is in cases when all {bin} are numbers. In that case the xformed column
790   --   will remain numeric as opposed to textual (default behavior). For
791   --   example, for the following xfrom definition:
792   --       COL     VAL BIN
793   --       ----- ----- -----
794   --       mycol    10 NULL
795   --       mycol    15 1
796   --       mycol    20 2
797   --   the following {expr} and {rexp} are generated when the flag is FALSE:
798   --        expr = CASE WHEN "mycol" <  10 THEN NULL
799   --                    WHEN "mycol" <= 15 THEN '1'
800   --                    WHEN "mycol" <= 20 THEN '2' END
801   --        rexp = DECODE("mycol", '1',  '[10; 15)',
802   --                               '2',  '[15; 20]'
803   --                               NULL, '( ; 10) OR (20; ) OR NULL')
804   --   and when the flag is set to TRUE:
805   --        expr = CASE WHEN "mycol" <  10 THEN NULL
806   --                    WHEN "mycol" <= 15 THEN 1
807   --                    WHEN "mycol" <= 20 THEN 2 END
808   --        rexp = DECODE("mycol", 1,    '[10; 15)',
809   --                               2,    '[15; 20]',
810   --                               NULL, '( ; 10) OR (20; ) OR NULL')
811   -----------------------------------------------------------------------------
812   PROCEDURE stack_bin_num(
813     bin_table_name                 VARCHAR2,
814     xform_list                     IN OUT NOCOPY TRANSFORM_LIST,
815     literal_flag                   BOOLEAN DEFAULT FALSE,
816     bin_schema_name                VARCHAR2 DEFAULT NULL);
817 
818   -----------------------------------------------------------------------------
819   --                            create_bin_cat                               --
820   -----------------------------------------------------------------------------
821   -- NAME
822   --   create_bin_cat - CREATE CATegorical BINning definition table
823   -- DESCRIPTION
824   --   Creates categorical binning definition table:
825   --       CREATE TABLE <bin_cat>(
826   --         col   VARCHAR2(30),
827   --         att   VARCHAR2(4000),
828   --         val   VARCHAR2(4000),
829   --         bin   VARCHAR2(4000))
830   --   This table is used to guide query generation process to construct
831   --   categorical binning expressions of the following form:
832   --       DECODE("{col}", {val1}, {bin1},
833   --                       ...
834   --                       {valN}, {binN},
835   --                       NULL,   NULL,
836   --                               {bin(N+1)}) "{col}"
837   --   This expression maps values {val1}, ..., {valN} into N bins {bin1},...,
838   --   {binN}, and other values into {bin(N+1)}, while NULL values remain
842   --       {col = 'mycol', val = 'Waltham',        bin = 'MA'}
839   --   unchanged. {bin(N+1)} is optional. If it is not specified it defaults
840   --   to NULL. To specify {bin(N+1)} provide a row with {val} = NULL.
841   --   Example 1. <bin_cat> contains four rows with {col} = 'mycol':
843   --       {col = 'mycol', val = 'Burlington',     bin = 'MA'}
844   --       {col = 'mycol', val = 'Redwood Shores', bin = 'CA'}
845   --       {col = 'mycol', val = NULL,             bin = 'OTHER'}
846   --   the following expression is generated:
847   --       DECODE("mycol", 'Waltham',        'MA',
848   --                       'Burlington',     'MA',
849   --                       'Redwood Shores', 'CA',
850   --                       NULL,             NULL,
851   --                                         'OTHER') "mycol"
852   --   Example 2. <bin_cat> contains three rows with {col} = 'mycol':
853   --       {col = 'mycol', val = 'Waltham',        bin = 'MA'}
854   --       {col = 'mycol', val = 'Burlington',     bin = 'MA'}
855   --       {col = 'mycol', val = 'Redwood Shores', bin = 'CA'}
856   --   the following expression is generated:
857   --       DECODE("mycol", 'Waltham',        'MA',
858   --                       'Burlington',     'MA',
859   --                       'Redwood Shores', 'CA') "mycol"
860   -- PARAMETERS
861   --   bin_table_name                 - categorical binning definition table
862   --   bin_schema_name                - definition table schema name
863   -- RETURNS
864   --   None
865   -- EXCEPTIONS
866   --   None
867   -- NOTES
868   --   Note, that {col} is case sensitive since it generates quoted
869   --   identifiers. In cases when there are multiple entries with the same
870   --   {col}, {val} combiniation with different {bin} the behavior is
871   --   undefined. Any one of the {bin} might be used. The maximum number of
872   --   arguments of a DECODE function is 255.
873   -----------------------------------------------------------------------------
874   PROCEDURE create_bin_cat(
875     bin_table_name                 VARCHAR2,
876     bin_schema_name                VARCHAR2 DEFAULT NULL);
877 
878   -----------------------------------------------------------------------------
879   --                          insert_bin_cat_freq                            --
880   -----------------------------------------------------------------------------
881   -- NAME
882   --   insert_bin_cat_freq - INSERT into CATegorical BINning top-FREQuency
883   -- DESCRIPTION
884   --   For every VARCHAR2, CHAR column in the data table that is not in the
885   --   exclusion list finds categorical binning definition and inserts it into
886   --   the definition table. Definition for each relevant column is computed
887   --   based on occurence frequency of column values that are computed from
888   --   the data table reference. Each of the N (bin_num) bins {bin1}, ...,
889   --   {binN} correspond to the values with top frequencies when N > 0 or
890   --   bottom frequencies when N < 0, and {bin(N+1)} to all remaining
891   --   values, where {binI} = I. Ordering ties among identical frequencies are
892   --   broken by ordering on column values (ASC for N > 0 or DESC for N < 0).
893   --   When the the number of distinct values C < N only C+1 bins will be
894   --   created. Parameter default_num (D) is used for prunning based on the
895   --   number of values that fall in the default bin. When D > 0 only columns
896   --   that have at least D defaults are kept while others are ignored. When
897   --   D < 0 only columns that have at most D values are kept. No prunning is
898   --   done when D is NULL or when D = 0. Parameter bin_support (SUP) is used
899   --   for restricting bins to frequent (SUP > 0) values frq >= SUP*tot, or
900   --   infrequent (SUP < 0) ones frq <= (-SUP)*tot, where frq is a given value
901   --   count and tot is a sum of all counts as computed from the data. No
902   --   support filtering is done when SUP is NULL or when SUP = 0.
903   -- PARAMETERS
904   --   bin_table_name                 - categorical binning definition table
905   --   data_table_name                - data table
906   --   bin_num                        - number of bins
907   --   exclude_list                   - column exclusion list
908   --   default_num                    - number of default values
909   --   bin_support                    - bin support (fraction)
910   --   bin_schema_name                - definition table schema name
911   --   data_schema_name               - data table schema name
912   -- RETURNS
913   --   None
914   -- EXCEPTIONS
915   --   None
916   -- NOTES
917   --   Nothing is done when bin_num IS NULL or bin_num = 0. NULL values
918   --   are not counted. Columns with all NULLs are ignored.
919   -----------------------------------------------------------------------------
920   PROCEDURE insert_bin_cat_freq(
921     bin_table_name                 VARCHAR2,
922     data_table_name                VARCHAR2,
923     bin_num                        PLS_INTEGER DEFAULT 9,
924     exclude_list                   COLUMN_LIST DEFAULT NULL,
925     default_num                    PLS_INTEGER DEFAULT 2,
926     bin_support                    NUMBER DEFAULT NULL,
927     bin_schema_name                VARCHAR2 DEFAULT NULL,
928     data_schema_name               VARCHAR2 DEFAULT NULL);
929 
930   -----------------------------------------------------------------------------
931   --                              xform_bin_cat                              --
932   -----------------------------------------------------------------------------
933   -- NAME
934   --   xform_bin_cat - XFORMation CATegorical BINning
935   -- DESCRIPTION
939   -- PARAMETERS
936   --   Creates a view that perfoms categorical binning of the data table. Only
937   --   the columns that are specified in the xform definition are binned, the
938   --   remaining columns do not change.
940   --   bin_table_name                 - xform definition table
941   --   data_table_name                - data table
942   --   xform_view_name                - xform view
943   --   litiral_flag                   - literal flag
944   --   bin_schema_name                - xform definition table schema name
945   --   data_schema_name               - data table schema name
946   --   xform_schema_name              - xform view schema name
947   -- RETURNS
948   --   None
949   -- EXCEPTIONS
950   --   None
951   -- NOTES
952   --   Literal flag indicates whether the values in {bin} are valid SQL
953   --   literals. When the the flag is set to TRUE the value of {bin} is used
954   --   as is in query generation, otherwise it is converted into a valid text
955   --   literal (surround by quotes and double the quotes inside). By default
956   --   the flag is set to FALSE. One example when it can be set to TRUE is in
957   --   cases when all {bin} are numbers. In that case the xformed column will
958   --   be numeric as opposed to textual (default behavior). For example,
959   --   for the following xfrom definition:
960   --       COL   VAL            BIN
961   --       ----- -------------- ----
962   --       mycol Waltham        1
963   --       mycol Burlington     1
964   --       mycol Redwood Shores 2
965   --   the following expression is generated when the flag is set to FALSE:
966   --       DECODE("mycol", 'Waltham',       '1',
967   --                       'Burlington',    '1',
968   --                       'Redwood Shores','2') "mycol"
969   --   and when the flag is set to TRUE:
970   --       DECODE("mycol", 'Waltham',        1,
971   --                       'Burlington',     1,
972   --                       'Redwood Shores', 2) "mycol"
973   -----------------------------------------------------------------------------
974   PROCEDURE xform_bin_cat(
975     bin_table_name                 VARCHAR2,
976     data_table_name                VARCHAR2,
977     xform_view_name                VARCHAR2,
978     literal_flag                   BOOLEAN DEFAULT FALSE,
979     bin_schema_name                VARCHAR2 DEFAULT NULL,
980     data_schema_name               VARCHAR2 DEFAULT NULL,
981     xform_schema_name              VARCHAR2 DEFAULT NULL);
982 
983   -----------------------------------------------------------------------------
984   --                              stack_bin_cat                              --
985   -----------------------------------------------------------------------------
986   -- NAME
987   --   stack_bin_cat - STACK CATegorical BINning
988   -- DESCRIPTION
989   --   For every column in the stack definition that has a matching entry in
990   --   the transformation definition, stacks categorical binning expression on
991   --   top of the existing expression and updates the stack definition.
992   --   Columns in the transformation definition that have no matching entries
993   --   in the stack definition are added to the stack definition (using "{col}"
994   --   in place of the original expression or "VALUE" for nested attributes).
995   --   Reverse expressions in the stack definition are updated accordingly,
996   --   that is if an expression is updated, added or remains unchanged then a
997   --   corresponding reverse expression is also updated, added or remains
998   --   unchanged. When reverse expression is NULL it is treated as identity
999   --   function ("{attr}" or "VALUE" for nested attributes).
1000   --   Given an entry ({attr}, {expr}, {rexp}) in the stack definition and a
1001   --   matching entry in the transformation definition, after stacking {expr}
1002   --   has the following form:
1003   --       DECODE({expr}, '{val1}', '{bin1}',
1004   --                      ...
1005   --                      '{valN}', '{binN}',
1006   --                      NULL,     NULL,
1007   --                                '{bin(N+1)}')
1008   --   and {rexp} has the following form:
1009   --       DECODE("{attr}", '{bin1}',     '{rev1}',
1010   --                        ...
1011   --                        '{binN}',     '{revN}',
1012   --                        NULL,         'NULL',
1013   --                        '{bin(N+1)}', 'DEFAULT')
1014   --   where {revI} is the result of applying reverese expression to {valI}.
1015   --   If {binI} and {binJ} are equal then the corresponding entries of the
1016   --   DECODE function above are merged into:
1017   --       '{binI}', '{revI}, {revJ}'
1018   --   If more than one entry maps to the default bin {bin(N+1)} they are all
1019   --   merged to
1020   --       '{bin(N+1)}', 'DEFAULT'
1021   --   Note that reverse expression implicitly maps invalid bins to NULL.
1022   --   Example 1. Given transformation definition:
1023   --       {col = COL1, val = waltham,        bin = MA}
1024   --       {col = COL1, val = burlington,     bin = MA}
1025   --       {col = COL1, val = redwood shores, bin = CA}
1026   --       {col = COL2, val = MA,             bin = East}
1027   --       {col = COL2, val = CA,             bin = West}
1028   --       {col = COL2, val = NULL,           bin = USA}
1029   --   and stack definition:
1030   --       {attr = COL1, expr = lower(COL1), rexp = initcap(COL1)}
1034   --        expr = DECODE(lower(COL1), 'waltham',        'MA',
1031   --       {attr = COL3, expr = upper(COL3), rexp = initcap(COL3)}
1032   --   the following updated stack definition is generated:
1033   --       {attr = COL1,
1035   --                                   'burlington',     'MA',
1036   --                                   'redwood shores', 'CA'),
1037   --        rexp = DECODE("COL1", 'MA', '''Waltham'', ''Burlington''',
1038   --                              'CA', '''Redwood Shores''',
1039   --                              NULL, 'DEFAULT')}
1040   --       {attr = COL3,
1041   --        expr = upper(COL3),
1042   --        rexp = initcap(COL3)}
1043   --       {attr = COL2,
1044   --        expr = DECODE("COL2", 'MA', 'East',
1045   --                              'NY', 'East',
1046   --                              'CA', 'West',
1047   --                              NULL, NULL,
1048   --                                    'USA')
1049   --        rexp = DECODE("COL2", 'East', '''MA''',
1050   --                              'West', '''CA''',
1051   --                              NULL,   'NULL',
1052   --                              'USA',  'DEFAULT')}
1053   -- PARAMETERS
1054   --   bin_table_name                 - xform definition table
1055   --   xform_list                     - stack definition
1056   --   literal_flag                   - literal flag
1057   --   bin_schema_name                - xform definition table schema name
1058   -- RETURNS
1059   --   xform_list                     - updated stack definition
1060   -- EXCEPTIONS
1061   --   None
1062   -- NOTES
1063   --   Literal flag indicates whether the values in {bin} are valid SQL
1064   --   literals. When the the flag is set to TRUE the value of {bin} is used
1065   --   "as is" in the expression generation, otherwise it is converted into a
1066   --   valid text literal (surround by quotes and double the quotes inside). By
1067   --   default the flag is set to FALSE. One example when it can be set to TRUE
1068   --   is in cases when all {bin} are numbers. In that case the xformed column
1069   --   will remain numeric as opposed to textual (default behavior). For
1070   --   example, for the following xfrom definition:
1071   --       COL   VAL            BIN
1072   --       ----- -------------- ----
1073   --       mycol Waltham        1
1074   --       mycol Burlington     1
1075   --       mycol Redwood Shores 2
1076   --   the following {expr} and {rexp} are generated when the flag is FALSE:
1077   --        expr = DECODE("mycol", 'Waltham',        '1',
1078   --                               'Burlington',     '1',
1079   --                               'Redwood Shores', '2')
1080   --        rexp = DECODE("COL1", '1',  '''Waltham'', ''Burlington''',
1081   --                              '2',  '''Redwood Shores''',
1082   --                              NULL, 'DEFAULT')
1083   --   and when the flag is set to TRUE:
1084   --        expr = DECODE("mycol", 'Waltham',        1,
1085   --                               'Burlington',     1,
1086   --                               'Redwood Shores', 2)
1087   --        rexp = DECODE("COL1", 1,    '''Waltham'', ''Burlington''',
1088   --                              2,    '''Redwood Shores''',
1089   --                              NULL, 'DEFAULT')
1090   -----------------------------------------------------------------------------
1091   PROCEDURE stack_bin_cat(
1092     bin_table_name                 VARCHAR2,
1093     xform_list                     IN OUT NOCOPY TRANSFORM_LIST,
1094     literal_flag                   BOOLEAN DEFAULT FALSE,
1095     bin_schema_name                VARCHAR2 DEFAULT NULL);
1096 
1097   -----------------------------------------------------------------------------
1098   --                            insert_bin_super                             --
1099   -----------------------------------------------------------------------------
1100   -- NAME
1101   --   insert_bin_super - INSERT into BINning SUPERvised
1102   -- DESCRIPTION
1103   --   For every NUMBER, VARCHAR2, or CHAR, as well as DM_NESTED_NUMERICAL, or
1104   --   DM_NESTED_CATEGORICAL column in the data table that is not in the
1105   --   exclusion list finds numerical and categorical binning definition
1106   --   and inserts them into the corresponding definition tables. Definition
1107   --   for each relevant column is computed based on the splits found by the
1108   --   Decision Tree model build on a single predictor. Columns that have no
1109   --   splits are inserted into the column removal definition table.
1110   -- PARAMETERS
1111   --   num_table_name                 - numerical binning definition table
1112   --   cat_table_name                 - categorical binning definition table
1113   --   data_table_name                - data table
1114   --   target_column_name             - target column
1115   --   max_bin_num                    - maximum number of bins
1116   --   exclude_list                   - column exclusion list
1117   --   num_schema_name                - numerical definition table schema
1118   --   cat_schema_name                - categorical definition table schema
1119   --   data_schema_name               - data table schema name
1120   --   rem_table_name                 - column removal definition table
1121   --   rem_schema_name                - removal definition table schema
1122   -- RETURNS
1123   --   None
1124   -- EXCEPTIONS
1125   --   None
1126   -- NOTES
1127   --   Column removal definition table is optional.
1128   -----------------------------------------------------------------------------
1129   PROCEDURE insert_bin_super(
1130     num_table_name                 VARCHAR2,
1131     cat_table_name                 VARCHAR2,
1132     data_table_name                VARCHAR2,
1136     num_schema_name                VARCHAR2 DEFAULT NULL,
1133     target_column_name             VARCHAR2,
1134     max_bin_num                    PLS_INTEGER DEFAULT 1000,
1135     exclude_list                   COLUMN_LIST DEFAULT NULL,
1137     cat_schema_name                VARCHAR2 DEFAULT NULL,
1138     data_schema_name               VARCHAR2 DEFAULT NULL,
1139     rem_table_name                 VARCHAR2 DEFAULT NULL,
1140     rem_schema_name                VARCHAR2 DEFAULT NULL);
1141 
1142   -----------------------------------------------------------------------------
1143   --                             xform_expr_num                              --
1144   -----------------------------------------------------------------------------
1145   -- NAME
1146   --   xform_expr_num - XFORMation EXPRession NUMber
1147   -- DESCRIPTION
1148   --   Creates a view that applies a given expression for every NUMBER column
1149   --   in the data table that is not in the exclusion list and in the
1150   --   inclusion list. The remaining columns do not change. Expressions are
1151   --   constructed from the expression pattern by replacing every occurance of
1152   --   the column pattern with an actual column name.
1153   --   Example 1. For a table TAB with two NUMBER columns CN1, CN3 and one
1154   --   CHAR columns CC2 and expression pattern TO_CHAR(:col) the following
1155   --   query is generated:
1156   --       SELECT TO_CHAR("CN1") "CN1", "CC2", TO_CHAR("CN3") "CN3"
1157   --         FROM TAB
1158   --   Example 2. This procedure can be used for clipping (winsorizing)
1159   --   normalized data to a [0..1] range, that is values x > 1 become 1 and
1160   --   values x < 0 become 0. For the table in example 1 and pattern
1161   --       CASE WHEN :col < 0 THEN 0 WHEN :col > 1 THEN 1 ELSE :col END
1162   --   the following query is generated:
1163   --       SELECT CASE WHEN "CN1" < 0 THEN 0 WHEN "CN1" > 1 THEN 1
1164   --                   ELSE "CN1" END "CN1",
1165   --              "CC2",
1166   --              CASE WHEN "CN3" < 0 THEN 0 WHEN "CN3" > 1 THEN 1
1167   --                   ELSE "CN3" END "CN3"
1168   --         FROM TAB
1169   -- PARAMETERS
1170   --   expr_pattern                   - expression pattern
1171   --   data_table_name                - data table
1172   --   xform_view_name                - xform view
1173   --   exclude_list                   - column exclusion list
1174   --   include_list                   - column inclusion list
1175   --   col_pattern                    - column pattern
1176   --   data_schema_name               - data table schema name
1177   --   xform_schema_name              - xform view schema name
1178   -- RETURNS
1179   --   None
1180   -- EXCEPTIONS
1181   --   None
1182   -- NOTES
1183   --   The default value of column pattern is ':col'. Column pattern is case
1184   --   sensetive. Expressions are constructed using SQL REPLACE function:
1185   --       REPALCE(expr_pattern, col_pattern, '"<column>"')||' "<column>"'
1186   --   NULL exclusion list is treated as an empty set (exclude none) and NULL
1187   --   inclusion list is treated as a full set (include all).
1188   -----------------------------------------------------------------------------
1189   PROCEDURE xform_expr_num(
1190     expr_pattern                   VARCHAR2,
1191     data_table_name                VARCHAR2,
1192     xform_view_name                VARCHAR2,
1193     exclude_list                   COLUMN_LIST DEFAULT NULL,
1194     include_list                   COLUMN_LIST DEFAULT NULL,
1195     col_pattern                    VARCHAR2 DEFAULT ':col',
1196     data_schema_name               VARCHAR2 DEFAULT NULL,
1197     xform_schema_name              VARCHAR2 DEFAULT NULL);
1198 
1199   -----------------------------------------------------------------------------
1200   --                             xform_expr_str                              --
1201   -----------------------------------------------------------------------------
1202   -- NAME
1203   --   xform_expr_str - XFORMation EXPRession STRing
1204   -- DESCRIPTION
1205   --   Similar to xform_expr_num, except that it applies to CHAR and VARCHAR2
1206   --   columns instead of NUMBER.
1207   -- PARAMETERS
1208   --   expr_pattern                   - expression pattern
1209   --   data_table_name                - data table
1210   --   xform_view_name                - xform view
1211   --   exclude_list                   - column exclusion list
1212   --   include_list                   - column inclusion list
1213   --   col_pattern                    - column pattern
1214   --   data_schema_name               - data table schema name
1215   --   xform_schema_name              - xform view schema name
1216   -- RETURNS
1217   --   None
1218   -- EXCEPTIONS
1219   --   None
1220   -- NOTES
1221   --   None
1222   -----------------------------------------------------------------------------
1223   PROCEDURE xform_expr_str(
1224     expr_pattern                   VARCHAR2,
1225     data_table_name                VARCHAR2,
1226     xform_view_name                VARCHAR2,
1227     exclude_list                   COLUMN_LIST DEFAULT NULL,
1228     include_list                   COLUMN_LIST DEFAULT NULL,
1229     col_pattern                    VARCHAR2 DEFAULT ':col',
1230     data_schema_name               VARCHAR2 DEFAULT NULL,
1231     xform_schema_name              VARCHAR2 DEFAULT NULL);
1232 
1233   -----------------------------------------------------------------------------
1234   --                              create_clip                                --
1235   -----------------------------------------------------------------------------
1236   -- NAME
1237   --   create_clip - CREATE CLIPping definition table
1241   --         col  VARCHAR2(30),
1238   -- DESCRIPTION
1239   --   Creates clippping definition table:
1240   --       CREATE TABLE <clip>(
1242   --         att  VARCHAR2(4000),
1243   --         lcut NUMBER,
1244   --         lval NUMBER,
1245   --         rcut NUMBER,
1246   --         rval NUMBER)
1247   --   This table is used to guide query generation process to construct
1248   --   clipping expressions of the following form:
1249   --       CASE WHEN "{col}" < {lcut} THEN {lval}
1250   --            WHEN "{col}" > {rcut} THEN {rval}
1251   --                                  ELSE "{col}"
1252   --       END "{col}"
1253   --   Example 1. (winsorizing) When col = 'my_col', lcut = -1.5, lval = -1.5,
1254   --   and rcut = 4.5 and rval = 4.5 the following expression is generated:
1255   --       CASE WHEN "my_col" < -1.5 THEN -1.5
1256   --            WHEN "my_col" >  4.5 THEN  4.5
1257   --                                 ELSE "my_col"
1258   --       END "my_col"
1259   --   Example 2. (trimming) When col = 'my_col', lcut = -1.5, lval = NULL,
1260   --   and rcut = 4.5 and rval = NULL the following expression is generated:
1261   --       CASE WHEN "my_col" < -1.5 THEN NULL
1262   --            WHEN "my_col" >  4.5 THEN NULL
1263   --                                 ELSE "my_col"
1264   --       END "my_col"
1265   -- PARAMETERS
1266   --   clip_table_name                - clipping definition table
1267   --   clip_schema_name               - clipping definition table schema name
1268   -- RETURNS
1269   --   None
1270   -- EXCEPTIONS
1271   --   None
1272   -- NOTES
1273   --   Note, that {col} is case sensitive since it generates quoted
1274   --   identifiers.  When there are multiple entries in the xform defintion
1275   --   table for the same {col} the behavior is undefined.  Any one of the
1276   --   definitions may be used in query generation. NULL values remain
1277   --   unchanged.
1278   -----------------------------------------------------------------------------
1279   PROCEDURE create_clip(
1280     clip_table_name                VARCHAR2,
1281     clip_schema_name               VARCHAR2 DEFAULT NULL);
1282 
1283   -----------------------------------------------------------------------------
1284   --                        insert_clip_winsor_tail                          --
1285   -----------------------------------------------------------------------------
1286   -- NAME
1287   --   insert_clip_winsor_tail - INSERT into CLIPping WINSORizing TAIL
1288   -- DESCRIPTION
1289   --   For every NUMBER column in the data table that is not in the exclusion
1290   --   list finds clipping definition and inserts it into the definition
1291   --   table. Definition for each relevant column is computed based on the
1292   --   non-NULL values sorted in ascending order such that val(1)<val(2)<...<
1293   --   val(N), where N is a total number of non-NULL values in a column:
1294   --       lcut = val(1+floor(N*q))
1295   --       lval = lcut
1296   --       rcut = val(N-floor(N*q))
1297   --       rval = rcut
1298   --   where q = ABS(NVL(tail_frac,0)). Nothing is done when q >= 0.5.
1299   -- PARAMETERS
1300   --   clip_table_name                - clipping definition table
1301   --   data_table_name                - data table
1302   --   tail_frac                      - tail fraction
1303   --   exclude_list                   - column exclusion list
1304   --   clip_schema_name               - definition table schema name
1305   --   data_schema_name               - data table schema name
1306   -- RETURNS
1307   --   None
1308   -- EXCEPTIONS
1309   --   None
1310   -- NOTES
1311   --   Columns with all NULLs are ignored.
1312   -----------------------------------------------------------------------------
1313   PROCEDURE insert_clip_winsor_tail(
1314     clip_table_name                VARCHAR2,
1315     data_table_name                VARCHAR2,
1316     tail_frac                      NUMBER DEFAULT 0.025,
1317     exclude_list                   COLUMN_LIST DEFAULT NULL,
1318     clip_schema_name               VARCHAR2 DEFAULT NULL,
1319     data_schema_name               VARCHAR2 DEFAULT NULL);
1320 
1321   -----------------------------------------------------------------------------
1322   --                        insert_clip_trim_tail                            --
1323   -----------------------------------------------------------------------------
1324   -- NAME
1325   --   insert_clip_trim_tail - INSERT into CLIPping TRIMming TAIL
1326   -- DESCRIPTION
1327   --   For every NUMBER column in the data table that is not in the exclusion
1328   --   list finds clipping definition and inserts it into the definition
1329   --   table. Definition for each relevant column is computed based on the
1330   --   non-NULL values sorted in ascending order such that val(1)<val(2)<...<
1331   --   val(N), where N is a total number of non-NULL values in a column:
1332   --       lcut = val(1+floor(N*q))
1333   --       lval = NULL
1334   --       rcut = val(N-floor(N*q))
1335   --       rval = NULL
1336   --   where q = ABS(NVL(tail_frac,0)). Nothing is done when q >= 0.5.
1337   -- PARAMETERS
1338   --   clip_table_name                - clipping definition table
1339   --   data_table_name                - data table
1340   --   tail_frac                      - tail fraction
1341   --   exclude_list                   - column exclusion list
1342   --   clip_schema_name               - definition table schema name
1343   --   data_schema_name               - data table schema name
1344   -- RETURNS
1345   --   None
1346   -- EXCEPTIONS
1347   --   None
1348   -- NOTES
1349   --   Columns with all NULLs are ignored.
1353     data_table_name                VARCHAR2,
1350   -----------------------------------------------------------------------------
1351   PROCEDURE insert_clip_trim_tail(
1352     clip_table_name                VARCHAR2,
1354     tail_frac                      NUMBER DEFAULT 0.025,
1355     exclude_list                   COLUMN_LIST DEFAULT NULL,
1356     clip_schema_name               VARCHAR2 DEFAULT NULL,
1357     data_schema_name               VARCHAR2 DEFAULT NULL);
1358 
1359   -----------------------------------------------------------------------------
1360   --                               xform_clip                                --
1361   -----------------------------------------------------------------------------
1362   -- NAME
1363   --   xform_clip - XFORMation CLIPping
1364   -- DESCRIPTION
1365   --   Creates a view that perfoms clipping of the data table. Only the
1366   --   columns that are specified in the xform definition are clipped, the
1367   --   remaining columns do not change.
1368   -- PARAMETERS
1369   --   clip_table_name                - xform definition table
1370   --   data_table_name                - data table
1371   --   xform_view_name                - xform view
1372   --   clip_schema_name               - xform definition table schema name
1373   --   data_schema_name               - data table schema name
1374   --   xform_schema_name              - xform view schema name
1375   -- RETURNS
1376   --   None
1377   -- EXCEPTIONS
1378   --   None
1379   -- NOTES
1380   --   None
1381   -----------------------------------------------------------------------------
1382   PROCEDURE xform_clip(
1383     clip_table_name                VARCHAR2,
1384     data_table_name                VARCHAR2,
1385     xform_view_name                VARCHAR2,
1386     clip_schema_name               VARCHAR2 DEFAULT NULL,
1387     data_schema_name               VARCHAR2 DEFAULT NULL,
1388     xform_schema_name              VARCHAR2 DEFAULT NULL);
1389 
1390   -----------------------------------------------------------------------------
1391   --                                stack_clip                               --
1392   -----------------------------------------------------------------------------
1393   -- NAME
1394   --   stack_clip - STACK CLIPping
1395   -- DESCRIPTION
1396   --   For every column in the stack definition that has a matching entry in
1397   --   the transformation definition, stacks clipping expression on top of the
1398   --   existing expression and updates the stack definition. Columns in the
1399   --   stack definition that have no matching entries in the transformation
1400   --   definition or have NULL expression are not changed. Columns in the
1401   --   transformation definition that have no matching entries in the stack
1402   --   definition are added to the stack definition (using "{col}" in place of
1403   --   the original expression or "VALUE" for nested attributes).
1404   --   Reverse expressions in the stack definition are updated accordingly,
1405   --   that is if an expression is updated, added or remains unchanged then a
1406   --   corresponding reverse expression is also updated, added or remains
1407   --   unchanged. When reverse expression is NULL it is treated as identity
1408   --   function ("{attr}" or "VALUE" for nested attributes).
1409   --   Given an entry ({attr}, {expr}, {rexp}) in the stack definition and a
1410   --   matching entry in the transformation definition, after stacking {expr}
1411   --   has the following form:
1412   --       CASE WHEN {expr} < {lcut} THEN {lval}
1413   --            WHEN {expr} > {rcut} THEN {rval}
1414   --                                 ELSE {expr}
1415   --       END
1416   --   and {rexp} remains unchanged
1417   --   Example 1. Given transformation definition:
1418   --       {col = COL1, lcut = -1.5, lval = -1.5, rcut = 4.5, rval = 4.5}
1419   --       {col = COL2, lcut = 0,    lval = 0,    rcut = 1,   rval = 1}
1420   --   and stack definition:
1421   --       {attr = COL1, expr = log(10, COL1), rexp = power(10, COL1)}
1422   --       {attr = COL3, expr = ln(COL3),      rexp = exp(COL3)}
1423   --   the following updated stack definition is generated:
1424   --       {attr = COL1,
1425   --        expr = CASE WHEN log(10, COL1) < -1.5 THEN -1.5
1426   --                    WHEN log(10, COL1) > 4.5  THEN 4.5
1427   --                                              ELSE log(10, COL1)
1428   --               END,
1429   --        rexp = power(10, COL1)}
1430   --       {attr = COL3,
1431   --        expr = ln(COL3),
1432   --        rexp = exp(COL3)}
1433   --       {attr = COL2,
1434   --        expr = CASE WHEN COL2 < 0 THEN 0
1435   --                    WHEN COL2 > 1 THEN 1
1436   --                                  ELSE COL2
1437   --               END,
1438   --        rexp = NULL}
1439   -- PARAMETERS
1440   --   clip_table_name                - xform definition table
1441   --   xform_list                     - stack definition
1442   --   clip_schema_name               - xform definition table schema name
1443   -- RETURNS
1444   --   xform_list                     - updated stack definition
1445   -- EXCEPTIONS
1446   --   None
1447   -- NOTES
1448   --   None
1449   -----------------------------------------------------------------------------
1450   PROCEDURE stack_clip(
1451     clip_table_name                VARCHAR2,
1452     xform_list                     IN OUT NOCOPY TRANSFORM_LIST,
1453     clip_schema_name               VARCHAR2 DEFAULT NULL);
1454 
1455   -----------------------------------------------------------------------------
1456   --                            create_miss_num                              --
1460   --                     table
1457   -----------------------------------------------------------------------------
1458   -- NAME
1459   --   create_miss_num - CREATE NUMerical MISSing value treatment definition
1461   -- DESCRIPTION
1462   --   Creates numerical missing value treatment definition table:
1463   --       CREATE TABLE <miss_num>(
1464   --         col VARCHAR2(30),
1465   --         att VARCHAR2(4000),
1466   --         val NUMBER)
1467   --   This table is used to guide query generation process to construct
1468   --   missing value treatment expressions of the following form:
1469   --       NVL("{col}", {val}) "{col}"
1470   --   For example when col = 'my_col', val = 20 the
1471   --   following expression is generated:
1472   --       NVL("my_col", 20) "my_col"
1473   -- PARAMETERS
1474   --   miss_table_name                - definition table
1475   --   miss_schema_name               - definition table schema name
1476   -- RETURNS
1477   --   None
1478   -- EXCEPTIONS
1479   --   None
1480   -- NOTES
1481   --   Note, that {col} is case sensitive since it generates quoted
1482   --   identifiers.  When there are multiple entries in the xform defintion
1483   --   table for the same {col} the behavior is undefined.  Any one of the
1484   --   definitions may be used in query generation.
1485   -----------------------------------------------------------------------------
1486   PROCEDURE create_miss_num(
1487     miss_table_name                VARCHAR2,
1488     miss_schema_name               VARCHAR2 DEFAULT NULL);
1489 
1490   -----------------------------------------------------------------------------
1491   --                           insert_miss_num_mean                          --
1492   -----------------------------------------------------------------------------
1493   -- NAME
1494   --   insert_miss_num_mean - INSERT into NUMerical MISSining value treatment
1495   --                          MEAN
1496   -- DESCRIPTION
1497   --   For every NUMBER column in the data table that is not in the exclusion
1498   --   list finds missing value treatment definition and inserts it into the
1499   --   definition table. Definition for each relevant column is computed based
1500   --   on the mean (average) value that is computed from the data table:
1501   --       val = mean
1502   --   The value of mean is rounded to round_num significant digits prior to
1503   --   storing it in the definition table.
1504   -- PARAMETERS
1505   --   miss_table_name                - definition table
1506   --   data_table_name                - data table
1507   --   exclude_list                   - column exclusion list
1508   --   round_num                      - number of significant digits
1509   --   miss_schema_name               - definition table schema name
1510   --   data_schema_name               - data table schema name
1511   -- RETURNS
1512   --   None
1513   -- EXCEPTIONS
1514   --   None
1515   -- NOTES
1516   --   Columns with all NULLs are ignored.
1517   -----------------------------------------------------------------------------
1518   PROCEDURE insert_miss_num_mean(
1519     miss_table_name                VARCHAR2,
1520     data_table_name                VARCHAR2,
1521     exclude_list                   COLUMN_LIST DEFAULT NULL,
1522     round_num                      PLS_INTEGER DEFAULT 6,
1523     miss_schema_name               VARCHAR2 DEFAULT NULL,
1524     data_schema_name               VARCHAR2 DEFAULT NULL);
1525 
1526   -----------------------------------------------------------------------------
1527   --                              xform_miss_num                             --
1528   -----------------------------------------------------------------------------
1529   -- NAME
1530   --   xform_miss_num - XFORMation NUMerical MISSing value treatment
1531   -- DESCRIPTION
1532   --   Creates a view that perfoms numerical missing value treatment of the
1533   --   data table. Only the columns that are specified in the xform definition
1534   --   are treated, the remaining columns do not change.
1535   -- PARAMETERS
1536   --   miss_table_name                - xform definition table
1537   --   data_table_name                - data table
1538   --   xform_view_name                - xform view
1539   --   miss_schema_name               - xform definition table schema name
1540   --   data_schema_name               - data table schema name
1541   --   xform_schema_name              - xform view schema name
1542   -- RETURNS
1543   --   None
1544   -- EXCEPTIONS
1545   --   None
1546   -- NOTES
1547   --   None
1548   -----------------------------------------------------------------------------
1549   PROCEDURE xform_miss_num(
1550     miss_table_name                VARCHAR2,
1551     data_table_name                VARCHAR2,
1552     xform_view_name                VARCHAR2,
1553     miss_schema_name               VARCHAR2 DEFAULT NULL,
1554     data_schema_name               VARCHAR2 DEFAULT NULL,
1555     xform_schema_name              VARCHAR2 DEFAULT NULL);
1556 
1557   -----------------------------------------------------------------------------
1558   --                              stack_miss_num                             --
1559   -----------------------------------------------------------------------------
1560   -- NAME
1561   --   stack_miss_num - STACK NUMerical MISSing value treatment
1562   -- DESCRIPTION
1563   --   For every column in the stack definition that has a matching entry in
1564   --   the transformation definition, stacks numerical missing value treatment
1565   --   expression on top of the existing expression and updates the stack
1569   --   matching entries in the stack definition are added to the stack
1566   --   definition. Columns in the stack definition that have no matching
1567   --   entries in the transformation definition or have NULL expression are
1568   --   not changed. Columns in the transformation definition that have no
1570   --   definition (using "{col}" in place of the original expression or "VALUE"
1571   --   for nested attributes). Reverse expressions in the stack definition are
1572   --   updated accordingly, that is if an expression is updated, added or
1573   --   remains unchanged then a corresponding reverse expression is also
1574   --   updated, added or remains unchanged. When reverse expression is NULL it
1575   --   is treated as identity function ("{attr}" or "VALUE" for nested
1576   --   attributes). Given an entry ({attr}, {expr}, {rexp}) in the stack
1577   --   definition and a matching entry in the transformation definition, after
1578   --   stacking {expr} has the following form:
1579   --       NVL({expr}, {val})
1580   --   and {rexp} remains unchanged
1581   --   Example 1. Given transformation definition:
1582   --       {col = COL1, val = 4.5}
1583   --       {col = COL2, val = 0}
1584   --   and stack definition:
1585   --       {attr = COL1, expr = log(10, COL1), rexp = power(10, COL1)}
1586   --       {attr = COL3, expr = ln(COL3),      rexp = exp(COL3)}
1587   --   the following updated stack definition is generated:
1588   --       {attr = COL1,
1589   --        expr = NVL(log(10, COL1), 4.5),
1590   --        rexp = power(10, COL1)}
1591   --       {attr = COL3,
1592   --        expr = ln(COL3),
1593   --        rexp = exp(COL3)}
1594   --       {attr = COL2,
1595   --        expr = NVL(COL2, 0),
1596   --        rexp = NULL}
1597   -- PARAMETERS
1598   --   miss_table_name                - xform definition table
1599   --   xform_list                     - stack definition
1600   --   miss_schema_name               - xform definition table schema name
1601   -- RETURNS
1602   --   xform_list                     - updated stack definition
1603   -- EXCEPTIONS
1604   --   None
1605   -- NOTES
1606   --   None
1607   -----------------------------------------------------------------------------
1608   PROCEDURE stack_miss_num(
1609     miss_table_name                VARCHAR2,
1610     xform_list                     IN OUT NOCOPY TRANSFORM_LIST,
1611     miss_schema_name               VARCHAR2 DEFAULT NULL);
1612 
1613   -----------------------------------------------------------------------------
1614   --                            create_miss_cat                              --
1615   -----------------------------------------------------------------------------
1616   -- NAME
1617   --   create_miss_cat - CREATE CATegorical MISSing value treatment definition
1618   --                     table
1619   -- DESCRIPTION
1620   --   Creates categorical missing value treatment definition table:
1621   --       CREATE TABLE <miss_cat>(
1622   --         col VARCHAR2(30),
1623   --         att VARCHAR2(4000),
1624   --         val VARCHAR2(4000))
1625   --   This table is used to guide query generation process to construct
1626   --   missing value treatment expressions of the following form:
1627   --       NVL("{col}", {val}) "{col}"
1628   --   For example when col = 'zip_code', val = 'MA' the
1629   --   following expression is generated:
1630   --       NVL("zip_code", 'MA') "zip_code"
1631   -- PARAMETERS
1632   --   miss_table_name                - definition table
1633   --   miss_schema_name               - definition table schema name
1634   -- RETURNS
1635   --   None
1636   -- EXCEPTIONS
1637   --   None
1638   -- NOTES
1639   --   Note, that {col} is case sensitive since it generates quoted
1640   --   identifiers.  When there are multiple entries in the xform defintion
1641   --   table for the same {col} the behavior is undefined.  Any one of the
1642   --   definitions may be used in query generation.
1643   -----------------------------------------------------------------------------
1644   PROCEDURE create_miss_cat(
1645     miss_table_name                VARCHAR2,
1646     miss_schema_name               VARCHAR2 DEFAULT NULL);
1647 
1648   -----------------------------------------------------------------------------
1649   --                           insert_miss_cat_mode                          --
1650   -----------------------------------------------------------------------------
1651   -- NAME
1652   --   insert_miss_cat_mode - INSERT into CATegorical MISSining value
1653   --                          treatment MODE
1654   -- DESCRIPTION
1655   --   For every VARCHAR2, CHAR column in the data table that is not in the
1656   --   exclusion list finds missing value treatment definition and inserts it
1657   --   into the definition table. Definition for each relevant column is
1658   --   computed based on the mode value that is computed from the data table:
1659   --       val = mode
1660   -- PARAMETERS
1661   --   miss_table_name                - definition table
1662   --   data_table_name                - data table
1663   --   exclude_list                   - column exclusion list
1664   --   miss_schema_name               - definition table schema name
1665   --   data_schema_name               - data table schema name
1666   -- RETURNS
1667   --   None
1668   -- EXCEPTIONS
1669   --   None
1670   -- NOTES
1671   --   Columns with all NULLs are ignored.
1672   -----------------------------------------------------------------------------
1673   PROCEDURE insert_miss_cat_mode(
1674     miss_table_name                VARCHAR2,
1675     data_table_name                VARCHAR2,
1676     exclude_list                   COLUMN_LIST DEFAULT NULL,
1680   -----------------------------------------------------------------------------
1677     miss_schema_name               VARCHAR2 DEFAULT NULL,
1678     data_schema_name               VARCHAR2 DEFAULT NULL);
1679 
1681   --                              xform_miss_cat                             --
1682   -----------------------------------------------------------------------------
1683   -- NAME
1684   --   xform_miss_cat - XFORMation CATegorical MISSing value treatment
1685   -- DESCRIPTION
1686   --   Creates a view that perfoms categorical missing value treatment of the
1687   --   data table. Only the columns that are specified in the xform definition
1688   --   are treated, the remaining columns do not change.
1689   -- PARAMETERS
1690   --   miss_table_name                - xform definition table
1691   --   data_table_name                - data table
1692   --   xform_view_name                - xform view
1693   --   miss_schema_name               - xform definition table schema name
1694   --   data_schema_name               - data table schema name
1695   --   xform_schema_name              - xform view schema name
1696   -- RETURNS
1697   --   None
1698   -- EXCEPTIONS
1699   --   None
1700   -- NOTES
1701   --   The data type of the xformed columns is preseved by putting a CAST
1702   --   expression around NVL. For example, when col = 'zip_code', val = 'MA'
1703   --   the data type is CHAR(2) the following expression is generated:
1704   --       CAST(NVL("zip_code", 'MA') AS CHAR(2)) "zip_code"
1705   -----------------------------------------------------------------------------
1706   PROCEDURE xform_miss_cat(
1707     miss_table_name                VARCHAR2,
1708     data_table_name                VARCHAR2,
1709     xform_view_name                VARCHAR2,
1710     miss_schema_name               VARCHAR2 DEFAULT NULL,
1711     data_schema_name               VARCHAR2 DEFAULT NULL,
1712     xform_schema_name              VARCHAR2 DEFAULT NULL);
1713 
1714   -----------------------------------------------------------------------------
1715   --                              stack_miss_cat                             --
1716   -----------------------------------------------------------------------------
1717   -- NAME
1718   --   stack_miss_cat - STACK CATegorical MISSing value treatment
1719   -- DESCRIPTION
1720   --   For every column in the stack definition that has a matching entry in
1721   --   the transformation definition, stacks categorical missing value
1722   --   treatment expression on top of the existing expression and updates the
1723   --   stack definition. Columns in the stack definition that have no matching
1724   --   entries in the transformation definition or have NULL expression are
1725   --   not changed. Columns in the transformation definition that have no
1726   --   matching entries in the stack definition are added to the stack
1727   --   definition (using "{col}" in place of the original expression or "VALUE"
1728   --   for nested attributes). Reverse expressions in the stack definition are
1729   --   updated accordingly, that is if an expression is updated, added or
1730   --   remains unchanged then a corresponding reverse expression is also
1731   --   updated, added or remains unchanged. When reverse expression is NULL it
1732   --   is treated as identity function ("{attr}" or "VALUE" for nested
1733   --   attributes). Given an entry ({attr}, {expr}, {rexp}) in the stack
1734   --   definition and a matching entry in the transformation definition, after
1735   --   stacking {expr} has the following form:
1736   --       NVL({expr}, {val})
1737   --   and {rexp} remains unchanged
1738   --   Example 1. Given transformation definition:
1739   --       {col = COL1, val = 'ma'}
1740   --       {col = COL2, val = 'CA'}
1741   --   and stack definition:
1742   --       {attr = COL1, expr = lower(COL1), rexp = initcap(COL1)}
1743   --       {attr = COL3, expr = upper(COL3), rexp = initcap(COL3)}
1744   --   the following updated stack definition is generated:
1745   --       {attr = COL1,
1746   --        expr = NVL(lower(COL1), 'ma'),
1747   --        rexp = initcap(COL1)}
1748   --       {attr = COL3,
1749   --        expr = upper(COL3),
1750   --        rexp = initcap(COL3)}
1751   --       {attr = COL2,
1752   --        expr = NVL(COL2, 'CA'),
1753   --        rexp = NULL}
1754   -- PARAMETERS
1755   --   miss_table_name                - xform definition table
1756   --   xform_list                     - stack definition
1757   --   miss_schema_name               - xform definition table schema name
1758   -- RETURNS
1759   --   xform_list                     - updated stack definition
1760   -- EXCEPTIONS
1761   --   None
1762   -- NOTES
1763   --   None
1764   -----------------------------------------------------------------------------
1765   PROCEDURE stack_miss_cat(
1766     miss_table_name                VARCHAR2,
1767     xform_list                     IN OUT NOCOPY TRANSFORM_LIST,
1768     miss_schema_name               VARCHAR2 DEFAULT NULL);
1769 
1770   -----------------------------------------------------------------------------
1771   --                              xform_stack                                --
1772   -----------------------------------------------------------------------------
1773   -- NAME
1774   --   xform_stack - XFORMation STACK
1775   -- DESCRIPTION
1776   --   Creates a view that perfoms transformation of the data table specified
1777   --   by the stack definition. Only the columns that are specified in the
1778   --   stack definition are transformed, the remaining columns do not change.
1779   --   All columns in the stack definition are applied. Columns with NULL value
1780   --   in the expression field are excluded. Columns in the stack definition
1784   --   data_table_name                - data table
1781   --   that do not have a matching column in the data are added to the view.
1782   -- PARAMETERS
1783   --   xform_list                     - stack definition
1785   --   xform_view_name                - xform view
1786   --   data_schema_name               - data table schema name
1787   --   xform_schema_name              - xform view schema name
1788   -- RETURNS
1789   --   None
1790   -- EXCEPTIONS
1791   --   None
1792   -- NOTES
1793   --   None
1794   -----------------------------------------------------------------------------
1795   PROCEDURE xform_stack(
1796     xform_list                     TRANSFORM_LIST,
1797     data_table_name                VARCHAR2,
1798     xform_view_name                VARCHAR2,
1799     data_schema_name               VARCHAR2 DEFAULT NULL,
1800     xform_schema_name              VARCHAR2 DEFAULT NULL);
1801 
1802   -----------------------------------------------------------------------------
1803   --                             describe_stack                              --
1804   -----------------------------------------------------------------------------
1805   -- NAME
1806   --   describe_stack - DESCRIBE STACK
1807   -- DESCRIPTION
1808   --   Desribes transformation of the data table specified by the stack
1809   --   definition. Only the columns that are specified in the stack definition
1810   --   are transformed, the remaining columns do not change. All columns in the
1811   --   stack definition are applied. Columns with NULL value in the expression
1812   --   field are excluded. Columns in the stack definition that do not have a
1813   --   matching column in the data are added to the describe list.
1814   -- PARAMETERS
1815   --   xform_list                     - stack definition
1816   --   data_table_name                - data table
1817   --   describe_list                  - describe list/describe list3
1818   --   data_schema_name               - data table schema name
1819   -- RETURNS
1820   --   None
1821   -- EXCEPTIONS
1822   --   None
1823   -- NOTES
1824   --   When the DESCRIBE_LIST type is specified for the describe_list parameter
1825   --   a DM_NESTED_NUMERICALS type is indicated by a value of nest_num_col_type
1826   --   in the col_type field and nest_cat_col_type for DM_NESTED_CATEGORICALS.
1827   --
1828   --   When the DESCRIBE_LIST3 type is specified for the describe_list
1829   --   parameter, nested types are indicated with the ADT type in the col_type
1830   --   field with the specific nested type indicated in the col_name_type
1831   --   field as a string.  This behavior is consistent with the call to
1832   --   describe_columns3 in the dbms_sql package (dbmssql.sql).
1833   -----------------------------------------------------------------------------
1834   PROCEDURE describe_stack(
1835     xform_list                     TRANSFORM_LIST,
1836     data_table_name                VARCHAR2,
1837     describe_list                  OUT DESCRIBE_LIST,
1838     data_schema_name               VARCHAR2 DEFAULT NULL);
1839 
1840   PROCEDURE describe_stack(
1841     xform_list                     TRANSFORM_LIST,
1842     data_table_name                VARCHAR2,
1843     describe_list                  OUT DESCRIBE_LIST3,
1844     data_schema_name               VARCHAR2 DEFAULT NULL);
1845 
1846   -----------------------------------------------------------------------------
1847   --                             set_expression                              --
1848   -----------------------------------------------------------------------------
1849   -- NAME
1850   --   set_expression - SET xform EXPRESSION
1851   -- DESCRIPTION
1852   --   Appends a VARCHAR2 chunk to the expression. Appeding NULL clears the
1853   --   expression.
1854   -- PARAMETERS
1855   --   expression                     - expression
1856   --   chunk                          - VARCHAR2 chunk
1857   -- RETURNS
1858   --   None
1859   -- EXCEPTIONS
1860   --   None
1861   -- NOTES
1862   --   None
1863   -----------------------------------------------------------------------------
1864   PROCEDURE set_expression(
1865     expression                     IN OUT NOCOPY EXPRESSION_REC,
1866     chunk                          VARCHAR2 DEFAULT NULL);
1867 
1868   -----------------------------------------------------------------------------
1869   --                             get_expression                              --
1870   -----------------------------------------------------------------------------
1871   -- NAME
1872   --   get_expression - GET xform EXPRESSION
1873   -- DESCRIPTION
1874   --   Gets a VARCHAR2 chunk from the expression. Chunks numbering starts from
1875   --   one. For chunks outside of the range the return value is NULL. When
1876   --   chunk number is NULL the whole expression is returned as a string. If
1877   --   expression is too big VALUE_ERROR is raised.
1878   -- PARAMETERS
1879   --   expression                     - expression
1880   --   chunk_num                      - chunk number
1881   -- RETURNS
1882   --   VARCHAR2 chunk.
1883   -- EXCEPTIONS
1884   --   VALUE_ERROR
1885   -- NOTES
1886   --   None
1887   -----------------------------------------------------------------------------
1888   FUNCTION get_expression(
1889     expression                     EXPRESSION_REC,
1890     chunk_num                      PLS_INTEGER DEFAULT NULL)
1891   RETURN VARCHAR2;
1892 
1893   -----------------------------------------------------------------------------
1894   --                             set_transform                               --
1895   -----------------------------------------------------------------------------
1896   -- NAME
1897   --   set_transform - SET TRANSFORM list
1898   -- DESCRIPTION
1899   --   Appends an element to the transformation list.
1900   -- PARAMETERS
1901   --   expression                     - expression
1902   --   chunk                          - VARCHAR2 chunk
1903   -- RETURNS
1904   --   None
1905   -- EXCEPTIONS
1906   --   None
1907   -- NOTES
1908   --   None
1909   -----------------------------------------------------------------------------
1910   PROCEDURE set_transform(
1911     xform_list                     IN OUT NOCOPY TRANSFORM_LIST,
1912     attribute_name                 VARCHAR2,
1913     attribute_subname              VARCHAR2,
1914     expression                     CLOB,
1915     reverse_expression             VARCHAR2,
1916     attribute_spec                 VARCHAR2 DEFAULT NULL);
1917 END dbms_data_mining_transform;