DBA Data[Home] [Help]

PACKAGE: SYS.UTL_I18N

Source


1 PACKAGE utl_i18n AS
2 
3   /* GDK (Globalization Development Kit) is a set of services that can help
4    * monolingual application developers to create similar multilingual
5    * applications with minimal knowledge about internationalization issues.
6    * Oracle globalization services provide developers a set of consistent,
7    * high performance and easy to use tools.
8    */
9 
10   /* Shift-status values for the piecewise raw_to_char/raw_to_nchar calls */
11   SHIFT_IN         CONSTANT PLS_INTEGER  :=0;
12   SHIFT_OUT        CONSTANT PLS_INTEGER  :=1;
13 
14   /* Miscellaneous flags used by the locale-mapping API */
15   ORACLE_TO_IANA    CONSTANT PLS_INTEGER := 0;
16   IANA_TO_ORACLE    CONSTANT PLS_INTEGER := 1;
17 
18   MAIL_GENERIC      CONSTANT PLS_INTEGER := 0;
19   MAIL_WINDOWS      CONSTANT PLS_INTEGER := 1;
20 
21   GENERIC_CONTEXT   CONSTANT PLS_INTEGER := 0;
22   MAIL_CONTEXT      CONSTANT PLS_INTEGER := 1;
23 
24   /* Conversion directions for the flag argument of ENCODE_SQL_XML */
25   XMLTAG_TO_SQLNAME   CONSTANT PLS_INTEGER :=0;
26   SQLNAME_TO_XMLTAG   CONSTANT PLS_INTEGER :=1;
27 
28   /* Transliteration names for the name argument of TRANSLITERATE */
29   KANA_FWKATAKANA  CONSTANT VARCHAR2(30) := 'kana_fwkatakana';
30   KANA_HWKATAKANA  CONSTANT VARCHAR2(30) := 'kana_hwkatakana';
31   KANA_HIRAGANA    CONSTANT VARCHAR2(30) := 'kana_hiragana' ;
32   FWKATAKANA_HWKATAKANA  CONSTANT VARCHAR2(30) := 'fwkatakana_hwkatakana' ;
33   FWKATAKANA_HIRAGANA    CONSTANT VARCHAR2(30) := 'fwkatakana_hiragana' ;
34   HWKATAKANA_FWKATAKANA  CONSTANT VARCHAR2(30) := 'hwkatakana_fwkatakana';
35   HWKATAKANA_HIRAGANA    CONSTANT VARCHAR2(30) := 'hwkatakana_hiragana' ;
36   HIRAGANA_FWKATAKANA    CONSTANT VARCHAR2(30) := 'hiragana_fwkatakana';
37   HIRAGANA_HWKATAKANA    CONSTANT VARCHAR2(30) := 'hiragana_hwkatakana';
38 
39   /* Pre-defined exceptions (error 3001: unsupported transliteration) */
40   UNSUPPORTED_TRANSLITERATION  EXCEPTION;
41   PRAGMA EXCEPTION_INIT(UNSUPPORTED_TRANSLITERATION, -3001);
42 
43   -- Translation types for the flag argument of GET_TRANSLATION --
44   LANGUAGE_TRANS           CONSTANT PLS_INTEGER :=0;
45   TERRITORY_TRANS          CONSTANT PLS_INTEGER :=1;
46   LANGUAGE_TERRITORY_TRANS CONSTANT PLS_INTEGER :=2;
47 
48   /* String list type returned by the GET_LOCAL_xxx and GET_COMMON_TIME_ZONES functions */
49   TYPE string_array IS TABLE of VARCHAR2(32767)
50     INDEX BY BINARY_INTEGER;
51 
52   /**
53    * Convert a VARCHAR2/NVARCHAR2 string to another charset
54    * and return the result as a RAW value.
55    *
56    * For example, utl_i18n.string_to_raw('abcdef'||chr(170), 'utf8')
57    * will return a raw of hex value '616263646566C2AA'.
58    * If user inputs an invalid character set or an empty input string,
59    * an empty string will be returned.
60    *
61    * PARAMETERS
62    *   data        The input VARCHAR2/NVARCHAR2 to convert.
63    *   dst_charset The destination charset to be converted to.
64    *
65    * RETURN
66    *   The byte string after conversion in raw format
67    * EXCEPTIONS
68    *   miscellaneous runtime exceptions.
69    */
70   FUNCTION string_to_raw(data IN VARCHAR2 CHARACTER SET ANY_CS,
71                           dst_charset IN VARCHAR2 DEFAULT NULL)
72                           RETURN RAW;
73 
74   /**
75    * Convert a raw buffer that is encoded in another charset
76    * back to a VARCHAR2 string.
77    *
78    * For example, utl_i18n.raw_to_char(hextoraw('616263646566C2AA'), 'utf8')
79    * will return a string (encoded in database charset) 'abcdef'||chr(170).
80    * If user inputs an invalid character set or an empty raw buffer,
81    * an empty string will be returned.
82    *
83    * PARAMETERS
84    *   data        The input byte arrays in raw.
85    *   src_charset The source charset raw data is converted from.
86    *
87    * RETURN
88    *   The string converted back into database charset encoding.
89    * EXCEPTIONS
90    *   miscellaneous runtime exceptions.
91    */
92   FUNCTION raw_to_char(data IN RAW,
93                        src_charset IN VARCHAR2 DEFAULT NULL)
94                        RETURN VARCHAR2;
95 
96  /**
97    * Convert a raw buffer that is encoded in another charset
98    * back to an NVARCHAR2 string.
99    *
100    * For example, utl_i18n.raw_to_nchar(hextoraw('616263646566C2AA'), 'utf8')
101    * will return a string (encoded in national charset) 'abcdef'||chr(170).
102    * If user inputs an invalid character set or an empty raw buffer,
103    * an empty string will be returned.
104    *
105    * PARAMETERS
106    *   data        The input byte arrays in raw.
107    *   src_charset The source charset raw data is converted from.
108    *
109    * RETURN
110    *   The string converted back into national charset encoding.
111    * EXCEPTIONS
112    *   miscellaneous runtime exceptions.
113    */
114   FUNCTION raw_to_nchar(data IN RAW,
115                         src_charset IN VARCHAR2 DEFAULT NULL)
116                         RETURN NVARCHAR2;
117 
118  /**
119    * Convert a raw buffer that is encoded in another charset
120    * back to a VARCHAR2 string and return the shift status and
121    * scanned length for the input. This information can be used
122    * in piecewise conversion.
123    *
124    * For example, utl_i18n.raw_to_char(hextoraw('616263646566C2AA'),
125    *  'utf8', slen, shf) will return a string 'abcdef'||chr(170) and
126    * set slen = 8, shf = SHIFT_IN.
127    * If user inputs an invalid character set or an empty raw buffer,
128    * an empty string will be returned.
129    *
130    * PARAMETERS
131    *   data           The input byte arrays in raw.
132    *   src_charset    The source charset raw data is converted from.
133    *   scanned_length The number of bytes scanned in the input raw data. (OUT)
134    *   shift_status   The shift status at the end of this scan. (IN/OUT)
135    *                  User must set this variable to be SHIFT_IN the first
136    *                  time it is called in piecewise conversion.
137    * RETURN
138    *   The string converted back into database charset encoding.
139    * EXCEPTIONS
140    *   miscellaneous runtime exceptions.
141    */
142   Function raw_to_char(data IN RAW,
143                        src_charset IN VARCHAR2 DEFAULT NULL,
144                        scanned_length OUT PLS_INTEGER,
145                        shift_status IN OUT PLS_INTEGER)
146                        RETURN VARCHAR2;
147 
148  /**
149    * Convert a raw buffer that is encoded in another charset
150    * back to an NVARCHAR2 string and return the shift status and
151    * scanned length for the input. This information can be used
152    * in piecewise conversion.
153    *
154    * For example, utl_i18n.raw_to_nchar(hextoraw('616263646566C2AA'),
155    *  'utf8', slen, shf) will return a string 'abcdef'||chr(170) and
156    * set slen = 8, shf = SHIFT_IN.
157    * If user inputs an invalid character set or an empty raw buffer,
158    * an empty string will be returned.
159    *
160    * PARAMETERS
161    *   data           The input byte arrays in raw.
162    *   src_charset    The source charset raw data is converted from.
163    *   scanned_length The number of bytes scanned in the input raw data. (OUT)
164    *   shift_status   The shift status at the end of this scan. (IN/OUT)
165    *                  User must set this variable to be SHIFT_IN the first
166    *                  time it is called in piecewise conversion.
167    * RETURN
168    *   The string converted back into national charset encoding.
169    * EXCEPTIONS
170    *   miscellaneous runtime exceptions.
171    */
172   Function raw_to_nchar(data IN RAW,
173                         src_charset IN VARCHAR2 DEFAULT NULL,
174                         scanned_length OUT PLS_INTEGER,
175                         shift_status IN OUT PLS_INTEGER)
176                         RETURN NVARCHAR2;
177 
178  /**
179    * Escape a VARCHAR2/NVARCHAR2 to a character reference representation.
180    * Two kinds of characters will be converted:
181    *   (1) The predefined characters which have special meaning
182    *       For example,  &, <, > etc.
183    *   (2) Multibyte characters which cannot be converted to the
184    *       web page character set
185    *
186    * For example, utl_i18n.escape_reference('ab'||chr(170), 'us7ascii')
187    *  will return a string 'ab&#xaa;'.
188    * If user inputs an invalid character set or an empty string,
189    * an empty string will be returned.
190    *
191    * PARAMETERS
192    *   str            The input string to escape.
193    *   page_cs_name   The name of webpage encoding character set.
194    * RETURN
195    *   The string escaped to character reference representation.
196    * EXCEPTIONS
197    *   miscellaneous runtime exceptions.
198    */
199   Function escape_reference(str IN VARCHAR2 CHARACTER SET ANY_CS,
200                             page_cs_name IN VARCHAR2 DEFAULT NULL)
201                             RETURN VARCHAR2 CHARACTER SET str%CHARSET;
202 
203 
204  /**
205    * Unescape a VARCHAR2/NVARCHAR2 from character reference representation
206    *
207    * For example, utl_i18n.unescape_reference('ab&#xaa;')
208    *  will return a string 'ab'||chr(170).
209    * If input is an empty string, an empty string will be returned.
210    *
211    * PARAMETERS
212    *   str            The input string to unescape.
213    * RETURN
214    *   The string unescaped from character reference representation.
215    * EXCEPTIONS
216    *   miscellaneous runtime exceptions.
217    */
218   Function unescape_reference(str IN VARCHAR2 CHARACTER SET ANY_CS)
219                              RETURN VARCHAR2 CHARACTER SET str%CHARSET;
220 
221   /**
222    * Map ORACLE character set name to IANA name and vice versa or map
223    * a generic character set to a MAIL character set. For example,
224    * utl_i18n.map_charset('iso-8859-1',utl_i18n.GENERIC_CONTEXT,
225    * utl_i18n.IANA_TO_ORACLE) will return 'WE8ISO8859P1'. If user
226    * inputs an invalid character set or invalid flag name, an empty
227    * string will be returned. If user does not specify the flag,
228    * we will use "ORACLE_TO_IANA" as the default flag. That is,
229    * if user does not specify the conversion direction, we will always assume
230    * that the current string uses Oracle standard.
231    *
232    * PARAMETERS
233    *   charset  The character set name to map. The mapping is
234    *             case-insensitive.
235    *   context   GENERIC_CONTEXT - map between ORACLE and IANA
236    *             MAIL_CONTEXT    - map from a generic character set to a
237    *                                 MAIL character set
238    *   flag      ORACLE_TO_IANA  - map from ORACLE name to IANA name.
239    *             IANA_TO_ORACLE  - map from IANA name to ORACLE name.
240    * RETURN
241    *   The mapped character set name if a match is found. NULL if no match
242    *   is found or the flag is invalid.
243    * EXCEPTIONS
244    *   miscellaneous runtime exceptions.
245    */
246   FUNCTION map_charset(charset  IN VARCHAR2,
247                        context  IN PLS_INTEGER DEFAULT GENERIC_CONTEXT,
248                        flag     IN PLS_INTEGER DEFAULT ORACLE_TO_IANA)
249                     RETURN VARCHAR2;
250 
251  /**
252    * Get the ORACLE language name from an ISO locale
253    *
254    * For example, utl_i18n.map_language_from_iso('en_US') will return
255    * 'American'.
256    * If user inputs an invalid locale string, an empty string will be
257    * returned.
258    *
259    * PARAMETERS
260    *   isolocale  The ISO locale string to map. The mapping is
261    *             case-insensitive.
262    *
263    * RETURN
264    *   The mapped language name if found. NULL if locale is invalid
265    * EXCEPTIONS
266    *   miscellaneous runtime exceptions.
267    */
268    Function map_language_from_iso(isolocale IN VARCHAR2)
269                     RETURN VARCHAR2;
270 
271  /**
272    * Get the ORACLE territory name from an ISO locale
273    *
274    * For example, utl_i18n.map_territory_from_iso('en_US') will return
275    * 'America'.
276    * If user inputs an invalid locale string, an empty string will be
277    * returned.
278    *
279    * PARAMETERS
280    *   isolocale  The ISO locale string to map. The mapping is
281    *             case-insensitive.
282    *
283    * RETURN
284    *   The mapped territory name if found. NULL if locale is invalid
285    * EXCEPTIONS
286    *   miscellaneous runtime exceptions.
287    */
288   Function map_territory_from_iso(isolocale IN VARCHAR2)
289                     RETURN VARCHAR2;
290 
291  /**
292    * Get the ISO locale from an Oracle language and an Oracle territory
293    *
294    * For example, utl_i18n.map_locale_to_iso('American', 'America')
295    * will return 'en_US'.
296    * If user inputs an invalid string, an empty string will be returned.
297    *
298    * PARAMETERS
299    *   ora_language  The ORACLE language string. It is case-insensitive.
300    *   ora_territory The ORACLE territory string. It is case-insensitive.
301    *
302    * RETURN
303    *   The mapped ISO locale string on success. NULL if language or
304    *   territory is invalid
305    * EXCEPTIONS
306    *   miscellaneous runtime exceptions.
307    */
308   Function map_locale_to_iso(ora_language  IN VARCHAR2,
309                              ora_territory IN VARCHAR2)
310                     RETURN VARCHAR2;
311 
312   /**
313    * Get default ORACLE character set name from a language for general
314    * cases or only for MAIL application.
315    *
316    * For example, utl_i18n.get_default_charset('French',
317    * utl_i18n.GENERIC_CONTEXT, FALSE) will return 'WE8ISO8859P1'.
318    * If user inputs an invalid language name or invalid flag name,
319    * an empty string will be returned.
320    *
321    * PARAMETERS
322    *   language  The language name to map. The mapping is
323    *             case-insensitive.
324    *   context   GENERIC_CONTEXT - get default charset for general cases
325    *             MAIL_CONTEXT    - get default charset used in MAIL
326    *                               application
327    *   iswindows When MAIL_CONTEXT is set, the MAIL charset used differs
328    *             between the Windows platform and other platforms.
329    *             If GENERIC_CONTEXT is set, this variable has no effect
330    *
331    * RETURN
332    *   The default character set name if a match is found. NULL if no match
333    *   is found or the flag is invalid.
334    * EXCEPTIONS
335    *   miscellaneous runtime exceptions.
336    */
337   Function get_default_charset(language  IN VARCHAR2,
338                                context   IN PLS_INTEGER DEFAULT GENERIC_CONTEXT,
339                                iswindows IN BOOLEAN DEFAULT FALSE)
340                     RETURN VARCHAR2;
341 
342 
343   /**
344    * Validate an Oracle object name
345    *
346    * PARAMETERS
347    *   name  the Oracle object name to be validated
348    *
349    * RETURN
350    *   the index of the first invalid character.
351    *   returns 0 if it is a valid SQL name
352    * EXCEPTIONS
353    *   none
354    */
355 
356 Function VALIDATE_SQLNAME(name VARCHAR2 CHARACTER SET ANY_CS)
357          RETURN PLS_INTEGER;
358 
359 
360 
361   /**
362    * Convert between XML name and SQL identifier.
363    * Conversion rules are as follows:
364    *  SQLNAME_TO_XMLTAG:   SQL identifier -> XML name
365    *
366    *                        A character can be escaped into _xHHHH_ or
367    *                        _xHHHHHHHH_, where HHHH is the uppercase hexadecimal
368    *                        UCS2 representation of the character if it is in
369    *                        the UCS2 range, and HHHHHHHH is the uppercase
370    *                        hexadecimal UCS4 representation of the character
371    *                        if it is out of UCS2 range but in UCS4 range.
372    *
373    *                        The encoding is based on the following rules,
374    *                        (XML standard 1.0, Second Edition,
375    *                         SQL/XML candidate base document, 09-FEB-2001)
376    *
377    *                        (1) If the 1st character of the SQL identifier is
378    *                            not a valid 1st character of XML names, escape
379    *                            it into _xHHHH_ or _xHHHHHHHH_
380    *                        (2) If the leading three characters are x or X,
381    *                            m or M, l or L, add a leading _xFFFF_ before
382    *                            these three characters
383    *                        (3) If a character is ':', escape it to _x003A_
384    *                        (4) If it is a '_' followed by a 'x', escape the '_'
385    *                            to _x005F_
386    *                        (5) If a character is not the 1st character of the
387    *                            SQL identifier and it is not a valid XML name
388    *                            character, escape it to _xHHHH_ or _xHHHHHHHH_
389    *
390    * XMLTAG_TO_SQLNAME:   XML name -> SQL identifier
391    *
392    *                        (1) If the XML name has a leading _xFFFF_, skip it
393    *                        (2) convert those escaped characters, which are in
394    *                            a format of _xHHHH_ or _xHHHHHHHH_, back into
395    *                            their corresponding characters encoded in the given
396    *                            character set
397    *
398    *
399    * PARAMETERS
400    *   name    the name to be converted;
401    *   flag    which way the conversion goes
402    *           XMLTAG_TO_SQLNAME -- from xml name to sql identifier
403    *           SQLNAME_TO_XMLTAG -- from sql identifier to xml name
404    *
405    * RETURN
406    *   the converted name
407    * EXCEPTIONS
408    *   27102 --  out of memory
409    *   1722  --  invalid number,
410    *             cause: during XMLTAG_TO_SQLNAME
411    *             the escaping format is invalid
412    *             either the number format after _x is not a valid number
413    *             or there is no "_" appended after _xHHHH
414    */
415 
416 Function ENCODE_SQL_XML(name VARCHAR2 CHARACTER SET ANY_CS,
417                         flag PLS_INTEGER default XMLTAG_TO_SQLNAME)
418          RETURN VARCHAR2 CHARACTER SET name%CHARSET;
419 
420 
421 
422 
423 
424   /**
425    *  This function performs script transliteration.
426    *  In 10GR2, it only supports conversions between
427    *  Japanese Hiragana and Katakana characters
428    *
429    * PARAMETERS
430    *  data:  the data to be converted. Either CHAR or NCHAR data type
431    *  name:  the transliteration name
432    *
433    * RETURNS
434    *  The converted string.
435    *
436    * EXCEPTIONS
437    *  3001:  unsupported feature (UNSUPPORTED_TRANSLITERATION),
438    *         means the specified transliteration is not supported
439    *  27102: out of memory
440    */
441 
442 
443 Function  TRANSLITERATE (
444   data IN VARCHAR2 CHARACTER SET ANY_CS,
445   name IN VARCHAR2)
446 RETURN VARCHAR2 CHARACTER SET data%CHARSET;
447 
448 
449 
450   /**
451    * Returns the default linguistic sorting name for the specified language
452    *
453    * PARAMETERS
454    *   language the Oracle language name. Case-insensitive
455    *
456    * RETURN
457    *   the default linguistic sorting name. NULL if the given language
458    *   is invalid.
459    *
460    * EXCEPTIONS
461    *   none
462    */
463 Function GET_DEFAULT_LINGUISTIC_SORT(
464   language IN VARCHAR2 )
465 RETURN VARCHAR2;
466 
467   /**
468    * Returns the default ISO 4217 currency code for the specified territory
469    *
470    * PARAMETERS
471    *   territory the Oracle territory name. Case-insensitive
472    *
473    * RETURN
474    *   the default ISO 4217 currency code. NULL if the given territory
475    *   is invalid.
476    *
477    * EXCEPTIONS
478    *   none
479    */
480 Function GET_DEFAULT_ISO_CURRENCY(
481   territory IN VARCHAR2 )
482 RETURN VARCHAR2;
483 
484   /**
485    * Returns the local linguistic sorting names for the specified language
486    *
487    * PARAMETERS
488    *   language the Oracle language name. Case-insensitive
489    *
490    * RETURN
491    *   the list of local linguistic sorting names. NULL if the given language
492    *   is invalid.
493    *
494    * EXCEPTIONS
495    *   none
496    */
497 Function GET_LOCAL_LINGUISTIC_SORTS(
498   language IN VARCHAR2 )
499 RETURN string_array;
500 
501   /**
502    * Returns the local time zone names for the specified territory
503    *
504    * PARAMETERS
505    *   territory the Oracle territory name. Case-insensitive.
506    *
507    * RETURN
508    *   the list of local time zone names. NULL if the given territory
509    *   is invalid.
510    *
511    * EXCEPTIONS
512    *   none
513    */
514 Function GET_LOCAL_TIME_ZONES(
515   territory IN VARCHAR2 )
516 RETURN string_array;
517 
518   /**
519    * Returns the common time zone names (takes no parameters)
520    *
521    * RETURN
522    *   the list of common time zone names
523    *
524    * EXCEPTIONS
525    *   none
526    */
527 Function GET_COMMON_TIME_ZONES
528 RETURN string_array;
529 
530   /**
531    * Returns the local territory names for the specified language
532    *
533    * PARAMETERS
534    *   language the Oracle language name. Case-insensitive
535    *
536    * RETURN
537    *   the list of local territory names. NULL if the given language is
538    *   invalid.
539    *
540    * EXCEPTIONS
541    *   none
542    */
543 Function GET_LOCAL_TERRITORIES(
544   language IN VARCHAR2 )
545 RETURN string_array;
546 
547   /**
548    * Returns the local language names for the specified territory
549    *
550    * PARAMETERS
551    *   territory the Oracle territory name. Case-insensitive
552    *
553    * RETURN
554    *   the list of local language names. NULL if the given territory is
555    *   invalid.
556    *
557    * EXCEPTIONS
558    *   none
559    */
560 Function GET_LOCAL_LANGUAGES(
561   territory IN VARCHAR2 )
562 RETURN string_array;
563 
564 
565   /**
566    * Maps an Oracle full language name to its short language name
567    *
568    * PARAMETERS
569    *   language an Oracle full language name
570    *
571    * RETURN
572    *   the corresponding Oracle short language name
573    */
574 Function MAP_TO_SHORT_LANGUAGE(
575   language IN VARCHAR2)
576 RETURN VARCHAR2;
577 
578   /**
579    * Maps an Oracle short language name to its full language name
580    *
581    * PARAMETERS
582    *   language the Oracle short language name
583    *
584    * RETURN
585    *   The corresponding Oracle full language name
586    */
587 Function MAP_FROM_SHORT_LANGUAGE(
588   language IN VARCHAR2)
589 RETURN VARCHAR2;
590 
591   /**
592    * Returns the translation of a language and/or territory name in the
593    * specified translation language
594    *
595    * PARAMETERS
596    *   param1    a valid language name, territory name, or combined string
597    *             in the form of '<language>_<territory>'. Case-insensitive.
598    *   trans_language a translation language name, e.g., ITALIAN for the
599    *             Italian translation. The default translation is 'AMERICAN'.
600    *   flag      a translation type:
601    *           - LANGUAGE_TRANS  - the language translation
602    *           - TERRITORY_TRANS - the territory translation
603    *           - LANGUAGE_TERRITORY_TRANS - the language and territory
604    *                                        translation
605    *             The default translation type is LANGUAGE_TRANS
606    *
607    * RETURN
608    *   The translation
609    */
610 Function GET_TRANSLATION(
611   param1         IN VARCHAR2 CHARACTER SET ANY_CS,
612   trans_language IN VARCHAR2 DEFAULT 'AMERICAN',
613   flag           IN PLS_INTEGER DEFAULT LANGUAGE_TRANS)
614 RETURN VARCHAR2 CHARACTER SET param1%CHARSET;
615 
616   /**
617    * Returns the maximum character size of a given character set
618    *
619    * PARAMETERS
620    *   charset_name  A case-insensitive but valid character set name
621    *
622    * RETURN
623    *   The maximum character size
624    */
625 FUNCTION get_max_character_size(charset_name IN VARCHAR2)
626 RETURN PLS_INTEGER;
627 
628 END utl_i18n;