1 PACKAGE utl_i18n AS
2
3 /* GDK (Globalization Development Kit) is a set of services that helps
4 * developers of monolingual applications create equivalent multilingual
5 * applications with minimal knowledge of internationalization issues.
6 * Oracle globalization services provide developers with a set of consistent,
7 * high-performance and easy-to-use tools.
8 */
9
10 /* Shift-status constants for the piecewise RAW_TO_CHAR / RAW_TO_NCHAR conversions */
11 SHIFT_IN CONSTANT PLS_INTEGER :=0;
12 SHIFT_OUT CONSTANT PLS_INTEGER :=1;
13
14 /* Miscellaneous flags used by the mapping API (see MAP_CHARSET and GET_DEFAULT_CHARSET) */
15 ORACLE_TO_IANA CONSTANT PLS_INTEGER := 0;
16 IANA_TO_ORACLE CONSTANT PLS_INTEGER := 1;
17
18 MAIL_GENERIC CONSTANT PLS_INTEGER := 0;
19 MAIL_WINDOWS CONSTANT PLS_INTEGER := 1;
20
21 GENERIC_CONTEXT CONSTANT PLS_INTEGER := 0;
22 MAIL_CONTEXT CONSTANT PLS_INTEGER := 1;
23
24 /* Conversion-direction flags for the ENCODE_SQL_XML function */
25 XMLTAG_TO_SQLNAME CONSTANT PLS_INTEGER :=0;
26 SQLNAME_TO_XMLTAG CONSTANT PLS_INTEGER :=1;
27
28 /* Transliteration names (presumably the values for the name argument of TRANSLITERATE) */
29 KANA_FWKATAKANA CONSTANT VARCHAR2(30) := 'kana_fwkatakana';
30 KANA_HWKATAKANA CONSTANT VARCHAR2(30) := 'kana_hwkatakana';
31 KANA_HIRAGANA CONSTANT VARCHAR2(30) := 'kana_hiragana' ;
32 FWKATAKANA_HWKATAKANA CONSTANT VARCHAR2(30) := 'fwkatakana_hwkatakana' ;
33 FWKATAKANA_HIRAGANA CONSTANT VARCHAR2(30) := 'fwkatakana_hiragana' ;
34 HWKATAKANA_FWKATAKANA CONSTANT VARCHAR2(30) := 'hwkatakana_fwkatakana';
35 HWKATAKANA_HIRAGANA CONSTANT VARCHAR2(30) := 'hwkatakana_hiragana' ;
36 HIRAGANA_FWKATAKANA CONSTANT VARCHAR2(30) := 'hiragana_fwkatakana';
37 HIRAGANA_HWKATAKANA CONSTANT VARCHAR2(30) := 'hiragana_hwkatakana';
38
39 /* Pre-defined exceptions; UNSUPPORTED_TRANSLITERATION is bound to error -3001 (see TRANSLITERATE) */
40 UNSUPPORTED_TRANSLITERATION EXCEPTION;
41 PRAGMA EXCEPTION_INIT(UNSUPPORTED_TRANSLITERATION, -3001);
42
43 -- Translation-type flags for the flag argument of GET_TRANSLATION --
44 LANGUAGE_TRANS CONSTANT PLS_INTEGER :=0;
45 TERRITORY_TRANS CONSTANT PLS_INTEGER :=1;
46 LANGUAGE_TERRITORY_TRANS CONSTANT PLS_INTEGER :=2;
47
48 /* List-of-strings type; returned by the GET_LOCAL_* and GET_COMMON_TIME_ZONES functions */
49 TYPE string_array IS TABLE of VARCHAR2(32767)
50 INDEX BY BINARY_INTEGER;
51
52 /**
53 * Convert a VARCHAR2/NVARCHAR2 string to another charset and
54 * return the result in a RAW variable.
55 *
56 * For example, utl_i18n.string_to_raw('abcdef'||chr(170), 'utf8')
57 * will return a raw of hex value '616263646566C2AA'.
58 * If the user inputs an invalid character set or an empty input string,
59 * an empty string will be returned.
60 *
61 * PARAMETERS
62 * data The input VARCHAR2/NVARCHAR2 to convert.
63 * dst_charset The destination charset to be converted to. Defaults to NULL.
64 *
65 * RETURN
66 * The byte string after conversion, in raw format.
67 * EXCEPTIONS
68 * miscellaneous runtime exceptions.
69 */
70 FUNCTION string_to_raw(data IN VARCHAR2 CHARACTER SET ANY_CS,
71 dst_charset IN VARCHAR2 DEFAULT NULL)
72 RETURN RAW;
73
74 /**
75 * Convert a raw buffer which is encoded in another charset
76 * back to a VARCHAR2 string.
77 *
78 * For example, utl_i18n.raw_to_char(hextoraw('616263646566C2AA'), 'utf8')
79 * will return a string (encoded in database charset) 'abcdef'||chr(170).
80 * If the user inputs an invalid character set or an empty raw buffer,
81 * an empty string will be returned.
82 *
83 * PARAMETERS
84 * data The input byte array in raw.
85 * src_charset The source charset the raw data is converted from. Defaults to NULL.
86 *
87 * RETURN
88 * The string converted back into database charset encoding.
89 * EXCEPTIONS
90 * miscellaneous runtime exceptions.
91 */
92 FUNCTION raw_to_char(data IN RAW,
93 src_charset IN VARCHAR2 DEFAULT NULL)
94 RETURN VARCHAR2;
95
96 /**
97 * Convert a raw buffer which is encoded in another charset
98 * back to an NVARCHAR2 string.
99 *
100 * For example, utl_i18n.raw_to_nchar(hextoraw('616263646566C2AA'), 'utf8')
101 * will return a string (encoded in national charset) 'abcdef'||chr(170).
102 * If the user inputs an invalid character set or an empty raw buffer,
103 * an empty string will be returned.
104 *
105 * PARAMETERS
106 * data The input byte array in raw.
107 * src_charset The source charset the raw data is converted from. Defaults to NULL.
108 *
109 * RETURN
110 * The string converted back into national charset encoding.
111 * EXCEPTIONS
112 * miscellaneous runtime exceptions.
113 */
114 FUNCTION raw_to_nchar(data IN RAW,
115 src_charset IN VARCHAR2 DEFAULT NULL)
116 RETURN NVARCHAR2;
117
118 /**
119 * Convert a raw buffer which is encoded in another charset
120 * back to a VARCHAR2 string and return the shift status and
121 * scanned length for the input. This information can be used
122 * for piecewise conversion.
123 *
124 * For example, utl_i18n.raw_to_char(hextoraw('616263646566C2AA'),
125 * 'utf8', slen, shf) will return a string 'abcdef'||chr(170) and
126 * set slen = 8, shf = SHIFT_IN.
127 * If the user inputs an invalid character set or an empty raw buffer,
128 * an empty string will be returned.
129 *
130 * PARAMETERS
131 * data The input byte array in raw.
132 * src_charset The source charset the raw data is converted from.
133 * scanned_length The number of bytes scanned from the input raw data. (OUT)
134 * shift_status The shift status at the end of this scan. (IN/OUT)
135 * User must set this variable to SHIFT_IN the first
136 * time it is called in a piecewise conversion.
137 * RETURN
138 * The string converted back into database charset encoding.
139 * EXCEPTIONS
140 * miscellaneous runtime exceptions.
141 */
142 Function raw_to_char(data IN RAW,
143 src_charset IN VARCHAR2 DEFAULT NULL,
144 scanned_length OUT PLS_INTEGER,
145 shift_status IN OUT PLS_INTEGER)
146 RETURN VARCHAR2;
147
148 /**
149 * Convert a raw buffer which is encoded in another charset
150 * back to an NVARCHAR2 string and return the shift status and
151 * scanned length for the input. This information can be used
152 * for piecewise conversion.
153 *
154 * For example, utl_i18n.raw_to_nchar(hextoraw('616263646566C2AA'),
155 * 'utf8', slen, shf) will return a string 'abcdef'||chr(170) and
156 * set slen = 8, shf = SHIFT_IN.
157 * If the user inputs an invalid character set or an empty raw buffer,
158 * an empty string will be returned.
159 *
160 * PARAMETERS
161 * data The input byte array in raw.
162 * src_charset The source charset the raw data is converted from.
163 * scanned_length The number of bytes scanned from the input raw data. (OUT)
164 * shift_status The shift status at the end of this scan. (IN/OUT)
165 * User must set this variable to SHIFT_IN the first
166 * time it is called in a piecewise conversion.
167 * RETURN
168 * The string converted back into national charset encoding.
169 * EXCEPTIONS
170 * miscellaneous runtime exceptions.
171 */
172 Function raw_to_nchar(data IN RAW,
173 src_charset IN VARCHAR2 DEFAULT NULL,
174 scanned_length OUT PLS_INTEGER,
175 shift_status IN OUT PLS_INTEGER)
176 RETURN NVARCHAR2;
177
178 /**
179 * Escape a VARCHAR2/NVARCHAR2 to a character reference representation.
180 * Two kinds of characters will be converted:
181 * (1) The predefined characters which have special meaning,
182 * for example, &, <, > etc.
183 * (2) Multibyte characters which cannot be converted to
184 * the web page character set
185 *
186 * For example, utl_i18n.escape_reference('ab'||chr(170), 'us7ascii')
187 * will return a string 'ab&#xaa;'.
188 * If the user inputs an invalid character set or an empty string,
189 * an empty string will be returned.
190 *
191 * PARAMETERS
192 * str The input string to escape.
193 * page_cs_name The name of the web page encoding character set. Defaults to NULL.
194 * RETURN
195 * The string escaped to character reference representation.
196 * EXCEPTIONS
197 * miscellaneous runtime exceptions.
198 */
199 Function escape_reference(str IN VARCHAR2 CHARACTER SET ANY_CS,
200 page_cs_name IN VARCHAR2 DEFAULT NULL)
201 RETURN VARCHAR2 CHARACTER SET str%CHARSET;
202
203
204 /**
205 * Unescape a VARCHAR2/NVARCHAR2 from character reference representation.
206 *
207 * For example, utl_i18n.unescape_reference('ab&#xaa;')
208 * will return a string 'ab'||chr(170).
209 * If the input is an empty string, an empty string will be returned.
210 *
211 * PARAMETERS
212 * str The input string to unescape.
213 * RETURN
214 * The string unescaped from character reference representation.
215 * EXCEPTIONS
216 * miscellaneous runtime exceptions.
217 */
218 Function unescape_reference(str IN VARCHAR2 CHARACTER SET ANY_CS)
219 RETURN VARCHAR2 CHARACTER SET str%CHARSET;
220
221 /**
222 * Map an ORACLE character set name to an IANA name and vice versa, or map
223 * a generic character set to a MAIL character set. For example,
224 * utl_i18n.map_charset('iso-8859-1',utl_i18n.GENERIC_CONTEXT,
225 * utl_i18n.IANA_TO_ORACLE) will return 'WE8ISO8859P1'. If the user
226 * inputs an invalid character set or invalid flag name, an empty
227 * string will be returned. If the user does not specify the flag,
228 * "ORACLE_TO_IANA" is used as the default flag. That is,
229 * if the user does not specify the conversion direction, we always assume
230 * that the given name uses the Oracle standard.
231 *
232 * PARAMETERS
233 * charset The character set name to map. The mapping is
234 * case-insensitive.
235 * context GENERIC_CONTEXT - map between ORACLE and IANA (default)
236 * MAIL_CONTEXT - map between a generic character set and a
237 * MAIL character set
238 * flag ORACLE_TO_IANA - map from ORACLE name to IANA name (default).
239 * IANA_TO_ORACLE - map from IANA name to ORACLE name.
240 * RETURN
241 * The mapped character set name if a match is found. NULL if no match
242 * is found or the flag is invalid.
243 * EXCEPTIONS
244 * miscellaneous runtime exceptions.
245 */
246 FUNCTION map_charset(charset IN VARCHAR2,
247 context IN PLS_INTEGER DEFAULT GENERIC_CONTEXT,
248 flag IN PLS_INTEGER DEFAULT ORACLE_TO_IANA)
249 RETURN VARCHAR2;
250
251 /**
252 * Get the ORACLE language name from an ISO locale.
253 *
254 * For example, utl_i18n.map_language_from_iso('en_US') will return
255 * 'American'.
256 * If the user inputs an invalid locale string, an empty string will be
257 * returned.
258 *
259 * PARAMETERS
260 * isolocale The ISO locale string to map. The mapping is
261 * case-insensitive.
262 *
263 * RETURN
264 * The mapped language name if found. NULL if the locale is invalid.
265 * EXCEPTIONS
266 * miscellaneous runtime exceptions.
267 */
268 Function map_language_from_iso(isolocale IN VARCHAR2)
269 RETURN VARCHAR2;
270
271 /**
272 * Get the ORACLE territory name from an ISO locale.
273 *
274 * For example, utl_i18n.map_territory_from_iso('en_US') will return
275 * 'America'.
276 * If the user inputs an invalid locale string, an empty string will be
277 * returned.
278 *
279 * PARAMETERS
280 * isolocale The ISO locale string to map. The mapping is
281 * case-insensitive.
282 *
283 * RETURN
284 * The mapped territory name if found. NULL if the locale is invalid.
285 * EXCEPTIONS
286 * miscellaneous runtime exceptions.
287 */
288 Function map_territory_from_iso(isolocale IN VARCHAR2)
289 RETURN VARCHAR2;
290
291 /**
292 * Get the ISO locale from an ORACLE language and an ORACLE territory.
293 *
294 * For example, utl_i18n.map_locale_to_iso('American', 'America')
295 * will return 'en_US'.
296 * If the user inputs an invalid string, an empty string will be returned.
297 *
298 * PARAMETERS
299 * ora_language The ORACLE language string. It is case-insensitive.
300 * ora_territory The ORACLE territory string. It is case-insensitive.
301 *
302 * RETURN
303 * The mapped ISO locale string on success. NULL if the language or
304 * territory is invalid.
305 * EXCEPTIONS
306 * miscellaneous runtime exceptions.
307 */
308 Function map_locale_to_iso(ora_language IN VARCHAR2,
309 ora_territory IN VARCHAR2)
310 RETURN VARCHAR2;
311
312 /**
313 * Get the default ORACLE character set name for a language, for general
314 * cases or only for MAIL applications.
315 *
316 * For example, utl_i18n.get_default_charset('French',
317 * utl_i18n.GENERIC_CONTEXT, FALSE) will return 'WE8ISO8859P1'.
318 * If the user inputs an invalid language name or an invalid context flag,
319 * an empty string will be returned.
320 *
321 * PARAMETERS
322 * language The language name to map. The mapping is
323 * case-insensitive.
324 * context GENERIC_CONTEXT - get default charset for general cases (default)
325 * MAIL_CONTEXT - get default charset used in MAIL
326 * applications
327 * iswindows When MAIL_CONTEXT is set, the MAIL charset used is different
328 * on the Windows platform and on other platforms. Defaults to FALSE.
329 * If GENERIC_CONTEXT is set, this variable has no effect.
330 *
331 * RETURN
332 * The default character set name if a match is found. NULL if no match
333 * is found or the flag is invalid.
334 * EXCEPTIONS
335 * miscellaneous runtime exceptions.
336 */
337 Function get_default_charset(language IN VARCHAR2,
338 context IN PLS_INTEGER DEFAULT GENERIC_CONTEXT,
339 iswindows IN BOOLEAN DEFAULT FALSE)
340 RETURN VARCHAR2;
341
342
343 /**
344 * Validate an Oracle object name.
345 *
346 * PARAMETERS
347 * name The Oracle object name to be validated.
348 *
349 * RETURN
350 * The index of the first invalid character.
351 * Returns 0 if it is a valid SQL name.
352 * EXCEPTIONS
353 * none
354 */
355
356 Function VALIDATE_SQLNAME(name VARCHAR2 CHARACTER SET ANY_CS)
357 RETURN PLS_INTEGER;
358
359
360
361 /**
362 * Convert between an XML name and a SQL identifier.
363 * The conversion rules are as follows:
364 * SQLNAME_TO_XMLTAG: SQL identifier -> XML name
365 *
366 * A character can be escaped into _xHHHH_ or
367 * _xHHHHHHHH_, where HHHH is the uppercase hexadecimal
368 * UCS2 representation of the character if it is in
369 * the UCS2 range, and HHHHHHHH is the uppercase
370 * hexadecimal UCS4 representation of the character
371 * if it is out of UCS2 range but in UCS4 range.
372 *
373 * The encoding is based on the following rules,
374 * (XML standard 1.0, Second Edition,
375 * SQL/XML candidate base document, 09-FEB-2001)
376 *
377 * (1) If the 1st character of the SQL identifier is
378 * not a valid 1st character of XML names, escape
379 * it into _xHHHH_ or _xHHHHHHHH_
380 * (2) If the leading three characters are x or X,
381 * m or M, l or L, add a leading _xFFFF_ before
382 * these three characters
383 * (3) If a character is ':', escape it to _x003A_
384 * (4) If it is a '_' followed by a 'x', escape the '_'
385 * to _x005F_
386 * (5) If a character is not the 1st character of the
387 * SQL identifier and it is not a valid XML name
388 * character, escape it to _xHHHH_ or _xHHHHHHHH_
389 *
390 * XMLTAG_TO_SQLNAME: XML name -> SQL identifier
391 *
392 * (1) If the XML name has a leading _xFFFF_, skip it
393 * (2) Convert those escaped characters, which are in
394 * a format of _xHHHH_ or _xHHHHHHHH_, back into
395 * the corresponding characters encoded in the given
396 * character set
397 *
398 *
399 * PARAMETERS
400 * name The name to be converted.
401 * flag Which way the conversion goes (default XMLTAG_TO_SQLNAME):
402 * XMLTAG_TO_SQLNAME -- from xml name to sql identifier
403 * SQLNAME_TO_XMLTAG -- from sql identifier to xml name
404 *
405 * RETURN
406 * The converted name.
407 * EXCEPTIONS
408 * 27102 -- out of memory
409 * 1722 -- invalid number,
410 * cause: during XMLTAG_TO_SQLNAME
411 * the escaping format is invalid:
412 * either the number format after _x is not a valid number
413 * or there is no "_" appended after _xHHHH
414 */
415
416 Function ENCODE_SQL_XML(name VARCHAR2 CHARACTER SET ANY_CS,
417 flag PLS_INTEGER default XMLTAG_TO_SQLNAME)
418 RETURN VARCHAR2 CHARACTER SET name%CHARSET;
419
420
421
422
423
424 /**
425 * Perform script transliteration.
426 * In 10GR2, this only supports conversions between
427 * Japanese Hiragana and Katakana characters.
428 *
429 * PARAMETERS
430 * data: the data to be converted. Either CHAR or NCHAR data type
431 * name: the transliteration name
432 *
433 * RETURNS
434 * The converted string.
435 *
436 * EXCEPTIONS
437 * 3001: unsupported feature (UNSUPPORTED_TRANSLITERATION),
438 * means the specified transliteration is not supported
439 * 27102: out of memory
440 */
441
442
443 Function TRANSLITERATE (
444 data IN VARCHAR2 CHARACTER SET ANY_CS,
445 name IN VARCHAR2)
446 RETURN VARCHAR2 CHARACTER SET data%CHARSET;
447
448
449
450 /**
451 * Returns the default linguistic sorting name for the specified language.
452 *
453 * PARAMETERS
454 * language The Oracle language name. Case-insensitive.
455 *
456 * RETURN
457 * The default linguistic sorting name. NULL if the given language
458 * is invalid.
459 *
460 * EXCEPTIONS
461 * none
462 */
463 Function GET_DEFAULT_LINGUISTIC_SORT(
464 language IN VARCHAR2 )
465 RETURN VARCHAR2;
466
467 /**
468 * Returns the default ISO 4217 currency code for the specified territory.
469 *
470 * PARAMETERS
471 * territory The Oracle territory name. Case-insensitive.
472 *
473 * RETURN
474 * The default ISO 4217 currency code. NULL if the given territory
475 * is invalid.
476 *
477 * EXCEPTIONS
478 * none
479 */
480 Function GET_DEFAULT_ISO_CURRENCY(
481 territory IN VARCHAR2 )
482 RETURN VARCHAR2;
483
484 /**
485 * Returns the local linguistic sorting names for the specified language.
486 *
487 * PARAMETERS
488 * language The Oracle language name. Case-insensitive.
489 *
490 * RETURN
491 * The list of local linguistic sorting names. NULL if the given language
492 * is invalid.
493 *
494 * EXCEPTIONS
495 * none
496 */
497 Function GET_LOCAL_LINGUISTIC_SORTS(
498 language IN VARCHAR2 )
499 RETURN string_array;
500
501 /**
502 * Returns the local time zone names for the specified territory.
503 *
504 * PARAMETERS
505 * territory The Oracle territory name. Case-insensitive.
506 *
507 * RETURN
508 * The list of local time zone names. NULL if the given territory
509 * is invalid.
510 *
511 * EXCEPTIONS
512 * none
513 */
514 Function GET_LOCAL_TIME_ZONES(
515 territory IN VARCHAR2 )
516 RETURN string_array;
517
518 /**
519 * Returns the common time zone names. Takes no arguments.
520 *
521 * RETURN
522 * The list of common time zone names.
523 *
524 * EXCEPTIONS
525 * none
526 */
527 Function GET_COMMON_TIME_ZONES
528 RETURN string_array;
529
530 /**
531 * Returns the local territory names for the specified language.
532 *
533 * PARAMETERS
534 * language The Oracle language name. Case-insensitive.
535 *
536 * RETURN
537 * The list of local territory names. NULL if the given language is
538 * invalid.
539 *
540 * EXCEPTIONS
541 * none
542 */
543 Function GET_LOCAL_TERRITORIES(
544 language IN VARCHAR2 )
545 RETURN string_array;
546
547 /**
548 * Returns the local language names for the specified territory.
549 *
550 * PARAMETERS
551 * territory The Oracle territory name. Case-insensitive.
552 *
553 * RETURN
554 * The list of local language names. NULL if the given territory is
555 * invalid.
556 *
557 * EXCEPTIONS
558 * none
559 */
560 Function GET_LOCAL_LANGUAGES(
561 territory IN VARCHAR2 )
562 RETURN string_array;
563
564
565 /**
566 * Maps an Oracle full language name to the short language name.
567 *
568 * PARAMETERS
569 * language An Oracle full language name.
570 *
571 * RETURN
572 * The corresponding Oracle short language name.
573 */
574 Function MAP_TO_SHORT_LANGUAGE(
575 language IN VARCHAR2)
576 RETURN VARCHAR2;
577
578 /**
579 * Maps an Oracle short language name to the full language name.
580 *
581 * PARAMETERS
582 * language The Oracle short language name.
583 *
584 * RETURN
585 * The corresponding Oracle full language name.
586 */
587 Function MAP_FROM_SHORT_LANGUAGE(
588 language IN VARCHAR2)
589 RETURN VARCHAR2;
590
591 /**
592 * Returns the translation of the language and/or territory name in the
593 * translation language.
594 *
595 * PARAMETERS
596 * param1 A valid language name, territory name, or combined string
597 * in the form of '<language>_<territory>'. Case-insensitive.
598 * trans_language A translation language name, e.g., ITALIAN for the
599 * Italian translation. The default translation is 'AMERICAN'.
600 * flag A translation type:
601 * - LANGUAGE_TRANS - the language translation
602 * - TERRITORY_TRANS - the territory translation
603 * - LANGUAGE_TERRITORY_TRANS - the language and territory
604 * translation
605 * The default translation type is LANGUAGE_TRANS.
606 *
607 * RETURN
608 * The translation, in the character set of param1.
609 */
610 Function GET_TRANSLATION(
611 param1 IN VARCHAR2 CHARACTER SET ANY_CS,
612 trans_language IN VARCHAR2 DEFAULT 'AMERICAN',
613 flag IN PLS_INTEGER DEFAULT LANGUAGE_TRANS)
614 RETURN VARCHAR2 CHARACTER SET param1%CHARSET;
615
616 /**
617 * Returns the max character size of a given character set.
618 *
619 * PARAMETERS
620 * charset_name A case-insensitive but valid character set name.
621 *
622 * RETURN
623 * The max character size.
624 */
625 FUNCTION get_max_character_size(charset_name IN VARCHAR2)
626 RETURN PLS_INTEGER;
627
628 END utl_i18n;