diff options
Diffstat (limited to 'src/backend/utils/mb/mbutils.c')
-rw-r--r-- | src/backend/utils/mb/mbutils.c | 477 |
1 files changed, 0 insertions, 477 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c deleted file mode 100644 index 43fd05f5985..00000000000 --- a/src/backend/utils/mb/mbutils.c +++ /dev/null @@ -1,477 +0,0 @@ -/* - * This file contains public functions for conversion between - * client encoding and server internal encoding. - * (currently mule internal code (mic) is used) - * Tatsuo Ishii - * $Id: mbutils.c,v 1.27 2001/11/20 01:32:29 ishii Exp $ - */ -#include "postgres.h" - -#include "miscadmin.h" -#include "mb/pg_wchar.h" -#include "utils/builtins.h" - -/* - * We handle for actual FE and BE encoding setting encoding-identificator - * and encoding-name too. It prevent searching and conversion from encoding - * to encoding name in getdatabaseencoding() and other routines. - * - * Default is PG_SQL_ASCII encoding (but this is never used, because - * backend during startup init it by SetDatabaseEncoding()). - * - * Karel Zak (Aug 2001) - */ -static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; -static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; - -static to_mic_converter client_to_mic; /* something to MIC */ -static from_mic_converter client_from_mic; /* MIC to something */ -static to_mic_converter server_to_mic; /* something to MIC */ -static from_mic_converter server_from_mic; /* MIC to something */ - -/* - * find encoding table entry by encoding - */ -pg_enconv * -pg_get_enconv_by_encoding(int encoding) -{ - if (PG_VALID_ENCODING(encoding)) - { - Assert((&pg_enconv_tbl[encoding])->encoding == encoding); - return &pg_enconv_tbl[encoding]; - } - return 0; -} - -/* - * Find appropriate encoding conversion functions. If no such - * functions found, returns -1. - * - * Arguments: - * - * src, dest (in): source and destination encoding ids - * - * src_to_mic (out): pointer to a function which converts src to - * mic/unicode according to dest. if src == mic/unicode or no - * appropriate function found, set to 0. - * - * dest_from_mic (out): pointer to a function which converts - * mic/unicode to dest according to src. if dest == mic/unicode or no - * appropriate function found, set to 0. - */ -int -pg_find_encoding_converters(int src, int dest, - to_mic_converter *src_to_mic, - from_mic_converter *dest_from_mic) -{ - if (src == dest) - { /* src == dest? */ - *src_to_mic = *dest_from_mic = 0; - } - else if (src == PG_MULE_INTERNAL) - { /* src == MULE_INETRNAL? */ - *dest_from_mic = pg_get_enconv_by_encoding(dest)->from_mic; - if (*dest_from_mic == 0) - return (-1); - *src_to_mic = 0; - } - else if (dest == PG_MULE_INTERNAL) - { /* dest == MULE_INETRNAL? */ - *src_to_mic = pg_get_enconv_by_encoding(src)->to_mic; - if (*src_to_mic == 0) - return (-1); - *dest_from_mic = 0; - } - else if (src == PG_UTF8) - { /* src == UNICODE? */ - *dest_from_mic = pg_get_enconv_by_encoding(dest)->from_unicode; - if (*dest_from_mic == 0) - return (-1); - *src_to_mic = 0; - } - else if (dest == PG_UTF8) - { /* dest == UNICODE? */ - *src_to_mic = pg_get_enconv_by_encoding(src)->to_unicode; - if (*src_to_mic == 0) - return (-1); - *dest_from_mic = 0; - } - else - { - *src_to_mic = pg_get_enconv_by_encoding(src)->to_mic; - *dest_from_mic = pg_get_enconv_by_encoding(dest)->from_mic; - if (*src_to_mic == 0 || *dest_from_mic == 0) - return (-1); - } - return (0); -} - -/* - * set the client encoding. if encoding conversion between - * client/server encoding is not supported, returns -1 - */ -int -pg_set_client_encoding(int encoding) -{ - int current_server_encoding = DatabaseEncoding->encoding; - - if (!PG_VALID_FE_ENCODING(encoding)) - return (-1); - - if (pg_find_encoding_converters(encoding, current_server_encoding, &client_to_mic, &server_from_mic) < 0) - return (-1); - - ClientEncoding = &pg_enc2name_tbl[encoding]; - - Assert(ClientEncoding->encoding == encoding); - - if (pg_find_encoding_converters(current_server_encoding, encoding, &server_to_mic, &client_from_mic) < 0) - return (-1); - return 0; -} - -/* - * returns the current client encoding - */ -int -pg_get_client_encoding(void) -{ - Assert(ClientEncoding); - return (ClientEncoding->encoding); -} - -/* - * returns the current client encoding name - */ -const char * -pg_get_client_encoding_name(void) -{ - Assert(ClientEncoding); - return (ClientEncoding->name); -} - -/* - * Convert src encoding and returns it. Actual conversion is done by - * src_to_mic and dest_from_mic, which can be obtained by - * pg_find_encoding_converters(). The reason we require two conversion - * functions is that we have an intermediate encoding: MULE_INTERNAL - * Using intermediate encodings will reduce the number of functions - * doing encoding conversions. Special case is either src or dest is - * the intermediate encoding itself. In this case, you don't need src - * or dest (setting 0 will indicate there's no conversion - * function). Another case is you have direct-conversion function from - * src to dest. In this case either src_to_mic or dest_from_mic could - * be set to 0 also. - * - * Note that If src or dest is UNICODE, we have to do - * direct-conversion, since we don't support conversion bwteen UNICODE - * and MULE_INTERNAL, we cannot go through MULE_INTERNAL. - * - * CASE 1: if no conversion is required, then the given pointer s is returned. - * - * CASE 2: if conversion is required, a palloc'd string is returned. - * - * Callers must check whether return value differs from passed value - * to determine whether to pfree the result or not! - * - * Note: we assume that conversion cannot cause more than a 4-to-1 growth - * in the length of the string --- is this enough? */ - -unsigned char * -pg_do_encoding_conversion(unsigned char *src, int len, - to_mic_converter src_to_mic, - from_mic_converter dest_from_mic) -{ - unsigned char *result = src; - unsigned char *buf; - - if (src_to_mic) - { - buf = (unsigned char *) palloc(len * 4 + 1); - (*src_to_mic) (result, buf, len); - result = buf; - len = strlen(result); - } - if (dest_from_mic) - { - buf = (unsigned char *) palloc(len * 4 + 1); - (*dest_from_mic) (result, buf, len); - if (result != src) - pfree(result); /* release first buffer */ - result = buf; - } - return result; -} - -/* - * Convert string using encoding_nanme. We assume that string's - * encoding is same as DB encoding. - * - * TEXT convert(TEXT string, NAME encoding_name) - */ -Datum -pg_convert(PG_FUNCTION_ARGS) -{ - Datum string = PG_GETARG_DATUM(0); - Datum dest_encoding_name = PG_GETARG_DATUM(1); - Datum src_encoding_name = DirectFunctionCall1( - namein, CStringGetDatum(DatabaseEncoding->name)); - Datum result; - - result = DirectFunctionCall3( - pg_convert2, string, src_encoding_name, dest_encoding_name); - - /* free memory allocated by namein */ - pfree((void *)dest_encoding_name); - - PG_RETURN_TEXT_P(result); -} - -/* - * Convert string using encoding_nanme. - * - * TEXT convert(TEXT string, NAME src_encoding_name, NAME dest_encoding_name) - */ -Datum -pg_convert2(PG_FUNCTION_ARGS) -{ - text *string = PG_GETARG_TEXT_P(0); - char *src_encoding_name = NameStr(*PG_GETARG_NAME(1)); - int src_encoding = pg_char_to_encoding(src_encoding_name); - char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2)); - int dest_encoding = pg_char_to_encoding(dest_encoding_name); - to_mic_converter src; - from_mic_converter dest; - unsigned char *result; - text *retval; - unsigned char *str; - int len; - - if (src_encoding < 0) - elog(ERROR, "Invalid source encoding name %s", src_encoding_name); - if (dest_encoding < 0) - elog(ERROR, "Invalid destination encoding name %s", dest_encoding_name); - - if (pg_find_encoding_converters(src_encoding, dest_encoding, &src, &dest) < 0) - { - elog(ERROR, "Conversion from %s to %s is not possible", - src_encoding_name, dest_encoding_name); - } - - /* make sure that source string is null terminated */ - len = VARSIZE(string) - VARHDRSZ; - str = palloc(len + 1); - memcpy(str, VARDATA(string), len); - *(str + len) = '\0'; - - result = pg_do_encoding_conversion(str, len, src, dest); - if (result == NULL) - elog(ERROR, "Encoding conversion failed"); - - /* build text data type structre. we cannot use textin() here, - since textin assumes that input string encoding is same as - database encoding. */ - len = strlen(result) + VARHDRSZ; - retval = palloc(len); - VARATT_SIZEP(retval) = len; - memcpy(VARDATA(retval), result, len - VARHDRSZ); - - if (result != str) - pfree(result); - pfree(str); - - /* free memory if allocated by the toaster */ - PG_FREE_IF_COPY(string, 0); - - PG_RETURN_TEXT_P(retval); -} - -/* - * convert client encoding to server encoding. - * - * CASE 1: if no conversion is required, then the given pointer s is returned. - * - * CASE 2: if conversion is required, a palloc'd string is returned. - * - * Callers must check whether return value differs from passed value - * to determine whether to pfree the result or not! - * - * Note: we assume that conversion cannot cause more than a 4-to-1 growth - * in the length of the string --- is this enough? - */ -unsigned char * -pg_client_to_server(unsigned char *s, int len) -{ - Assert(DatabaseEncoding); - Assert(ClientEncoding); - - if (ClientEncoding->encoding == DatabaseEncoding->encoding) - return s; - - return pg_do_encoding_conversion(s, len, client_to_mic, server_from_mic); -} - -/* - * convert server encoding to client encoding. - * - * CASE 1: if no conversion is required, then the given pointer s is returned. - * - * CASE 2: if conversion is required, a palloc'd string is returned. - * - * Callers must check whether return value differs from passed value - * to determine whether to pfree the result or not! - * - * Note: we assume that conversion cannot cause more than a 4-to-1 growth - * in the length of the string --- is this enough? - */ -unsigned char * -pg_server_to_client(unsigned char *s, int len) -{ - Assert(DatabaseEncoding); - Assert(ClientEncoding); - - if (ClientEncoding->encoding == DatabaseEncoding->encoding) - return s; - - return pg_do_encoding_conversion(s, len, server_to_mic, client_from_mic); -} - -/* convert a multi-byte string to a wchar */ -int -pg_mb2wchar(const unsigned char *from, pg_wchar *to) -{ - return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) (from, to, strlen(from)); -} - -/* convert a multi-byte string to a wchar with a limited length */ -int -pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) -{ - return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) (from, to, len); -} - -/* returns the byte length of a multi-byte word */ -int -pg_mblen(const unsigned char *mbstr) -{ - return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) (mbstr)); -} - -/* returns the length (counted as a wchar) of a multi-byte string */ -int -pg_mbstrlen(const unsigned char *mbstr) -{ - int len = 0; - - while (*mbstr) - { - mbstr += pg_mblen(mbstr); - len++; - } - return (len); -} - -/* returns the length (counted as a wchar) of a multi-byte string - (not necessarily NULL terminated) */ -int -pg_mbstrlen_with_len(const unsigned char *mbstr, int limit) -{ - int len = 0; - int l; - - while (limit > 0 && *mbstr) - { - l = pg_mblen(mbstr); - limit -= l; - mbstr += l; - len++; - } - return (len); -} - -/* - * returns the byte length of a multi-byte string - * (not necessarily NULL terminated) - * that is no longer than limit. - * this function does not break multi-byte word boundary. - */ -int -pg_mbcliplen(const unsigned char *mbstr, int len, int limit) -{ - int clen = 0; - int l; - - while (len > 0 && *mbstr) - { - l = pg_mblen(mbstr); - if ((clen + l) > limit) - break; - clen += l; - if (clen == limit) - break; - len -= l; - mbstr += l; - } - return (clen); -} - -/* - * Similar to pg_mbcliplen but the limit parameter specifies the - * character length, not the byte length. */ -int -pg_mbcharcliplen(const unsigned char *mbstr, int len, int limit) -{ - int clen = 0; - int nch = 0; - int l; - - while (len > 0 && *mbstr) - { - l = pg_mblen(mbstr); - nch++; - if (nch > limit) - break; - clen += l; - len -= l; - mbstr += l; - } - return (clen); -} - -void -SetDatabaseEncoding(int encoding) -{ - if (!PG_VALID_BE_ENCODING(encoding)) - elog(ERROR, "SetDatabaseEncoding(): invalid database encoding"); - - DatabaseEncoding = &pg_enc2name_tbl[encoding]; - Assert(DatabaseEncoding->encoding == encoding); -} - -int -GetDatabaseEncoding(void) -{ - Assert(DatabaseEncoding); - return (DatabaseEncoding->encoding); -} - -const char * -GetDatabaseEncodingName(void) -{ - Assert(DatabaseEncoding); - return (DatabaseEncoding->name); -} - -Datum -getdatabaseencoding(PG_FUNCTION_ARGS) -{ - Assert(DatabaseEncoding); - return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name)); -} - -Datum -pg_client_encoding(PG_FUNCTION_ARGS) -{ - Assert(ClientEncoding); - return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name)); -} |