/* 
 * PHP3 Internationalization support program.
 *
 * Copyright (c) 1999 by the PHP3 internationalization team.
 * All rights reserved.
 *
 * This program is free software. You can use, redistribute and/or modify
 * without fee under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY including implied or express warranty of
 * merchantability or fitness for a particular purpose.
 *
 * Currently, the "PHP3 internationalization team" has no relationship with
 * the "PHP Development Team". But we hope this code will be integrated
 * into PHP3, and that it will be distributed as a part of PHP3.
 *
 * See README_i18n for more detail.
 *
 * Authors:
 *    Hironori Sato <satoh@jpnnet.com>
 *    Shigeru Kanemoto <sgk@happysize.co.jp>
 *    Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
 */

/*
    FIXME Currently, this code supports only "ja_JP" locale.
*/

#include "php.h"
#include "internal_functions.h"

#ifdef	PHP3_I18N

#if HAVE_STRING_H
#include <string.h>
#else
#include <strings.h>
#endif

#include "i18n_functions.h"
#include "php3_i18n.h"
#include "i18n_ja_jp_filter.h"

#ifdef HAVE_MBREGEX
#include "mbregex.h"
#endif

/******************************************************************************/

/*
 * Forward declarations of the script-callable entry points implemented in
 * this file.  They are declared here so that the i18n_functions[] table
 * below can reference them before their definitions.
 */
static void php3_i18n_http_output(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mbstrlen(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mbstrpos(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mbstrrpos(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mbsubstr(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mbstrcut(INTERNAL_FUNCTION_PARAMETERS);
static void php3_i18n_convert(INTERNAL_FUNCTION_PARAMETERS);
static void php3_i18n_discover_encoding(INTERNAL_FUNCTION_PARAMETERS);
static void php3_i18n_internal_encoding(INTERNAL_FUNCTION_PARAMETERS);
/*static void php3_i18n_script_encoding(INTERNAL_FUNCTION_PARAMETERS);*/
static void php3_i18n_http_input(INTERNAL_FUNCTION_PARAMETERS);
static void php3_i18n_http_input_identify(INTERNAL_FUNCTION_PARAMETERS);
static void php3_i18n_mime_header_encode(INTERNAL_FUNCTION_PARAMETERS);
static void php3_i18n_mime_header_decode(INTERNAL_FUNCTION_PARAMETERS);
static void php3_i18n_ja_jp_hantozen(INTERNAL_FUNCTION_PARAMETERS);

/* Regular-expression entry points; only built when mbregex is available. */
#ifdef HAVE_MBREGEX
static void php3_mbereg(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mberegi(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mberegreplace(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mberegireplace(INTERNAL_FUNCTION_PARAMETERS);
static void php3_mbsplit(INTERNAL_FUNCTION_PARAMETERS);

/* Defined outside this file; used as the third field of the mbereg()/mberegi()
 * table entries below. */
extern unsigned char third_argument_force_ref[];
#endif

/*
 * Function table of this module: maps each script-level function name to its
 * C handler.  The third field is NULL for every entry except mbereg() and
 * mberegi(), which pass third_argument_force_ref (their handler requires the
 * optional third argument to be passed by reference).  The table ends with an
 * all-NULL sentinel entry.
 */
function_entry i18n_functions[] = {
    { "i18n_http_output", php3_i18n_http_output, NULL },
    { "setkanjioutput",	  php3_i18n_http_output, NULL },	/* backward compatibility */
    { "mbstrlen",         php3_mbstrlen,         NULL },
    { "mbstrpos",         php3_mbstrpos,         NULL },
    { "mbstrrpos",        php3_mbstrrpos,        NULL },
    { "mbsubstr",         php3_mbsubstr,         NULL },
    { "mbstrcut",         php3_mbstrcut,         NULL },
#ifdef HAVE_MBREGEX
    { "mbereg",           php3_mbereg,     third_argument_force_ref },
    { "mberegi",          php3_mberegi,    third_argument_force_ref },
    { "mbereg_replace",   php3_mberegreplace,   NULL },
    { "mberegi_replace",  php3_mberegireplace,  NULL },
    { "mbsplit",          php3_mbsplit,         NULL },
#endif
    { "i18n_convert",     php3_i18n_convert,    NULL },
    { "i18n_discover_encoding",   php3_i18n_discover_encoding,   NULL },
    { "i18n_internal_encoding",   php3_i18n_internal_encoding,   NULL },
/*  { "i18n_script_encoding",     php3_i18n_script_encoding,     NULL },*/
    { "i18n_http_input",          php3_i18n_http_input,          NULL },
    { "i18n_http_input_identify", php3_i18n_http_input_identify, NULL },
    { "i18n_mime_header_encode",     php3_i18n_mime_header_encode,     NULL },
    { "i18n_mime_header_decode",     php3_i18n_mime_header_decode,     NULL },
    { "i18n_ja_jp_hantozen",     php3_i18n_ja_jp_hantozen,     NULL },
    { NULL,               NULL,                  NULL }
};

/* module registration */
/*
 * Module information callback (referenced from i18n_module_entry).
 * Emits a short HTML fragment via PUTS() stating that only the "ja_JP"
 * locale is supported, plus a note when mbregex support is compiled in.
 */
static void
i18n_module_info(void)
{
#ifdef HAVE_MBREGEX
	PUTS("only \"ja_JP\" locale is supported.<br>mbregex : enable");	/* XXX */
#else
	PUTS("only \"ja_JP\" locale is supported.");	/* XXX */
#endif
}

/*
 * Module descriptor: module name, function table, four unused (NULL) slots,
 * and the info callback.
 * NOTE(review): the four NULL slots are presumably the module/request
 * startup and shutdown hooks -- confirm against php3_module_entry.
 */
php3_module_entry i18n_module_entry = {
	"Internationalization", i18n_functions, NULL, NULL, NULL, NULL,
	i18n_module_info, STANDARD_MODULE_PROPERTIES
};


/******************************************************************************/
/* i18n_http_output() */

/*
 * encoding = i18n_http_output()
 *     Returns the name of the current output encoding, or FALSE (with a
 *     warning) when the configured encoding number cannot be resolved.
 *
 * i18n_http_output(encoding)
 *     Selects a new output encoding by name; returns FALSE (with a warning)
 *     when i18n_set_output_encoding() rejects the name.
 */
static void
php3_i18n_http_output(INTERNAL_FUNCTION_PARAMETERS)
{
	YYSTYPE *new_name;
	mbfl_encoding_ptr current;
	TLS_VARS;

	switch (ARG_COUNT(ht)) {
	case 0:
		/* query form */
		current = mbfl_no2encoding(i18n_get_output_encoding());
		if (current == NULL) {
			php3_error(E_WARNING, "configuration error \"i18n_http_output\"");
			RETURN_FALSE;
		}
		RETVAL_STRING((char*)current->name, 1);
		return;

	case 1:
		/* setter form */
		if (getParameters(ht, 1, &new_name) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		convert_to_string(new_name);
		if (i18n_set_output_encoding(new_name->value.str.val) < 0) {
			php3_error(E_WARNING,
				"i18n_http_output() can't accept \"%s\"", new_name->value.str.val);
			RETURN_FALSE;
		}
		return;

	default:
		WRONG_PARAM_COUNT;
	}
}



/************************************************************************/
/* mbstrlen - strlen for multi-byte                                     */ 
/************************************************************************/
/*
 * length = mbstrlen(string [, encoding])
 *
 * Returns whatever mbfl_strlen() reports for the string in the named
 * encoding, or in the internal encoding when none is given.  An unknown
 * encoding name produces a warning and FALSE.
 */
static void
php3_mbstrlen(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *subject, *enc_name;
	mbfl_encoding_ptr enc;
	int encoding_no;

	switch (ARG_COUNT(ht)) {
	case 1:
		if (getParameters(ht, 1, &subject) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		convert_to_string(subject);
		encoding_no = i18n_get_internal_encoding();
		break;

	case 2:
		if (getParameters(ht, 2, &subject, &enc_name) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		convert_to_string(subject);
		convert_to_string(enc_name);
		enc = mbfl_name2encoding(enc_name->value.str.val);
		if (enc == NULL) {
			php3_error(E_WARNING, "unknown encoding \"%s\"", enc_name->value.str.val);
			RETURN_FALSE;
		}
		encoding_no = enc->no_encoding;
		break;

	default:
		WRONG_PARAM_COUNT;
	}

	RETURN_LONG(mbfl_strlen(subject->value.str.val, encoding_no));

} /* end of php3_mbstrlen */



/************************************************************************/
/* mbstrpos - strpos for multi-byte                                     */ 
/************************************************************************/
/*
 * pos = mbstrpos(haystack, needle [, offset [, encoding]])
 *
 * Searches haystack for needle via mbfl_strpos() (forward search), starting
 * at offset, in the named encoding or the internal encoding by default.
 *
 * Returns the position reported by mbfl_strpos(), or FALSE when the needle
 * is not found.  Warnings plus FALSE are produced for an unknown encoding
 * name, an offset outside the string (negative, or larger than the
 * haystack's byte length), or an empty needle.
 */
static void
php3_mbstrpos(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *haystack, *needle, *OFFSET, *codename;
	int offset, code, n;
	mbfl_encoding_ptr penc;

	offset = 0;
	code = i18n_get_internal_encoding();
	switch(ARG_COUNT(ht)) {
	case 2:
		if (getParameters(ht, 2, &haystack, &needle) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		break;
	case 3:
		if (getParameters(ht, 3, &haystack, &needle, &OFFSET) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		convert_to_long(OFFSET);
		offset = OFFSET->value.lval;
		break;
	case 4:
		if (getParameters(ht, 4, &haystack, &needle, &OFFSET, &codename) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		convert_to_long(OFFSET);
		offset = OFFSET->value.lval;
		convert_to_string(codename);
		penc = mbfl_name2encoding(codename->value.str.val);
		if(penc != NULL) {
			code = penc->no_encoding;
		} else {
			php3_error(E_WARNING, "unknown encoding \"%s\"", codename->value.str.val);
			RETURN_FALSE;
		}
		break;
	default:
		WRONG_PARAM_COUNT;
	}

	convert_to_string(haystack);
	/* Reject negative offsets as well as offsets past the end: a negative
	 * value must never reach the low-level search routine. */
	if (offset < 0 || offset > haystack->value.str.len) {
		php3_error(E_WARNING,"offset not contained in string");
		RETURN_FALSE;
	}

	convert_to_string(needle);
	if (needle->value.str.len == 0) {
		php3_error(E_WARNING,"Empty delimiter");
		RETURN_FALSE;
	}

	/* last argument 0 selects the forward search (mbstrrpos passes 1) */
	n = mbfl_strpos(haystack->value.str.val, needle->value.str.val, offset, code, 0);
	if (n >= 0) {
		RETVAL_LONG(n);
	} else {
		RETVAL_FALSE;
	}
} /* end of php3_mbstrpos */



/************************************************************************/
/* mbstrrpos - strrpos for multi-byte                                    */ 
/************************************************************************/
/*
 * pos = mbstrrpos(haystack, needle [, encoding])
 *
 * Reverse-search counterpart of mbstrpos(): calls mbfl_strpos() with a zero
 * offset and the reverse flag set.  Returns the reported position, or FALSE
 * when nothing is found, the needle is empty, or the encoding is unknown.
 */
static void
php3_mbstrrpos(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *haystack, *needle, *enc_name;
	mbfl_encoding_ptr enc;
	int encoding_no, found;
	int argc;

	/* default encoding; overridden below when a name is supplied */
	encoding_no = i18n_get_internal_encoding();
	argc = ARG_COUNT(ht);

	if (argc == 2) {
		if (getParameters(ht, 2, &haystack, &needle) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
	} else if (argc == 3) {
		if (getParameters(ht, 3, &haystack, &needle, &enc_name) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		convert_to_string(enc_name);
		enc = mbfl_name2encoding(enc_name->value.str.val);
		if (enc == NULL) {
			php3_error(E_WARNING, "unknown encoding \"%s\"", enc_name->value.str.val);
			RETURN_FALSE;
		}
		encoding_no = enc->no_encoding;
	} else {
		WRONG_PARAM_COUNT;
	}

	convert_to_string(haystack);
	convert_to_string(needle);
	if (needle->value.str.len == 0) {
		php3_error(E_WARNING,"Empty delimiter");
		RETURN_FALSE;
	}

	found = mbfl_strpos(haystack->value.str.val, needle->value.str.val, 0, encoding_no, 1);
	if (found < 0) {
		RETURN_FALSE;
	}
	RETVAL_LONG(found);
} /* end of php3_mbstrrpos */



/************************************************************************/
/* mbsubstr - substr for multi-byte                                     */ 
/************************************************************************/
/*
 * part = mbsubstr(string, from [, length [, encoding]])
 *
 * Extracts a substring through mbfl_substr().  "from" and "length" are
 * interpreted relative to the length reported by mbfl_strlen(): a negative
 * "from" counts back from the end, a negative "length" stops that many units
 * before the end.  Returns FALSE when the start lies at or beyond the end,
 * when the effective length is zero, when mbfl_substr() fails, or when the
 * encoding name is unknown.
 */
static void
php3_mbsubstr(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *subject, *start_arg, *len_arg, *enc_name;
	mbfl_encoding_ptr enc;
	char *piece;
	int argc, fetched, encoding_no;
	int start, count, total;

	encoding_no = i18n_get_internal_encoding();
	argc = ARG_COUNT(ht);
	if (argc < 2 || argc > 4) {
		WRONG_PARAM_COUNT;
	}

	if (argc == 2) {
		fetched = getParameters(ht, 2, &subject, &start_arg);
	} else if (argc == 3) {
		fetched = getParameters(ht, 3, &subject, &start_arg, &len_arg);
	} else {
		fetched = getParameters(ht, 4, &subject, &start_arg, &len_arg, &enc_name);
	}
	if (fetched == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	if (argc == 4) {
		convert_to_string(enc_name);
		enc = mbfl_name2encoding(enc_name->value.str.val);
		if (enc == NULL) {
			php3_error(E_WARNING, "unknown encoding \"%s\"", enc_name->value.str.val);
			RETURN_FALSE;
		}
		encoding_no = enc->no_encoding;
	}

	convert_to_string(subject);
	convert_to_long(start_arg);
	start = start_arg->value.lval;
	if (argc < 3) {
		/* no length given: the byte length is used as the upper bound */
		count = subject->value.str.len;
	} else {
		convert_to_long(len_arg);
		count = len_arg->value.lval;
	}

	total = mbfl_strlen(subject->value.str.val, encoding_no);

	/* negative start: count from the end, clamped to the beginning */
	if (start < 0) {
		start = total + start;
		if (start < 0) {
			start = 0;
		}
	}

	/* negative length: leave that many units off the end, clamped to 0 */
	if (count < 0) {
		count = (total - start) + count;
		if (count < 0) {
			count = 0;
		}
	}

	if (start >= total || count == 0) {
		RETURN_FALSE;
	}

	piece = mbfl_substr(subject->value.str.val, start, count, encoding_no);
	if (piece == NULL) {
		RETURN_FALSE;
	}
	/* result buffer is handed over as-is, not duplicated */
	RETVAL_STRING(piece, 0);

} /* php3_mbsubstr */



/************************************************************************/
/* mbstrcut - cut specified byte from multi-byte string                 */ 
/************************************************************************/
/*
 * part = mbstrcut(string, from [, length [, encoding]])
 *
 * Like mbsubstr(), but "from" and "length" are normalised against the byte
 * length of the string and the extraction is delegated to mbfl_strcut().
 * Returns FALSE when the start lies at or beyond the end, when the effective
 * length is zero, when mbfl_strcut() fails, or when the encoding name is
 * unknown.
 */
static void
php3_mbstrcut(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *subject, *start_arg, *len_arg, *enc_name;
	mbfl_encoding_ptr enc;
	char *piece;
	int argc, fetched, encoding_no;
	int start, count, byte_len;

	encoding_no = i18n_get_internal_encoding();
	argc = ARG_COUNT(ht);
	if (argc < 2 || argc > 4) {
		WRONG_PARAM_COUNT;
	}

	if (argc == 2) {
		fetched = getParameters(ht, 2, &subject, &start_arg);
	} else if (argc == 3) {
		fetched = getParameters(ht, 3, &subject, &start_arg, &len_arg);
	} else {
		fetched = getParameters(ht, 4, &subject, &start_arg, &len_arg, &enc_name);
	}
	if (fetched == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	if (argc == 4) {
		convert_to_string(enc_name);
		enc = mbfl_name2encoding(enc_name->value.str.val);
		if (enc == NULL) {
			php3_error(E_WARNING, "unknown encoding \"%s\"", enc_name->value.str.val);
			RETURN_FALSE;
		}
		encoding_no = enc->no_encoding;
	}

	convert_to_string(subject);
	convert_to_long(start_arg);
	start = start_arg->value.lval;
	if (argc >= 3) {
		convert_to_long(len_arg);
		count = len_arg->value.lval;
	}
	byte_len = subject->value.str.len;
	if (argc < 3) {
		/* no length given: take everything up to the end */
		count = byte_len;
	}

	/* negative start: count from the end, clamped to the beginning */
	if (start < 0) {
		start = byte_len + start;
		if (start < 0) {
			start = 0;
		}
	}

	/* negative length: leave that many bytes off the end, clamped to 0 */
	if (count < 0) {
		count = (byte_len - start) + count;
		if (count < 0) {
			count = 0;
		}
	}

	if (start >= byte_len || count == 0) {
		RETURN_FALSE;
	}

	piece = mbfl_strcut(subject->value.str.val, start, count, encoding_no);
	if (piece == NULL) {
		RETURN_FALSE;
	}
	/* result buffer is handed over as-is, not duplicated */
	RETVAL_STRING(piece, 0);

} /* php3_mbstrcut */



/************************************************************************/
/* on-demand conversion function                                        */
/************************************************************************/
/*
 * result = i18n_convert(string, newcode [, oldcode])
 *
 * Converts string from oldcode (default: the internal encoding) to newcode
 * using mbfl_encoding_convert().  Returns the converted string, or FALSE
 * when either encoding name is unknown or the conversion yields NULL.
 */
static void
php3_i18n_convert(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *subject, *to_name, *from_name;
	mbfl_encoding_ptr enc;
	int to_no, from_no;
	char *converted;

	switch (ARG_COUNT(ht)) {
	case 2:
		if (getParameters(ht, 2, &subject, &to_name) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		from_no = i18n_get_internal_encoding();
		break;

	case 3:
		if (getParameters(ht, 3, &subject, &to_name, &from_name) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		/* resolve the source encoding */
		convert_to_string(from_name);
		enc = mbfl_name2encoding(from_name->value.str.val);
		if (enc == NULL) {
			php3_error(E_WARNING, "unknown encoding \"%s\"", from_name->value.str.val);
			RETURN_FALSE;
		}
		from_no = enc->no_encoding;
		break;

	default:
		/* print the usage line in addition to the standard complaint */
		php3_error(E_WARNING,
			"result = i18n_convert(string, newcode, [oldcode])");
		WRONG_PARAM_COUNT;
	}

	/* resolve the target encoding */
	convert_to_string(to_name);
	enc = mbfl_name2encoding(to_name->value.str.val);
	if (enc == NULL) {
		php3_error(E_WARNING, "unknown encoding \"%s\"", to_name->value.str.val);
		RETURN_FALSE;
	}
	to_no = enc->no_encoding;

	/* run the conversion */
	convert_to_string(subject);
	converted = mbfl_encoding_convert(from_no, to_no, subject->value.str.val);
	if (converted == NULL) {
		RETURN_FALSE;
	}
	/* result buffer is handed over as-is, not duplicated */
	RETVAL_STRING(converted, 0);

} /* end of php3_i18n_convert */



/************************************************************************/
/* identify string encoding                                             */
/************************************************************************/
/*
 * name = i18n_discover_encoding(string [, lang])
 *
 * Passes the string to mbfl_identify_encoding() and returns the name of the
 * encoding it reports, or "unknown" when the result cannot be resolved or is
 * the "auto" pseudo-encoding.  The optional second argument is accepted but
 * not used.
 */
static void
php3_i18n_discover_encoding(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *subject, *unused_lang;
	mbfl_encoding_ptr guess;

	switch (ARG_COUNT(ht)) {
	case 1:
		if (getParameters(ht, 1, &subject) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		break;
	case 2:
		if (getParameters(ht, 2, &subject, &unused_lang) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		break;
	default:
		WRONG_PARAM_COUNT;
	}

	convert_to_string(subject);
	guess = mbfl_no2encoding(mbfl_identify_encoding(subject->value.str.val));
	if (guess == NULL || guess->no_encoding == mbfl_no_encoding_auto) {
		RETVAL_STRING("unknown", 1);
	} else {
		RETVAL_STRING((char*)guess->name, 1);
	}
}



/************************************************************************/
/* query GPC string encoding                                            */
/************************************************************************/
/*
 * name = i18n_http_input()
 *
 * Returns the name of the encoding reported by
 * i18n_http_input_identify_encoding(), or "unknown" when that encoding
 * cannot be resolved or is the "auto" pseudo-encoding.
 */
static void
php3_i18n_http_input(INTERNAL_FUNCTION_PARAMETERS)
{
	mbfl_encoding_ptr detected;

	detected = mbfl_no2encoding(i18n_http_input_identify_encoding());
	if (detected == NULL || detected->no_encoding == mbfl_no_encoding_auto) {
		RETVAL_STRING("unknown", 1);
	} else {
		RETVAL_STRING((char*)detected->name, 1);
	}
}

/*
 * name = i18n_http_input_identify()
 *
 * Same lookup as i18n_http_input(): resolves the value of
 * i18n_http_input_identify_encoding() to an encoding name, falling back to
 * "unknown" for an unresolved or "auto" result.
 */
static void
php3_i18n_http_input_identify(INTERNAL_FUNCTION_PARAMETERS)
{
	mbfl_encoding_ptr detected;

	detected = mbfl_no2encoding(i18n_http_input_identify_encoding());
	if (detected == NULL || detected->no_encoding == mbfl_no_encoding_auto) {
		RETVAL_STRING("unknown", 1);
	} else {
		RETVAL_STRING((char*)detected->name, 1);
	}
}


/************************************************************************/
/* query internal encoding                                              */
/************************************************************************/
/*
 * name = i18n_internal_encoding()
 *
 * Returns the name of the encoding given by i18n_get_internal_encoding(),
 * or "unknown" when it cannot be resolved or is the "auto" pseudo-encoding.
 */
static void
php3_i18n_internal_encoding(INTERNAL_FUNCTION_PARAMETERS)
{
	mbfl_encoding_ptr internal;

	internal = mbfl_no2encoding(i18n_get_internal_encoding());
	if (internal == NULL || internal->no_encoding == mbfl_no_encoding_auto) {
		RETVAL_STRING("unknown", 1);
	} else {
		RETVAL_STRING((char*)internal->name, 1);
	}
}



/************************************************************************/
/* MIME header encode                                                   */
/************************************************************************/
static void
php3_i18n_mime_header_encode(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *str, *lang;
	char *ret;

	if (ARG_COUNT(ht) == 1) {
		if (getParameters(ht, 1, &str) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
	} else if (ARG_COUNT(ht) == 2) {
		if (getParameters(ht, 2, &str, &lang) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
	} else {
		WRONG_PARAM_COUNT;
	}
	convert_to_string(str);
	ret = mbfl_mime_header_encode(
	  i18n_get_internal_encoding(),
	  mbfl_no_encoding_ja_jp_jis,
	  mbfl_no_encoding_base64,
	  str->value.str.val,
	  "\r\n");
	if(ret != NULL) {
		RETVAL_STRING(ret, 0)	/* the string is already strdup()'ed */
	} else {
		RETVAL_FALSE;
	}

} /* end of php3_i18n_mime_header_encode */



/************************************************************************/
/* MIME header decode                                                   */
/************************************************************************/
static void
php3_i18n_mime_header_decode(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *str, *lang;
	char *ret;

	if (ARG_COUNT(ht) == 1) {
		if (getParameters(ht, 1, &str) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
	} else if (ARG_COUNT(ht) == 2) {
		if (getParameters(ht, 2, &str, &lang) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
	} else {
		WRONG_PARAM_COUNT;
	}
	convert_to_string(str);
	ret = mbfl_mime_header_decode(i18n_get_internal_encoding(), str->value.str.val);
	if(ret != NULL) {
		RETVAL_STRING(ret, 0)	/* the string is already strdup()'ed */
	} else {
		RETVAL_FALSE;
	}

} /* end of php3_i18n_mime_header_decode */



/************************************************************************/
/* Hankaku <=> Zenkaku (Japanese)                                       */
/************************************************************************/
/*
 * result = i18n_ja_jp_hantozen(string [, options [, encoding]])
 *
 * Builds an option bit mask from the letters in "options" (default mask
 * 0x900 when omitted; unrecognised letters are ignored) and hands the string
 * to mbfl_ja_jp_hantozen().  When the selected encoding does not belong to
 * the ja_JP language, the input string is returned unchanged.
 * Returns FALSE when the encoding cannot be resolved or the converter
 * yields NULL.
 */
static void
php3_i18n_ja_jp_hantozen(INTERNAL_FUNCTION_PARAMETERS)
{
	/* option letter -> bit passed on to mbfl_ja_jp_hantozen() */
	static const struct {
		char letter;
		int flag;
	} option_map[] = {
		{ 'A', 0x1 },     { 'a', 0x10 },
		{ 'R', 0x2 },     { 'r', 0x20 },
		{ 'N', 0x4 },     { 'n', 0x40 },
		{ 'K', 0x100 },   { 'k', 0x1000 },
		{ 'H', 0x200 },   { 'h', 0x2000 },
		{ 'V', 0x800 },
		{ 'C', 0x10000 }, { 'c', 0x20000 }
	};
	pval *subject, *options, *enc_name;
	mbfl_encoding_ptr enc;
	char *converted;
	int argc, flags, encoding_no;
	int i, k;

	argc = ARG_COUNT(ht);
	switch (argc) {
	case 1:
		if (getParameters(ht, 1, &subject) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		break;
	case 2:
		if (getParameters(ht, 2, &subject, &options) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		break;
	case 3:
		if (getParameters(ht, 3, &subject, &options, &enc_name) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		break;
	default:
		WRONG_PARAM_COUNT;
	}

	convert_to_string(subject);

	if (argc < 2) {
		flags = 0x900;
	} else {
		convert_to_string(options);
		flags = 0;
		for (i = 0; i < options->value.str.len; i++) {
			for (k = 0; k < (int)(sizeof(option_map) / sizeof(option_map[0])); k++) {
				if (options->value.str.val[i] == option_map[k].letter) {
					flags |= option_map[k].flag;
					break;
				}
			}
		}
	}

	if (argc == 3) {
		convert_to_string(enc_name);
		enc = mbfl_name2encoding(enc_name->value.str.val);
		if (enc == NULL) {
			php3_error(E_WARNING, "unknown encoding \"%s\"", enc_name->value.str.val);
			RETURN_FALSE;
		}
	} else {
		enc = mbfl_no2encoding(i18n_get_internal_encoding());
		if (enc == NULL) {
			php3_error(E_WARNING, "configuration error \"i18n_internal_encoding\"");
			RETURN_FALSE;
		}
	}
	encoding_no = enc->no_encoding;

	/* nothing to convert for encodings outside the ja_JP language */
	if (enc->planguage->no_language != mbfl_no_language_ja_jp) {
		RETURN_STRINGL(subject->value.str.val, subject->value.str.len, 1);
	}

	converted = mbfl_ja_jp_hantozen(subject->value.str.val, flags, encoding_no);
	if (converted == NULL) {
		RETURN_FALSE;
	}
	/* result buffer is handed over as-is, not duplicated */
	RETVAL_STRING(converted, 0);

} /* end of php3_i18n_ja_jp_hantozen */


#ifdef HAVE_MBREGEX
/*
 * Shared implementation of mbereg() and mberegi()
 * (regex for EUC, SJIS, UTF-8 encoding).
 *
 * Arguments from the script: pattern, string [, registers].  When the third
 * argument is present it must be passed by reference; it is replaced with an
 * array holding one entry per register (the matched text, or a reset value
 * for registers that did not yield a usable range).
 *
 * icase: non-zero selects case-insensitive matching.
 *
 * Returns FALSE on compile error or when the search reports a negative
 * result.  Otherwise returns the length of the overall match when the
 * register array was requested (a zero length is reported as 1), or 1 when
 * it was not.
 */
static void _php3_mbereg(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	pval *pattern_arg;		/* regular expression */
	pval *subject_arg;		/* string to apply the expression to */
	pval *regs_arg = NULL;		/* optional register array */
	pval item;
	mb_regex_t *re;
	struct re_registers *regs = NULL;
	char *compile_error;
	char *subject = NULL;
	int argc, status, idx, result, limit;
	int from, to;

	argc = ARG_COUNT(ht);
	if (argc == 2) {
		if (getParameters(ht, 2, &pattern_arg, &subject_arg) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
	} else if (argc == 3) {
		if (getParameters(ht, 3, &pattern_arg, &subject_arg, &regs_arg) == FAILURE) {
			WRONG_PARAM_COUNT;
		}
		if (!ParameterPassedByReference(ht, 3)) {
			php3_error(E_WARNING, "Array to be filled with values must be passed by reference.");
			RETURN_FALSE;
		}
		regs = (struct re_registers*)ecalloc(1, sizeof(struct re_registers));
	} else {
		WRONG_PARAM_COUNT;
	}

	/* set up an empty pattern buffer */
	re = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t));
	re->buffer = (char*)emalloc(16*sizeof(char));
	re->allocated = 16;
	re->fastmap = (char*)emalloc(256*sizeof(char));
	if (icase) {
		re->options |= RE_OPTION_IGNORECASE;
	}

	if (pattern_arg->type != IS_STRING) {
		/* numbers are turned into integers and matched as plain text;
		 * the extended syntax is not enabled for them */
		if (pattern_arg->type == IS_DOUBLE) {
			convert_to_long(pattern_arg);	/* drop the fractional part */
		}
		convert_to_string(pattern_arg);
	} else {
		re->options |= RE_OPTION_EXTENDED;
	}
	compile_error = mbre_compile_pattern(pattern_arg->value.str.val, pattern_arg->value.str.len, re);

	if (compile_error) {
		php3_error(E_WARNING, "mbregex compile err: %s", compile_error);
		if (regs) {
			efree(regs);
		}
		mbre_free_pattern(re);
		efree(re);
		RETURN_FALSE;
	}

	/* search a private copy of the subject */
	convert_to_string(subject_arg);
	subject = estrndup(subject_arg->value.str.val, subject_arg->value.str.len);
	status = mbre_search(re, subject, subject_arg->value.str.len, 0, subject_arg->value.str.len, regs);

	result = 1;
	if (status >= 0 && regs_arg && regs) {
		result = (int) (regs->end[0] - regs->beg[0]);
		limit = strlen(subject) + 1;
		pval_destructor(regs_arg _INLINE_TLS);	/* start with clean array */
		array_init(regs_arg);
		for (idx = 0; idx < regs->num_regs; idx++) {
			from = regs->beg[idx];
			to = regs->end[idx];
			if (from == -1 || to <= 0 || from >= limit || to >= limit || from >= to) {
				var_reset(&item);
			} else {
				item.value.str.len = to - from;
				item.value.str.val = estrndup(&subject[from], item.value.str.len);
				item.type = IS_STRING;
			}
			_php3_hash_index_update(regs_arg->value.ht, idx, &item, sizeof(pval),NULL);
		}
		if (result == 0) {
			result = 1;
		}
	}

	/* common cleanup for both the match and the no-match outcome */
	efree(subject);
	if (regs) {
		mbre_free_registers(regs);
		efree(regs);
	}
	mbre_free_pattern(re);
	efree(re);

	if (status < 0) {
		RETURN_FALSE;
	}
	RETVAL_LONG(result);
}

/* {{{ proto int mbereg(string pattern, string string [, array registers])
   Regular expression match for multibyte string.
   Thin wrapper: forwards to _php3_mbereg() with icase = 0 (case-sensitive). */
static void php3_mbereg(INTERNAL_FUNCTION_PARAMETERS)
{
	_php3_mbereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */

/* {{{ proto int mberegi(string pattern, string string [, array registers])
   Case-insensitive regular expression match for multibyte string.
   Thin wrapper: forwards to _php3_mbereg() with icase = 1. */
static void php3_mberegi(INTERNAL_FUNCTION_PARAMETERS)
{
	_php3_mbereg(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */



/*
 * regex replacement
 *
 * Replaces every match of "pattern" in "string" with "replace" and returns
 * the result in a newly emalloc()'ed, NUL-terminated buffer owned by the
 * caller.  In "replace", a backslash followed by a digit N is substituted by
 * the text of register N when the match has more than N registers; any other
 * character is copied literally.
 *
 *   pattern, replace, string - NUL-terminated inputs
 *   icase    - non-zero: match case-insensitively
 *   extended - non-zero: enable RE_OPTION_EXTENDED
 *
 * Returns (char *) -1 on compile or search failure (a warning is raised).
 */
static char *
_php3_mbregreplace(const char *pattern, const char *replace,
                   const char *string, int icase, int extended)
{
	mb_regex_t *pr;
	struct re_registers *pregs;
	struct memory_device device = { (char*)0, 0, 0 };
	const char *p;
	char c;
	int err, pos, string_len, n;
	char *err_str;

	string_len = strlen(string);

	/* create regex pattern buffer */
	pr = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t));
	pr->buffer = (char*)emalloc(16*sizeof(char));
	pr->allocated = 16;
	pr->fastmap = (char*)emalloc(256*sizeof(char));
	if (icase)
		pr->options |= RE_OPTION_IGNORECASE;
	if (extended)
		pr->options |= RE_OPTION_EXTENDED;
	err_str = mbre_compile_pattern(pattern, strlen(pattern), pr);
	if (err_str) {
		php3_error(E_WARNING, "mbregex compile err: %s", err_str);
		mbre_free_pattern(pr);
		efree(pr);
		return ((char *) -1);
	}

	/* initialize output device (auto reallocate buffer).
	 * One extra byte is reserved so that the initial terminator written
	 * below stays inside the allocation even for an empty subject. */
	device.buffer = (char*)emalloc((string_len + 1)*sizeof(char));
	device.length = string_len + 1;
	device.buffer[0] = '\0';

	pregs = (struct re_registers*)ecalloc(1, sizeof(struct re_registers));
	err = 0;
	pos = 0;
	while (err >= 0) {
		err = mbre_search(pr, string, string_len, pos, string_len - pos, pregs);
		if (err <= -2) {
			php3_error(E_WARNING, "mbregex search failure in _php3_mbregreplace");
			mbre_free_pattern(pr);
			efree(pr);
			mbre_free_registers(pregs);
			efree(pregs);
			if (device.buffer) {
				efree(device.buffer);
			}
			return ((char *) -1);
		}
		if (err < 0) { /* nomatch */
			/* stick that last bit of string on our output */
			memory_device_ncat(&device, &string[pos], string_len - pos);
			break;
		}

		/* copy the part of the string before the match */
		memory_device_ncat(&device, &string[pos], pregs->beg[0] - pos);
		/* copy replacement and backrefs */
		p = replace;
		while ((c = *p) != '\0') {
			if (c == '\\' &&  p[1] >= '0' && p[1] <= '9' && pregs->num_regs > (p[1] - '0')) {
				n = p[1] - '0';
				/* a register that did not take part in the match has no
				 * valid range; it contributes nothing */
				if (pregs->beg[n] >= 0 && pregs->end[n] > pregs->beg[n]) {
					memory_device_ncat(&device, &string[pregs->beg[n]], pregs->end[n] - pregs->beg[n]);
				}
				p += 2;
			} else {
				memory_device_output(c, &device);
				p++;
			}
		}

		pos = pregs->end[0];
		if (pregs->end[0] == pregs->beg[0]) {
			/* Empty match: the search position would never advance and
			 * the loop would spin forever.  Copy one byte through and
			 * step over it; stop once the end of the subject is reached.
			 * NOTE(review): the step is one byte, not one multibyte
			 * character -- confirm how mbre_search() treats a start
			 * position inside a multibyte sequence. */
			if (pos >= string_len) {
				break;
			}
			memory_device_output(string[pos], &device);
			pos++;
		}
	}

	mbre_free_registers(pregs);
	efree(pregs);
	mbre_free_pattern(pr);
	efree(pr);
	memory_device_output('\0', &device);
	return (device.buffer);
}

/*
 * Shared implementation of mbereg_replace() and mberegi_replace().
 *
 * Takes exactly three script arguments: pattern, replacement, string.
 * A non-string pattern or replacement is converted to a long and used as a
 * single character; empty strings are represented by empty_string.  The
 * actual substitution is done by _php3_mbregreplace() with the extended
 * syntax enabled.
 *
 * icase: non-zero selects case-insensitive matching.
 *
 * Returns the substituted string, or FALSE when _php3_mbregreplace() fails.
 */
static void _php3_mberegreplace(INTERNAL_FUNCTION_PARAMETERS, int icase)
{
	pval *pattern_arg, *replace_arg, *subject_arg;
	char *regex_text;
	char *replacement;
	char *subject;
	char *replaced;
	TLS_VARS;

	if (ARG_COUNT(ht) != 3 || getParameters(ht, 3, &pattern_arg, &replace_arg, &subject_arg) == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	/* pattern */
	if (pattern_arg->type != IS_STRING) {
		convert_to_long(pattern_arg);
		regex_text = emalloc(2);
		regex_text[0] = (char) pattern_arg->value.lval;
		regex_text[1] = '\0';
	} else if (pattern_arg->value.str.val && pattern_arg->value.str.len) {
		regex_text = estrndup(pattern_arg->value.str.val,pattern_arg->value.str.len);
	} else {
		regex_text = empty_string;
	}

	/* replacement */
	if (replace_arg->type != IS_STRING) {
		convert_to_long(replace_arg);
		replacement = emalloc(2);
		replacement[0] = (char) replace_arg->value.lval;
		replacement[1] = '\0';
	} else if (replace_arg->value.str.val && replace_arg->value.str.len) {
		replacement = estrndup(replace_arg->value.str.val, replace_arg->value.str.len);
	} else {
		replacement = empty_string;
	}

	/* subject */
	convert_to_string(subject_arg);
	if (subject_arg->value.str.val && subject_arg->value.str.len) {
		subject = estrndup(subject_arg->value.str.val, subject_arg->value.str.len);
	} else {
		subject = empty_string;
	}

	/* do the actual work */
	replaced = _php3_mbregreplace(regex_text, replacement, subject, icase, 1);
	if (replaced != (char *) -1) {
		/* hand a copy to the engine, then drop our buffer */
		RETVAL_STRING(replaced,1);
		STR_FREE(replaced);
	} else {
		RETVAL_FALSE;
	}

	STR_FREE(subject);
	STR_FREE(replacement);
	STR_FREE(regex_text);
}

/* {{{ proto string mbereg_replace(string pattern, string replacement, string string)
   Replace regular expression for multibyte string.
   Thin wrapper: forwards to _php3_mberegreplace() with icase = 0. */
static void php3_mberegreplace(INTERNAL_FUNCTION_PARAMETERS)
{
	_php3_mberegreplace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */

/* {{{ proto string mberegi_replace(string pattern, string replacement, string string)
   Case insensitive replace regular expression for multibyte string.
   Thin wrapper: forwards to _php3_mberegreplace() with icase = 1. */
static void php3_mberegireplace(INTERNAL_FUNCTION_PARAMETERS)
{
	_php3_mberegreplace(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */


/* {{{ proto array mbsplit(string pattern, string string [, int limit])
   split multibyte string into array by regular expression.

   The pattern is compiled with the extended syntax.  Each match ends the
   current piece; the text between the previous match and this one is
   appended to the result array.  With a limit, at most limit - 1 splits are
   made and the remainder of the string becomes the last element.

   Returns the array of pieces, or FALSE (with a warning) when the pattern
   fails to compile or the search reports an internal error. */
static void php3_mbsplit(INTERNAL_FUNCTION_PARAMETERS)
{
	pval *spliton, *str, *arg_count = NULL;
	mb_regex_t *pr;
	struct re_registers *pregs = NULL;
	char *pstr, *err_str;
	int err, count, string_len, pos, chunk_pos;
	TLS_VARS;

	switch (ARG_COUNT(ht)) {
	case 2:
		if (getParameters(ht, 2, &spliton, &str) == FAILURE)
			WRONG_PARAM_COUNT;
		count = -1;
		break;
	case 3:
		if (getParameters(ht, 3, &spliton, &str, &arg_count) == FAILURE)
			WRONG_PARAM_COUNT;
		convert_to_long(arg_count);
		count = arg_count->value.lval;
		break;
	default:
		WRONG_PARAM_COUNT;
	}

	convert_to_string(spliton);
	convert_to_string(str);

	/* create regex pattern buffer */
	pr = (mb_regex_t*)ecalloc(1, sizeof(mb_regex_t));
	pr->buffer = (char*)emalloc(16*sizeof(char));
	pr->allocated = 16;
	pr->fastmap = (char*)emalloc(256*sizeof(char));
	pr->options |= RE_OPTION_EXTENDED;
	err_str = mbre_compile_pattern(spliton->value.str.val, spliton->value.str.len, pr);
	if (err_str) {
		php3_error(E_WARNING, "mbregex compile err: %s", err_str);
		mbre_free_pattern(pr);
		efree(pr);
		RETURN_FALSE;
	}

	/* The result array is created only after the pattern compiled, so a
	 * compile error does not leave an orphaned array behind. */
	if (array_init(return_value) == FAILURE) {
		mbre_free_pattern(pr);
		efree(pr);
		RETURN_FALSE;
	}

	pregs = (struct re_registers*)ecalloc(1, sizeof(struct re_registers));
	pstr = str->value.str.val;
	string_len = str->value.str.len;
	pos = 0;		/* where the next search starts */
	chunk_pos = 0;		/* where the piece being collected starts */
	err = 0;
	/* churn through str, generating array entries as we go */
	while ((count == -1 || count > 1) && pos <= string_len &&
	       (err = mbre_search(pr, pstr, string_len, pos, string_len - pos, pregs)) >= 0) {
		if (pregs->end[0] > pos) {
			/* The match moves us forward: emit the piece in front of it
			 * (an empty string when the match starts right at the
			 * beginning of the piece). */
			add_next_index_stringl(return_value, &pstr[chunk_pos], pregs->beg[0] - chunk_pos, 1);
			/* point at our new starting point */
			chunk_pos = pos = pregs->end[0];
			/* if we're only looking for a certain number of points,
			   stop looking once we hit it */
			if (count != -1) count--;
		} else {
			/* Empty match at the search position: splitting here would
			 * never advance and loop forever.  Skip ahead instead.
			 * NOTE(review): the step is one byte, not one multibyte
			 * character -- confirm how mbre_search() treats a start
			 * position inside a multibyte sequence. */
			pos++;
		}
	}

	/* see if we encountered an error */
	if (err <= -2) {
		php3_error(E_WARNING, "mbregex search failure in php3_mbsplit");
		mbre_free_registers(pregs);
		efree(pregs);
		mbre_free_pattern(pr);
		efree(pr);
		/* release the partially filled result before reporting failure */
		pval_destructor(return_value _INLINE_TLS);
		RETURN_FALSE;
	}

	/* otherwise we just have one last element to add to the array */
	add_next_index_stringl(return_value, &pstr[chunk_pos], string_len - chunk_pos, 1);

	mbre_free_registers(pregs);
	efree(pregs);
	mbre_free_pattern(pr);
	efree(pr);
}
/* }}} */
#endif	/* HAVE_MBREGEX */

#endif	/* PHP3_I18N */
/* vi:set sw=4 ts=4: */
