From 5b7ea503a70284e6d621dede65390fbe0125f01a Mon Sep 17 00:00:00 2001
From: shatty <shatty@nowhere.fake>
Date: Wed, 13 Aug 2003 05:38:07 +0000
Subject: [PATCH] Refine the error handling behavior.  Note: we depart from the
 BeBook specification, which calls for returning B_ERROR when no characters
 are converted.  We do this in exactly one situation: when there are no bytes
 in the input.  This matches the behavior of the R5 libs themselves.  Not
 having this behavior also caused an error in our StyledEdit.  StyledEdit has
 been fixed to not exercise this functionality.  Also added the two most
 popular Chinese encodings for my own evil purposes.  GB18030 support is also
 required to legally sell an operating system in mainland China.
 Additionally, GB18030 support encompasses GBK and GB2312.

git-svn-id: file:///srv/svn/repos/haiku/trunk/current@4276 a95241bf-73f2-0310-859d-f6bbb57e9c96
---
 src/kits/support/Jamfile              |  1 +
 src/kits/support/character_sets.cpp   |  9 +++++++++
 src/kits/support/utf8_conversions.cpp | 28 +++++++++++++++++++++++----
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/src/kits/support/Jamfile b/src/kits/support/Jamfile
index 04551b9dac..029eccb8fe 100644
--- a/src/kits/support/Jamfile
+++ b/src/kits/support/Jamfile
@@ -8,6 +8,7 @@ SharedLibrary textencoding :
 	character_sets.cpp
 	utf8_conversions.cpp
 	;
+DEPENDS libtextencoding.so : libiconv.so ;
 LinkSharedOSLibs libtextencoding.so :
 	be
 	iconv
diff --git a/src/kits/support/character_sets.cpp b/src/kits/support/character_sets.cpp
index a8ef008425..34e3a194bc 100644
--- a/src/kits/support/character_sets.cpp
+++ b/src/kits/support/character_sets.cpp
@@ -104,6 +104,14 @@ const char * iso15aliases[] =
  { "ISO_8859-14","Latin-9",NULL };
 const BCharacterSet iso15(24,111,"ISO 8859-15","ISO-8859-15","ISO-8859-15",iso15aliases);
 
+// Chinese character set testing
+
+const char * big5aliases[] =
+ { "csBig5",NULL };
+const BCharacterSet big5(25,2026,"Big5","Big5","Big5",big5aliases);
+
+const BCharacterSet gb18030(26,114,"GB18030","GB18030",NULL,NULL);
+
 /**
  * The following initializes the global character set array.
  * It is organized by id for efficient retrieval using predefined constants in UTF8.h and Font.h.
@@ -122,6 +130,7 @@ const BCharacterSet * character_sets_by_id[] = {
 	&windows1252, &unicode2, &KOI8R, &windows1251,
 	&IBM866, &IBM437, &eucKR, &iso13, &iso14, &iso15,
 	// R5 convert_to/from_utf8 encodings end here
+	&big5,&gb18030,
 };
 const uint32 character_sets_by_id_count = sizeof(character_sets_by_id)/sizeof(const BCharacterSet*);
 
diff --git a/src/kits/support/utf8_conversions.cpp b/src/kits/support/utf8_conversions.cpp
index de982994a5..e322828695 100644
--- a/src/kits/support/utf8_conversions.cpp
+++ b/src/kits/support/utf8_conversions.cpp
@@ -15,6 +15,10 @@ convert_encoding(const char * from, const char * to,
                  char * dst, int32 * dstLen,
                  int32 * state)
 {
+	if (*srcLen == 0) {
+		// nothing to do!
+		return B_OK;
+	}
 	iconv_t conversion = iconv_open(to,from);
 	if (conversion == (iconv_t)-1) {
 		return B_ERROR;
@@ -28,14 +32,30 @@ convert_encoding(const char * from, const char * to,
 	input_buffer_t inputBuffer = const_cast<input_buffer_t>(&src);
 	size_t inputLeft = *srcLen;
 	size_t outputLeft = *dstLen;
-	size_t bytesLeft = iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft);
+	size_t nonReversibleConversions = iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft);
 	*srcLen -= inputLeft;
 	*dstLen -= outputLeft;
-	if ((bytesLeft != 0) && (errno != E2BIG) && (errno != EINVAL)) {
+	iconv_close(conversion);
+	if (nonReversibleConversions == -1) {
+		switch (errno) {
+		case EILSEQ: // invalid multibyte sequence in the source
+			return B_ERROR;
+		case EINVAL: // incomplete multibyte sequence in the input
+			return B_OK;
+		case E2BIG: // not enough room in the output buffer for the next converted character
+			return B_OK;
+		default:
+			// unknown error
+			int err = errno;
+		}
+	}
+	if (*srcLen != 0) {
+		// able to convert at least one character
+		return B_OK;
+	} else {
+		// not able to convert at least one character
 		return B_ERROR;
 	}
-	iconv_close(conversion);
-	return B_OK;
 }
 
 status_t