From cf45491bc856144a539b63155a59e4969346825e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Axel=20D=C3=B6rfler?= <axeld@pinc-software.de>
Date: Fri, 10 Feb 2006 12:00:38 +0000
Subject: [PATCH] Unlike what shatty wrote in his commit messages,
 convert_from_utf8() does return B_OK here when called with an empty string.
 Since this is the natural thing to expect anyway, I changed our version to
 return a sane value here as well - if this causes an error within our
 StyledEdit, as shatty writes, we should just fix our StyledEdit. Cleanup to
 match our style guides a bit better, added license.

git-svn-id: file:///srv/svn/repos/haiku/haiku/trunk@16328 a95241bf-73f2-0310-859d-f6bbb57e9c96
---
 src/kits/textencoding/utf8_conversions.cpp | 174 +++++++++++----------
 1 file changed, 93 insertions(+), 81 deletions(-)

diff --git a/src/kits/textencoding/utf8_conversions.cpp b/src/kits/textencoding/utf8_conversions.cpp
index 08b24d774f..485ba9235d 100644
--- a/src/kits/textencoding/utf8_conversions.cpp
+++ b/src/kits/textencoding/utf8_conversions.cpp
@@ -1,122 +1,129 @@
-#include <UTF8.h>
-#include <iconv.h>
+/*
+ * Copyright 2003-2006, Haiku, Inc. All Rights Reserved.
+ * Distributed under the terms of the MIT License.
+ *
+ * Authors:
+ *		Andrew Bachmann
+ */
+
+
 #include <CharacterSet.h>
 #include <CharacterSetRoster.h>
-#include <Errors.h>
+#include <UTF8.h>
+
 #include <errno.h>
+#include <iconv.h>
 #include <stdio.h>
-#include <Debug.h>
+
 
 //#define DEBUG_CONV 1
 
 #ifdef DEBUG_CONV
-	#define DEBPRINT(ARGS) printf ARGS;
+#	define DEBPRINT(ARGS) printf ARGS;
 #else
-	#define DEBPRINT(ARGS) ;
+#	define DEBPRINT(ARGS) ;
 #endif
 
 using namespace BPrivate;
 
-typedef char ** input_buffer_t;
+int iconvctl(iconv_t icd, int request, void* argument);
 
-int iconvctl (iconv_t icd, int request, void* argument);
 
 status_t
-convert_encoding(const char * from, const char * to,
-                 const char * src, int32 * srcLen,
-                 char * dst, int32 * dstLen,
-                 int32 * state, char substitute)
+convert_encoding(const char* from, const char* to, const char* src,
+	int32* srcLen, char* dst, int32* dstLen, int32* state,
+	char substitute)
 {
-	status_t status;
 	if (*srcLen == 0) {
 		// nothing to do!
-		DEBPRINT(("nothing to do\n"));
 		*dstLen = 0;
-		return B_ERROR;
+		return B_OK;
 	}
-	iconv_t conversion = iconv_open(to,from);
+
+	iconv_t conversion = iconv_open(to, from);
 	if (conversion == (iconv_t)-1) {
 		DEBPRINT(("iconv_open failed\n"));
 		return B_ERROR;
 	}
-	if ((state == NULL) || (*state == 0)) {
-		iconv(conversion,0,0,0,0);
-	}
-	input_buffer_t inputBuffer = const_cast<input_buffer_t>(&src);
+	if (state == NULL || *state == 0)
+		iconv(conversion, 0, 0, 0, 0);
+
+	char** inputBuffer = const_cast<char**>(&src);
 	size_t inputLeft = *srcLen;
 	size_t outputLeft = *dstLen;
 	do {
-		size_t nonReversibleConversions = iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft);
+		size_t nonReversibleConversions = iconv(conversion, inputBuffer,
+			&inputLeft, &dst, &outputLeft);
 		if (nonReversibleConversions == (size_t)-1) {
 			if (errno == E2BIG) {
 				// Not enough room in the output buffer for the next converted character
 				// This is not a "real" error, we just quit out.
 				break;
 			}
+
 			switch (errno) {
-			case EILSEQ: // unable to generate a corresponding character
+				case EILSEQ: // unable to generate a corresponding character
 				{
-				// discard the input character
-				const int one = 1, zero = 0;
-				iconvctl(conversion,ICONV_SET_DISCARD_ILSEQ,(void*)&one);
-				iconv(conversion,inputBuffer,&inputLeft,&dst,&outputLeft);
-				iconvctl(conversion,ICONV_SET_DISCARD_ILSEQ,(void*)&zero);
-				// prepare to convert the substitute character to target encoding
-				char * original = new char[1];
-				original[0] = substitute;
-				size_t len = 1;
-				char * copy = original;
-				// Perform the conversion
-				// We ignore any errors during this as part of robustness/best-effort
-				// We use ISO-8859-1 as a source because it is a single byte encoding
-				// It also overlaps UTF-8 for the lower 128 characters.  It is also
-				// likely to have a mapping to almost any target encoding.
-				iconv_t iso8859_1to = iconv_open(to,"ISO-8859-1");
-				if (iso8859_1to != (iconv_t)-1) {
-					iconv(iso8859_1to,0,0,0,0);
-					iconv(iso8859_1to,const_cast<input_buffer_t>(&copy),&len,&dst,&outputLeft);
-					iconv_close(iso8859_1to);
+					// discard the input character
+					const int one = 1, zero = 0;
+					iconvctl(conversion, ICONV_SET_DISCARD_ILSEQ, (void*)&one);
+					iconv(conversion, inputBuffer, &inputLeft, &dst, &outputLeft);
+					iconvctl(conversion, ICONV_SET_DISCARD_ILSEQ, (void*)&zero);
+
+					// prepare to convert the substitute character to target encoding
+					char* original = new char[1];
+					original[0] = substitute;
+					size_t len = 1;
+					char* copy = original;
+
+					// Perform the conversion
+					// We ignore any errors during this as part of robustness/best-effort
+					// We use ISO-8859-1 as a source because it is a single byte encoding
+					// It also overlaps UTF-8 for the lower 128 characters.  It is also
+					// likely to have a mapping to almost any target encoding.
+					iconv_t iso8859_1to = iconv_open(to,"ISO-8859-1");
+					if (iso8859_1to != (iconv_t)-1) {
+						iconv(iso8859_1to, 0, 0, 0, 0);
+						iconv(iso8859_1to, const_cast<char**>(&copy), &len, &dst,
+							&outputLeft);
+						iconv_close(iso8859_1to);
+					}
+					delete[] original;
+					break;
 				}
-				delete original;
-				}
-				break;
-			case EINVAL: // incomplete multibyte sequence in the input
-				// we just eat bad bytes, as part of robustness/best-effort
-				inputBuffer++;
-				inputLeft--;
-				break;
-			default:
-				// unknown error, completely bail
-				status = errno;
-				iconv_close(conversion);
-				return status;
+
+				case EINVAL: // incomplete multibyte sequence in the input
+					// we just eat bad bytes, as part of robustness/best-effort
+					(*inputBuffer)++;
+					inputLeft--;
+					break;
+
+				default:
+					// unknown error, completely bail
+					status_t status = errno;
+					iconv_close(conversion);
+					return status;
 			}
 		}
-	} while ((inputLeft > 0) && (outputLeft > 0));
+	} while (inputLeft > 0 && outputLeft > 0);
+
 	*srcLen -= inputLeft;
 	*dstLen -= outputLeft;
 	iconv_close(conversion);
-	if (*srcLen != 0) {
-		// able to convert at least one character
-		DEBPRINT(("able to convert at least one character\n"));
-		return B_OK;
-	} else {
-		// not able to convert at least one character
-		DEBPRINT(("not able to convert at least one character\n"));
-		return B_ERROR;
-	}
+
+	return B_OK;
 }
 
+
 status_t
-convert_to_utf8(uint32 srcEncoding,
-                const char * src, int32 * srcLen, 
-                char * dst, int32 * dstLen,
-                int32 * state, char substitute)
+convert_to_utf8(uint32 srcEncoding, const char* src, int32* srcLen,
+	char* dst, int32* dstLen, int32* state, char substitute)
 {
-	const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(srcEncoding);
-	if (charset == 0) {
+	const BCharacterSet* charset = BCharacterSetRoster::GetCharacterSetByConversionID(
+		srcEncoding);
+	if (charset == NULL)
 		return B_ERROR;
-	}
+
 #if DEBUG_CONV
 	fprintf(stderr, "convert_to_utf8(%s) : \"", charset->GetName());
 	for (int i = 0 ; i < *srcLen ; i++) {
@@ -124,19 +131,21 @@ convert_to_utf8(uint32 srcEncoding,
 	}
 	fprintf(stderr, "\"\n");
 #endif
-	return convert_encoding(charset->GetName(),"UTF-8",src,srcLen,dst,dstLen,state,substitute);
+
+	return convert_encoding(charset->GetName(), "UTF-8", src, srcLen,
+		dst, dstLen, state, substitute);
 }
 
+
 status_t
-convert_from_utf8(uint32 dstEncoding,
-                  const char * src, int32 * srcLen, 
-                  char * dst, int32 * dstLen,
-                  int32 * state, char substitute)
+convert_from_utf8(uint32 dstEncoding, const char* src, int32* srcLen,
+	char* dst, int32* dstLen, int32* state, char substitute)
 {
-	const BCharacterSet * charset = BCharacterSetRoster::GetCharacterSetByConversionID(dstEncoding);
-	if (charset == 0) {
+	const BCharacterSet* charset = BCharacterSetRoster::GetCharacterSetByConversionID(
+		dstEncoding);
+	if (charset == NULL)
 		return B_ERROR;
-	}
+
 #if DEBUG_CONV
 	fprintf(stderr, "convert_from_utf8(%s) : \"", charset->GetName());
 	for (int i = 0 ; i < *srcLen ; i++) {
@@ -144,5 +153,8 @@ convert_from_utf8(uint32 dstEncoding,
 	}
 	fprintf(stderr, "\"\n");
 #endif
-	return convert_encoding("UTF-8",charset->GetName(),src,srcLen,dst,dstLen,state,substitute);
+
+	return convert_encoding("UTF-8", charset->GetName(), src, srcLen,
+		dst, dstLen, state, substitute);
 }
+