#ifdef WIN32 #include #endif #if defined LINUX || defined ANDROID #include // use ICU for conversion #include #endif #include "GLTB/exception.h" #include "GLTB/stringconvert.h" namespace gltb { wchar_t *utf8ToUtf16WChar(const char *input) { #if defined WIN32 int bufferSize=MultiByteToWideChar(CP_UTF8,0,input,-1,NULL,0); if(bufferSize==0 && strlen(input)!=0) { wchar_t *windowsMessage; DWORD lastError = GetLastError(); FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, lastError, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR) &windowsMessage, 0, NULL ); /* * Now this is some fine insanity here: having encountered a charset conversion that went wrong, * we now have to rely on another one on a totally unknown string in order to get an error * message onto the screen. */ char *utf8WindowsMessage=utf16ToUtf8Char(windowsMessage); throw Exception("UTF8 to UTF16 string conversion failed; Windows error message claims " + std::string(utf8WindowsMessage),"gltb::utf8ToUtf16WChar()"); } wchar_t *result=new wchar_t[bufferSize]; if(MultiByteToWideChar(CP_UTF8,0,input,-1,result,bufferSize)!=bufferSize) { throw Exception("UTF8 to UTF16 string conversion failed","gltb::utf8ToUtf16WChar()"); } return result; #elif defined LINUX || defined ANDROID int bufferSize=0; UErrorCode errorCode=U_ZERO_ERROR; int numSubs; u_strFromUTF8WithSub(NULL,0,&bufferSize,input,-1,L' ',&numSubs,&errorCode); if(errorCode==U_STRING_NOT_TERMINATED_WARNING) { /* * This is an interesting wrinkle in the behaviour of ICU: the * library happily generates unterminated strings and indicates * that using the error code. So here we are, having fit the * converted string into 0 bytes without terminator. In other * words: the output is the empty string. */ wchar_t *result=new wchar_t[1]; result[0]=0; return result; } else if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { throw Exception("UTF8 to UTF16 string conversion failed while trying to get temporary buffer size, error: " + std::string(u_errorName(errorCode)),"gltb::utf8ToUtf16WChar()"); } UChar *temp=new UChar[bufferSize+1]; memset(temp,0,(bufferSize+1)*sizeof(UChar)); errorCode=U_ZERO_ERROR; u_strFromUTF8WithSub(temp,bufferSize,&bufferSize,input,-1,L' ',&numSubs,&errorCode); if(errorCode>0) { throw Exception("UTF8 to UTF16 string conversion failed, error: " + std::string(u_errorName(errorCode)),"gltb::utf8ToUtf16WChar()"); } bufferSize=0; errorCode=U_ZERO_ERROR; u_strToWCS(NULL,0,&bufferSize,temp,-1,&errorCode); if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { throw Exception("UTF8 to UTF16 string conversion failed while trying to get output buffer size, error: " + std::string(u_errorName(errorCode)),"gltb::utf8ToUtf16WChar()"); } wchar_t *result=new wchar_t[bufferSize+1]; memset(result,0,(bufferSize+1)*sizeof(wchar_t)); errorCode=U_ZERO_ERROR; u_strToWCS(result,bufferSize,&bufferSize,temp,-1,&errorCode); if(errorCode>0) { throw Exception("UTF8 to UTF16 string conversion failed, error: " + std::string(u_errorName(errorCode)),"gltb::utf8ToUtf16WChar()"); } delete[] temp; return result; #else #error unimplemented function on this target platform #endif } char *utf16ToUtf8Char(const wchar_t *input) { #ifdef WIN32 int bufferSize=WideCharToMultiByte(CP_UTF8,0,input,-1,NULL,0,NULL,NULL); if(bufferSize==0 && wcslen(input)!=0) { wchar_t *windowsMessage; DWORD lastError = GetLastError(); FormatMessage( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, lastError, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR) &windowsMessage, 0, NULL ); /* * Now this is some fine insanity here: having encountered a charset conversion that went wrong, * we now have to rely on another one on a totally unknown string in order to get an error * message onto the screen. */ char *utf8WindowsMessage=utf16ToUtf8Char(windowsMessage); throw Exception("UTF16 to UTF8 string conversion failed; Windows error message claims " + std::string(utf8WindowsMessage),"gltb::utf16ToUtf8Char()"); } char *result=new char[bufferSize]; if(WideCharToMultiByte(CP_UTF8,0,input,-1,result,bufferSize,NULL,NULL)!=bufferSize) { throw Exception("UTF16 to UTF8 string conversion failed","gltb::utf16ToUtf8Char()"); } return result; #elif defined LINUX || defined ANDROID int bufferSize=0; UErrorCode errorCode=U_ZERO_ERROR; u_strFromWCS(NULL,0,&bufferSize,input,-1,&errorCode); if(errorCode==U_STRING_NOT_TERMINATED_WARNING) { /* * This is an interesting wrinkle in the behaviour of ICU: the * library happily generates unterminated strings and indicates * that using the error code. So here we are, having fit the * converted string into 0 bytes without terminator. In other * words: the output is the empty string. */ char *result=new char[1]; result[0]=0; return result; } else if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { throw Exception("UTF16 to UTF8 string conversion failed while trying to get temporary buffer size, error: " + std::string(u_errorName(errorCode)),"gltb::utf16ToUtf8Char()"); } UChar *temp=new UChar[bufferSize+1]; memset(temp,0,(bufferSize+1)*sizeof(UChar)); errorCode=U_ZERO_ERROR; u_strFromWCS(temp,bufferSize,&bufferSize,input,-1,&errorCode); if(errorCode>0) { throw Exception("UTF16 to UTF8 string conversion failed, error: " + std::string(u_errorName(errorCode)),"gltb::utf16ToUtf8har()"); } bufferSize=0; errorCode=U_ZERO_ERROR; u_strToUTF8(NULL,0,&bufferSize,temp,-1,&errorCode); if(errorCode!=U_BUFFER_OVERFLOW_ERROR) { throw Exception("UTF16 to UTF8 string conversion failed while trying to get output buffer size, error: " + std::string(u_errorName(errorCode)),"gltb::utf16ToUtf8Char()"); } char *result=new char[bufferSize+1]; memset(result,0,bufferSize+1); errorCode=U_ZERO_ERROR; u_strToUTF8(result,bufferSize,&bufferSize,temp,-1,&errorCode); if(errorCode>0) { throw Exception("UTF16 to UTF8 string conversion failed, error: " + std::string(u_errorName(errorCode)),"gltb::utf16ToUtf8har()"); } delete[] temp; return result; #else #error unimplemented function on this target platform #endif } std::wstring utf8ToUtf16WString(std::string input) { wchar_t *result=utf8ToUtf16WChar(input.c_str()); std::wstring resultStr=result; delete[] result; return resultStr; } std::string utf16ToUtf8String(std::wstring input) { char *result=utf16ToUtf8Char(input.c_str()); std::string resultStr=result; delete[] result; return resultStr; } }