From 6d59653e825f687d28763fd8ff9141b2cff246b0 Mon Sep 17 00:00:00 2001 From: Cloud Wu Date: Wed, 8 May 2019 10:10:20 +0800 Subject: [PATCH] Unicode: full Unicode Support (6 squashed commits) (#2541, #2538) fix build for WideCharToMultiByte [3181ff1e] Full Unicode Support [6c9e73ac] Fix ImTextCountUtf8BytesFromChar and ImTextCharToUtf8, these APIs assume the input is an unicode code point, not UTF-16 [ba85665b] Add AddInputCharacterUTF16 for windows backend to handle WM_CHAR [fafdcaf0] Use Windows API to convert UTF-16 for ImFileOpen [dc7d5925] Use windows API to convert UTF-16 for clipboard --- examples/imgui_impl_win32.cpp | 3 +- imconfig.h | 3 ++ imgui.cpp | 82 ++++++++++++++++++++++------------- imgui.h | 14 +++++- imgui_draw.cpp | 3 +- 5 files changed, 70 insertions(+), 35 deletions(-) diff --git a/examples/imgui_impl_win32.cpp b/examples/imgui_impl_win32.cpp index 449922f57..8b1c80876 100644 --- a/examples/imgui_impl_win32.cpp +++ b/examples/imgui_impl_win32.cpp @@ -324,7 +324,8 @@ IMGUI_IMPL_API LRESULT ImGui_ImplWin32_WndProcHandler(HWND hwnd, UINT msg, WPARA return 0; case WM_CHAR: // You can also use ToAscii()+GetKeyboardState() to retrieve characters. - io.AddInputCharacter((unsigned int)wParam); + if (wParam > 0 && wParam < 0x10000) + io.AddInputCharacterUTF16((unsigned short)wParam); return 0; case WM_SETCURSOR: if (LOWORD(lParam) == HTCLIENT && ImGui_ImplWin32_UpdateMouseCursor()) diff --git a/imconfig.h b/imconfig.h index 4f629795b..4f01b7781 100644 --- a/imconfig.h +++ b/imconfig.h @@ -77,6 +77,9 @@ // Read about ImGuiBackendFlags_RendererHasVtxOffset for details. //#define ImDrawIdx unsigned int +//---- Use 32-bit for ImWchar (default is 16-bit) to support full unicode code points. +//#define ImWchar ImWchar32 + //---- Override ImDrawCallback signature (will need to modify renderer back-ends accordingly) //struct ImDrawList; //struct ImDrawCmd; diff --git a/imgui.cpp b/imgui.cpp index 2a7219239..9e7ac21c5 100644 --- a/imgui.cpp +++ b/imgui.cpp @@ -1098,13 +1098,35 @@ void ImGuiIO::AddInputCharacter(unsigned int c) InputQueueCharacters.push_back((ImWchar)c); } +// UTF16 string use Surrogate to encode unicode > 0x10000, so we should save the Surrogate. +void ImGuiIO::AddInputCharacterUTF16(ImWchar16 c) +{ + if (c >= 0xD800 && c <= 0xDBFF) + { + Surrogate = c; + } + else + { + ImWchar cp = c; + if (c >= 0xDC00 && c <= 0xDFFF) + { + if (sizeof(ImWchar) == 2) + cp = IM_UNICODE_CODEPOINT_INVALID; + else + cp = ((ImWchar)(Surrogate - 0xD800) << 10) + (c - 0xDC00) + 0x10000; + Surrogate = 0; + } + InputQueueCharacters.push_back(cp); + } +} + void ImGuiIO::AddInputCharactersUTF8(const char* utf8_chars) { while (*utf8_chars != 0) { unsigned int c = 0; utf8_chars += ImTextCharFromUtf8(&c, utf8_chars, NULL); - if (c > 0 && c <= IM_UNICODE_CODEPOINT_MAX) + if (c > 0) InputQueueCharacters.push_back((ImWchar)c); } } @@ -1488,13 +1510,13 @@ ImFileHandle ImFileOpen(const char* filename, const char* mode) { #if defined(_WIN32) && !defined(IMGUI_DISABLE_WIN32_FUNCTIONS) && !defined(__CYGWIN__) && !defined(__GNUC__) // We need a fopen() wrapper because MSVC/Windows fopen doesn't handle UTF-8 filenames. - const int filename_wsize = ImTextCountCharsFromUtf8(filename, NULL) + 1; - const int mode_wsize = ImTextCountCharsFromUtf8(mode, NULL) + 1; + const int filename_wsize = ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, NULL, 0); + const int mode_wsize = ::MultiByteToWideChar(CP_UTF8, 0, mode, -1, NULL, 0); ImVector buf; buf.resize(filename_wsize + mode_wsize); - ImTextStrFromUtf8(&buf[0], filename_wsize, filename, NULL); - ImTextStrFromUtf8(&buf[filename_wsize], mode_wsize, mode, NULL); - return _wfopen((wchar_t*)&buf[0], (wchar_t*)&buf[filename_wsize]); + ::MultiByteToWideChar(CP_UTF8, 0, filename, -1, &buf[0], filename_wsize); + ::MultiByteToWideChar(CP_UTF8, 0, mode, -1,&buf[filename_wsize], mode_wsize); + return _wfopen(&buf[0], &buf[filename_wsize]); #else return fopen(filename, mode); #endif @@ -1606,6 +1628,8 @@ int ImTextCharFromUtf8(unsigned int* out_char, const char* in_text, const char* c += (*str++ & 0x3f); // utf-8 encodings of values used in surrogate pairs are invalid if ((c & 0xFFFFF800) == 0xD800) return 4; + // If ImWchar is 16bit, use replacement character U+FFFD instead + if (sizeof(ImWchar) == 2 && c >= 0x10000) c = IM_UNICODE_CODEPOINT_INVALID; *out_char = c; return 4; } @@ -1623,8 +1647,7 @@ int ImTextStrFromUtf8(ImWchar* buf, int buf_size, const char* in_text, const cha in_text += ImTextCharFromUtf8(&c, in_text, in_text_end); if (c == 0) break; - if (c <= IM_UNICODE_CODEPOINT_MAX) // FIXME: Losing characters that don't fit in 2 bytes - *buf_out++ = (ImWchar)c; + *buf_out++ = (ImWchar)c; } *buf_out = 0; if (in_text_remaining) @@ -1641,8 +1664,7 @@ int ImTextCountCharsFromUtf8(const char* in_text, const char* in_text_end) in_text += ImTextCharFromUtf8(&c, in_text, in_text_end); if (c == 0) break; - if (c <= IM_UNICODE_CODEPOINT_MAX) - char_count++; + char_count++; } return char_count; } @@ -1662,11 +1684,15 @@ static inline int ImTextCharToUtf8(char* buf, int buf_size, unsigned int c) buf[1] = (char)(0x80 + (c & 0x3f)); return 2; } - if (c >= 0xdc00 && c < 0xe000) + if (c < 0x10000) { - return 0; + if (buf_size < 3) return 0; + buf[0] = (char)(0xe0 + (c >> 12)); + buf[1] = (char)(0x80 + ((c>> 6) & 0x3f)); + buf[2] = (char)(0x80 + ((c ) & 0x3f)); + return 3; } - if (c >= 0xd800 && c < 0xdc00) + if (c <= 0x10FFFF) { if (buf_size < 4) return 0; buf[0] = (char)(0xf0 + (c >> 18)); @@ -1675,14 +1701,8 @@ static inline int ImTextCharToUtf8(char* buf, int buf_size, unsigned int c) buf[3] = (char)(0x80 + ((c ) & 0x3f)); return 4; } - //else if (c < 0x10000) - { - if (buf_size < 3) return 0; - buf[0] = (char)(0xe0 + (c >> 12)); - buf[1] = (char)(0x80 + ((c>> 6) & 0x3f)); - buf[2] = (char)(0x80 + ((c ) & 0x3f)); - return 3; - } + // Invalid code point, the max unicode is 0x10FFFF + return 0; } // Not optimal but we very rarely use this function. @@ -1696,8 +1716,8 @@ static inline int ImTextCountUtf8BytesFromChar(unsigned int c) { if (c < 0x80) return 1; if (c < 0x800) return 2; - if (c >= 0xdc00 && c < 0xe000) return 0; - if (c >= 0xd800 && c < 0xdc00) return 4; + if (c < 0x10000) return 3; + if (c <= 0x10FFFF) return 4; return 3; } @@ -9748,6 +9768,7 @@ static void WindowSettingsHandler_WriteAll(ImGuiContext* ctx, ImGuiSettingsHandl #else #include #endif +#include #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_APP) // UWP doesn't have Win32 functions #define IMGUI_DISABLE_WIN32_DEFAULT_CLIPBOARD_FUNCTIONS #define IMGUI_DISABLE_WIN32_DEFAULT_IME_FUNCTIONS @@ -9760,6 +9781,7 @@ static void WindowSettingsHandler_WriteAll(ImGuiContext* ctx, ImGuiSettingsHandl #ifdef _MSC_VER #pragma comment(lib, "user32") +#pragma comment(lib, "kernel32") #endif // Win32 clipboard implementation @@ -9775,11 +9797,11 @@ static const char* GetClipboardTextFn_DefaultImpl(void*) ::CloseClipboard(); return NULL; } - if (ImWchar* wbuf_global = (ImWchar*)::GlobalLock(wbuf_handle)) + if (const WCHAR* wbuf_global = (const WCHAR*)::GlobalLock(wbuf_handle)) { - int buf_len = ImTextCountUtf8BytesFromStr(wbuf_global, NULL) + 1; + int buf_len = ::WideCharToMultiByte(CP_UTF8, 0, wbuf_global, -1, NULL, 0, NULL, NULL); buf_local.resize(buf_len); - ImTextStrToUtf8(buf_local.Data, buf_len, wbuf_global, NULL); + ::WideCharToMultiByte(CP_UTF8, 0, wbuf_global, -1, buf_local.Data, buf_len, NULL, NULL); } ::GlobalUnlock(wbuf_handle); ::CloseClipboard(); @@ -9790,15 +9812,15 @@ static void SetClipboardTextFn_DefaultImpl(void*, const char* text) { if (!::OpenClipboard(NULL)) return; - const int wbuf_length = ImTextCountCharsFromUtf8(text, NULL) + 1; - HGLOBAL wbuf_handle = ::GlobalAlloc(GMEM_MOVEABLE, (SIZE_T)wbuf_length * sizeof(ImWchar)); + const int wbuf_length = ::MultiByteToWideChar(CP_UTF8, 0, text, -1, NULL, 0); + HGLOBAL wbuf_handle = ::GlobalAlloc(GMEM_MOVEABLE, (SIZE_T)wbuf_length * sizeof(WCHAR)); if (wbuf_handle == NULL) { ::CloseClipboard(); return; } - ImWchar* wbuf_global = (ImWchar*)::GlobalLock(wbuf_handle); - ImTextStrFromUtf8(wbuf_global, wbuf_length, text, NULL); + WCHAR* wbuf_global = (WCHAR*)::GlobalLock(wbuf_handle); + ::MultiByteToWideChar(CP_UTF8, 0, text, -1, wbuf_global, wbuf_length); ::GlobalUnlock(wbuf_handle); ::EmptyClipboard(); if (::SetClipboardData(CF_UNICODETEXT, wbuf_handle) == NULL) diff --git a/imgui.h b/imgui.h index 1e7b3f335..09160d117 100644 --- a/imgui.h +++ b/imgui.h @@ -143,7 +143,11 @@ struct ImGuiTextFilter; // Helper to parse and apply text filters (e typedef void* ImTextureID; // User data to identify a texture (this is whatever to you want it to be! read the FAQ about ImTextureID in imgui.cpp) #endif typedef unsigned int ImGuiID; // Unique ID used by widgets (typically hashed from a stack of string) -typedef unsigned short ImWchar; // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings. +#ifndef ImWchar +#define ImWchar ImWchar16 +#endif +typedef unsigned short ImWchar16; // A single U16 character for keyboard input/display. We encode them as multi bytes UTF-8 when used in strings. +typedef int ImWchar32; // A single 32bit character for keyboard input/display, define ImWchar to ImWchar32 to use it. See imconfig.h . typedef int ImGuiCol; // -> enum ImGuiCol_ // Enum: A color identifier for styling typedef int ImGuiCond; // -> enum ImGuiCond_ // Enum: A condition for many Set*() functions typedef int ImGuiDataType; // -> enum ImGuiDataType_ // Enum: A primary data type @@ -1465,6 +1469,7 @@ struct ImGuiIO // Functions IMGUI_API void AddInputCharacter(unsigned int c); // Queue new character input + IMGUI_API void AddInputCharacterUTF16(ImWchar16 c); // Queue new character input from an UTF-16 character, it can be a surrogate IMGUI_API void AddInputCharactersUTF8(const char* str); // Queue new characters input from an UTF-8 string IMGUI_API void ClearInputCharacters(); // Clear the text input buffer manually @@ -1507,6 +1512,7 @@ struct ImGuiIO float KeysDownDurationPrev[512]; // Previous duration the key has been down float NavInputsDownDuration[ImGuiNavInput_COUNT]; float NavInputsDownDurationPrev[ImGuiNavInput_COUNT]; + ImWchar16 Surrogate; // For AddInputCharacterUTF16 ImVector InputQueueCharacters; // Queue of _characters_ input (obtained by platform back-end). Fill using AddInputCharacter() helper. IMGUI_API ImGuiIO(); @@ -2092,7 +2098,11 @@ struct ImFontGlyphRangesBuilder ImVector UsedChars; // Store 1-bit per Unicode code point (0=unused, 1=used) ImFontGlyphRangesBuilder() { Clear(); } - inline void Clear() { int size_in_bytes = (IM_UNICODE_CODEPOINT_MAX+1) / 8; UsedChars.resize(size_in_bytes / (int)sizeof(ImU32)); memset(UsedChars.Data, 0, (size_t)size_in_bytes); } + inline void Clear() + { + int MaxUnicode = sizeof(ImWchar) == 2 ? 0x10000 : 0x110000; + UsedChars.resize(MaxUnicode / sizeof(int)); memset(UsedChars.Data, 0, MaxUnicode / sizeof(int)); + } inline bool GetBit(int n) const { int off = (n >> 5); ImU32 mask = 1u << (n & 31); return (UsedChars[off] & mask) != 0; } // Get bit n in the array inline void SetBit(int n) { int off = (n >> 5); ImU32 mask = 1u << (n & 31); UsedChars[off] |= mask; } // Set bit n in the array inline void AddChar(ImWchar c) { SetBit(c); } // Add character diff --git a/imgui_draw.cpp b/imgui_draw.cpp index 69d18945a..28f77281b 100644 --- a/imgui_draw.cpp +++ b/imgui_draw.cpp @@ -2550,8 +2550,7 @@ void ImFontGlyphRangesBuilder::AddText(const char* text, const char* text_end) text += c_len; if (c_len == 0) break; - if (c <= IM_UNICODE_CODEPOINT_MAX) - AddChar((ImWchar)c); + AddChar((ImWchar)c); } }