module std.windows.charset;

private import std.c.windows.windows;
private import std.windows.syserror;
private import std.utf;
private import std.string;

// If non-zero, the application should use the W versions of Windows API
// functions and std.utf.toUTF16z and toUTF8, rather than toMBSz and
// fromMBSz.
int useWfuncs = 1;

// Module constructor: replaces the default value of useWfuncs with the
// outcome of the GetVersion() comparison below.
static this()
{
    // Win 95, 98, ME do not implement the W functions
    // TODO: detect MSLU?
    useWfuncs = (GetVersion() < 0x80000000);
}

// Converts the UTF-8 string s into a null-terminated string in a Windows
// 8-bit character set.
//
// codePage may be the number of any codepage, or
//   0 - ANSI
//   1 - OEM
//   2 - Mac
//
// Returns: a pointer to the null-terminated result. When no byte of s is
// 0x80 or above, this is simply std.string.toStringz(s); otherwise it points
// into a new buffer filled by WideCharToMultiByte.
//
// Throws: Exception, carrying the text of GetLastError(), if either
// WideCharToMultiByte call does not produce the expected length.
char* toMBSz(char[] s, uint codePage = 0)
{
    // Only need to do this if any chars have the high bit set
    foreach (char c; s)
    {
        if (c >= 0x80)
        {
            char[] result;
            int readLen;
            wchar* ws = std.utf.toUTF16z(s);

            // First call passes no output buffer; its return value is used
            // as the length of the result buffer.
            result.length = WideCharToMultiByte(codePage, 0, ws, -1,
                    null, 0, null, null);
            if (result.length == 0)
            {
                throw new Exception("Couldn't convert string: " ~
                        sysErrorString(GetLastError()));
            }

            // Second call performs the conversion into the sized buffer.
            readLen = WideCharToMultiByte(codePage, 0, ws, -1,
                    result.ptr, result.length, null, null);
            if (readLen != result.length)
            {
                throw new Exception("Couldn't convert string: " ~
                        sysErrorString(GetLastError()));
            }

            // Hand back the raw pointer explicitly instead of relying on an
            // implicit array-to-pointer conversion.
            return result.ptr;
        }
    }
    return std.string.toStringz(s);
}

// Converts the null-terminated string s from a Windows 8-bit character set
// into a UTF-8 D string.
//
// codePage may be the number of any codepage, or
//   0 - ANSI
//   1 - OEM
//   2 - Mac
//
// Returns: when every byte before the terminator is below 0x80, a slice of
// the caller's buffer (not a copy); otherwise the UTF-8 conversion of the
// text produced by MultiByteToWideChar. A null s yields an empty string.
//
// Throws: Exception, carrying the text of GetLastError(), if either
// MultiByteToWideChar call does not produce the expected length.
char[] fromMBSz(char* s, int codePage = 0)
{
    // A null pointer has nothing to scan; return early rather than
    // dereferencing it in the loop below.
    if (s is null)
        return null;

    char* c;
    for (c = s; *c != 0; c++)
    {
        if (*c >= 0x80)
        {
            wchar[] result;
            int readLen;

            // First call passes no output buffer; its return value is used
            // as the length of the result buffer.
            result.length = MultiByteToWideChar(codePage, 0, s, -1, null, 0);
            if (result.length == 0)
            {
                throw new Exception("Couldn't convert string: " ~
                        sysErrorString(GetLastError()));
            }

            // Second call performs the conversion into the sized buffer.
            readLen = MultiByteToWideChar(codePage, 0, s, -1,
                    result.ptr, result.length);
            if (readLen != result.length)
            {
                throw new Exception("Couldn't convert string: " ~
                        sysErrorString(GetLastError()));
            }

            return std.utf.toUTF8(result[0 .. $ - 1]); // omit trailing null
        }
    }

    // No byte needed conversion: c now points at the terminator, so the
    // slice covers exactly the characters before it.
    return s[0 .. c - s];
}

// Converts a null-terminated UTF-16 string into a UTF-8 D string.
// A null s yields an empty string.
//
// TODO move to std.utf, where it would better belong
char[] toUTF8(wchar* s)
{
    // Avoid handing a null pointer to wcslen.
    if (s is null)
        return null;

    return std.utf.toUTF8(s[0 .. std.string.wcslen(s)]);
}

/+
 + This is the test code I used. I'm not sure if it would be practical to
 + make this into a unittest block, since in general we can't guarantee that
 + the user's codepage has a given character.
 +

void main()
{
    printf("ANSI codepage: %u\n", GetACP());
    printf("OEM codepage: %u\n", GetOEMCP());
    test("Hello!");
    test("Crème brulée £1.10");
}

void test(char[] text)
{
    char* mbsz = toMBSz(text);
    char[] back = fromMBSz(mbsz);

    printf("Original: %.*s\n", text);   // the one last thing printf is good for!
    printf("ANSI: %s\n", mbsz);
    printf("Back from ANSI: %.*s\n", back);

    mbsz = toMBSz(text, 1);
    back = fromMBSz(mbsz, 1);

    printf("OEM: %s\n", mbsz);
    printf("Back from OEM: %.*s\n", back);
}

+/