February 09, 2005
When calculating padding, doFormat counts the number of bytes in the string, not the number of characters. As a result, UTF-8 strings containing multibyte characters are padded incorrectly.

A quick and dirty fix is to apply the following to format.d:

141c141
<           int padding = field_width - (strlen(prefix) + s.length);
---
>           int padding = field_width - (strlen(prefix) + toUTF32(s).length);

Another, better solution is to add functions to std.utf for counting the number of characters in a string. This is slightly faster and avoids unnecessary memory allocation.

One possible way to do this (for UTF-8) follows below. It's basically a stripped
down version of decode().

# /**
#  * Counts the UTF-8 encoded sequences (one per character) in a string
#  * without decoding them.
#  *
#  * Params:
#  *   s = the UTF-8 encoded text to scan
#  *
#  * Returns: the number of sequences found; every ASCII byte counts as one,
#  *   and every valid 2- to 4-byte sequence counts as one.
#  *
#  * Throws: UtfError, carrying the index of the lead byte of the offending
#  *   sequence, when a sequence is malformed, truncated or overlong.
#  */
# int countChars(char[] s)
# {
#     size_t total = s.length;
#     size_t count = 0;
#     size_t pos = 0;
#
#     while (pos != total)
#     {
#         char lead = s[pos];
#
#         // Fast path: a byte with the high bit clear is a complete character.
#         if ((lead & 0x80) == 0)
#         {
#             pos++;
#             count++;
#             continue;
#         }
#
#         // The number of leading 1 bits in the lead byte is the length of
#         // the sequence. Only lengths 2..4 are accepted.
#         uint seqLen = 1;
#         while (((lead << seqLen) & 0x80) != 0)
#         {
#             seqLen++;
#             if (seqLen > 4)
#                 goto Lerr;              // 5- and 6-byte forms are rejected
#         }
#         if (seqLen == 1)
#             goto Lerr;                  // 10xxxxxx cannot start a sequence
#
#         // The last byte of the sequence must still be inside the string.
#         if (pos + (seqLen - 1) >= total)
#             goto Lerr;
#
#         // Reject overlong forms, i.e. values encoded with more bytes than
#         // necessary. The 0xF8 and 0xFC tests mirror the 5- and 6-byte lead
#         // bytes, which the length scan above has already refused.
#         char second = s[pos + 1];
#         bool overlong =
#             (lead & 0xFE) == 0xC0 ||
#             (lead == 0xE0 && (second & 0xE0) == 0x80) ||
#             (lead == 0xF0 && (second & 0xF0) == 0x80) ||
#             (lead == 0xF8 && (second & 0xF8) == 0x80) ||
#             (lead == 0xFC && (second & 0xFC) == 0x80);
#         if (overlong)
#             goto Lerr;
#
#         // Every byte after the lead byte must look like 10xxxxxx.
#         for (uint k = 1; k < seqLen; k++)
#         {
#             char trail = s[pos + k];
#             if ((trail & 0xC0) != 0x80)
#                 goto Lerr;
#         }
#
#         pos += seqLen;
#         count++;
#     }
#     return count;
#
# Lerr:
#     // pos has not been advanced yet, so it still names the lead byte.
#     throw new UtfError("invalid UTF-8 sequence", pos);
# }

Nick