From 0720a06a7518c9d0c0125bd5d1f3b6264c55c3dd Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 17 Nov 2011 16:42:19 -0500 Subject: NLS: improve UTF8 -> UTF16 string conversion routine The utf8s_to_utf16s conversion routine needs to be improved. Unlike its utf16s_to_utf8s sibling, it doesn't accept arguments specifying the maximum length of the output buffer or the endianness of its 16-bit output. This patch (as1501) adds the two missing arguments, and adjusts the only two places in the kernel where the function is called. A follow-on patch will add a third caller that does utilize the new capabilities. The two conversion routines are still annoyingly inconsistent in the way they handle invalid byte combinations. But that's a subject for a different patch. Signed-off-by: Alan Stern CC: Clemens Ladisch Signed-off-by: Greg Kroah-Hartman --- fs/fat/namei_vfat.c | 3 ++- fs/nls/nls_base.c | 43 +++++++++++++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index a87a65663c2..c25cf151b84 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, int charlen; if (utf8) { - *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname); + *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN, + (wchar_t *) outname, FAT_LFN_LEN + 2); if (*outlen < 0) return *outlen; else if (*outlen > FAT_LFN_LEN) diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 44a88a9fa2c..0eb059ec6f2 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) } EXPORT_SYMBOL(utf32_to_utf8); -int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + *s = (wchar_t) c; + break; + case UTF16_LITTLE_ENDIAN: + *s = __cpu_to_le16(c); + break; + case UTF16_BIG_ENDIAN: + *s = __cpu_to_be16(c); + break; + } +} + +int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, + wchar_t *pwcs, int maxlen) { u16 *op; int size; unicode_t u; op = pwcs; - while (*s && len > 0) { + while (len > 0 && maxlen > 0 && *s) { if (*s & 0x80) { size = utf8_to_utf32(s, len, &u); if (size < 0) return -EINVAL; + s += size; + len -= size; if (u >= PLANE_SIZE) { + if (maxlen < 2) + break; u -= PLANE_SIZE; - *op++ = (wchar_t) (SURROGATE_PAIR | - ((u >> 10) & SURROGATE_BITS)); - *op++ = (wchar_t) (SURROGATE_PAIR | + put_utf16(op++, SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS), + endian); + put_utf16(op++, SURROGATE_PAIR | SURROGATE_LOW | - (u & SURROGATE_BITS)); + (u & SURROGATE_BITS), + endian); + maxlen -= 2; } else { - *op++ = (wchar_t) u; + put_utf16(op++, u, endian); + maxlen--; } - s += size; - len -= size; } else { - *op++ = *s++; + put_utf16(op++, *s++, endian); len--; + maxlen--; } } return op - pwcs; -- cgit v1.2.3 From 045ddc8991698a8e9c5668c6190faa8b5d516dc0 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 21 Nov 2011 10:15:13 -0500 Subject: NLS: raname "maxlen" to "maxout" in UTF conversion routines As requested by NamJae Jeon, this patch (as1503) changes the name of the "maxlen" parameters to "maxout" in the various UTF conversion routines. This should make the role of that parameter more clear. The patch also renames the "len" parameters to "inlen", for the same reason. Signed-off-by: Alan Stern Reviewed-by: NamJae Jeon Signed-off-by: Greg Kroah-Hartman --- fs/nls/nls_base.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 0eb059ec6f2..fea6bd5831d 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -52,7 +52,7 @@ static const struct utf8_table utf8_table[] = #define SURROGATE_LOW 0x00000400 #define SURROGATE_BITS 0x000003ff -int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) +int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu) { unsigned long l; int c0, c, nc; @@ -71,7 +71,7 @@ int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) *pu = (unicode_t) l; return nc; } - if (len <= nc) + if (inlen <= nc) return -1; s++; c = (*s ^ 0x80) & 0xFF; @@ -83,7 +83,7 @@ int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) } EXPORT_SYMBOL(utf8_to_utf32); -int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) +int utf32_to_utf8(unicode_t u, u8 *s, int maxout) { unsigned long l; int c, nc; @@ -97,7 +97,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) return -1; nc = 0; - for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { + for (t = utf8_table; t->cmask && maxout; t++, maxout--) { nc++; if (l <= t->lmask) { c = t->shift; @@ -129,24 +129,24 @@ static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) } } -int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, - wchar_t *pwcs, int maxlen) +int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian, + wchar_t *pwcs, int maxout) { u16 *op; int size; unicode_t u; op = pwcs; - while (len > 0 && maxlen > 0 && *s) { + while (inlen > 0 && maxout > 0 && *s) { if (*s & 0x80) { - size = utf8_to_utf32(s, len, &u); + size = utf8_to_utf32(s, inlen, &u); if (size < 0) return -EINVAL; s += size; - len -= size; + inlen -= size; if (u >= PLANE_SIZE) { - if (maxlen < 2) + if (maxout < 2) break; u -= PLANE_SIZE; put_utf16(op++, SURROGATE_PAIR | @@ -156,15 +156,15 @@ int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian, SURROGATE_LOW | (u & SURROGATE_BITS), endian); - maxlen -= 2; + maxout -= 2; } else { put_utf16(op++, u, endian); - maxlen--; + maxout--; } } else { put_utf16(op++, *s++, endian); - len--; - maxlen--; + inlen--; + maxout--; } } return op - pwcs; @@ -183,27 +183,27 @@ static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian) } } -int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, - u8 *s, int maxlen) +int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian, + u8 *s, int maxout) { u8 *op; int size; unsigned long u, v; op = s; - while (len > 0 && maxlen > 0) { + while (inlen > 0 && maxout > 0) { u = get_utf16(*pwcs, endian); if (!u) break; pwcs++; - len--; + inlen--; if (u > 0x7f) { if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { if (u & SURROGATE_LOW) { /* Ignore character and move on */ continue; } - if (len <= 0) + if (inlen <= 0) break; v = get_utf16(*pwcs, endian); if ((v & SURROGATE_MASK) != SURROGATE_PAIR || @@ -214,18 +214,18 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) + (v & SURROGATE_BITS); pwcs++; - len--; + inlen--; } - size = utf32_to_utf8(u, op, maxlen); + size = utf32_to_utf8(u, op, maxout); if (size == -1) { /* Ignore character and move on */ } else { op += size; - maxlen -= size; + maxout -= size; } } else { *op++ = (u8) u; - maxlen--; + maxout--; } } return op - s; -- cgit v1.2.3