From: Simon Glass <sjg@chromium.org>
The Linux kernel's NLS (National Language Support) subsystem provides
character set conversion for filesystems with Unicode support.
Add minimal stubs for struct nls_table, load_nls(), unload_nls(), and
a basic utf16s_to_utf8s() implementation. These are needed by the isofs
Joliet extension for Unicode filename support.
Signed-off-by: Simon Glass <sjg@chromium.org>
---
include/charset.h | 20 ++++++++++++++++++
include/linux/nls.h | 50 +++++++++++++++++++++++++++++++++++++++++++++
lib/charset.c | 29 ++++++++++++++++++++++++++
3 files changed, 99 insertions(+)
create mode 100644 include/linux/nls.h
@@ -303,6 +303,26 @@ size_t u16_strlcat(u16 *dest, const u16 *src, size_t count);
*/
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size);
+/*
+ * Defined in <linux/nls.h>, which includes this header only after the
+ * definition. Declaring an enum ahead of its definition is a GNU C
+ * extension, used here so that this header need not include <linux/nls.h>.
+ */
+enum utf16_endian;
+
+/**
+ * utf16s_to_utf8s() - convert a UTF-16 string to UTF-8 with explicit endianness
+ *
+ * Linux NLS-compatible interface that wraps utf16_to_utf8(). Converts at
+ * most @inlen UTF-16 code units from @pwcs to UTF-8, stopping at a null
+ * character or when @maxout bytes have been written. Surrogate pairs are
+ * handled by the underlying utf16_to_utf8() implementation.
+ *
+ * @pwcs: source UTF-16 string
+ * @inlen: number of UTF-16 code units to convert
+ * @endian: byte order of the source string (UTF16_BIG_ENDIAN, etc.)
+ * @s: destination buffer for UTF-8 output
+ * @maxout: size of the destination buffer in bytes
+ * Return: number of bytes written to @s, or 0 if the temporary buffer used
+ * for the byte-order conversion cannot be allocated
+ */
+int utf16s_to_utf8s(const u16 *pwcs, int inlen, enum utf16_endian endian,
+ u8 *s, int maxout);
+
/**
* utf_to_cp() - translate Unicode code point to 8bit codepage
*
new file mode 100644
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Minimal NLS (National Language Support) stubs for U-Boot
+ *
+ * Based on the interface from Linux's include/linux/nls.h but heavily
+ * simplified: struct nls_table is trimmed to the fields used by isofs,
+ * load_nls() and unload_nls() are no-ops, and utf16s_to_utf8s() is a
+ * thin wrapper around utf16_to_utf8(), implemented in lib/charset.c.
+ *
+ * Joliet support requires NLS for character set conversion. These stubs
+ * allow the code to compile without full NLS infrastructure.
+ */
+#ifndef _LINUX_NLS_H
+#define _LINUX_NLS_H
+
+#include <linux/types.h>
+
+#define NLS_MAX_CHARSET_SIZE 6
+
+/**
+ * enum utf16_endian - UTF-16 byte order
+ *
+ * Passed as the @endian argument of utf16s_to_utf8s() to describe how the
+ * 16-bit code units of the source string are stored.
+ *
+ * @UTF16_HOST_ENDIAN: by its name, the byte order of the CPU
+ *	(NOTE(review): confirm that the converter leaves such input unswapped)
+ * @UTF16_LITTLE_ENDIAN: source is little-endian; read with __le16_to_cpu()
+ * @UTF16_BIG_ENDIAN: source is big-endian; read with __be16_to_cpu()
+ */
+enum utf16_endian {
+ UTF16_HOST_ENDIAN,
+ UTF16_LITTLE_ENDIAN,
+ UTF16_BIG_ENDIAN,
+};
+
+/**
+ * struct nls_table - character-set conversion table
+ *
+ * Cut-down version of the Linux structure of the same name. The loader
+ * stubs in this header always return NULL, so nothing in this header ever
+ * creates an instance.
+ *
+ * @charset: name of the character set
+ * @uni2char: callback taking a wide character @uni and an output buffer
+ *	@out bounded by @boundlen (NOTE(review): presumably returns the
+ *	number of bytes produced or a negative error, as in Linux - confirm)
+ * @char2uni: callback taking an input buffer @rawstring bounded by
+ *	@boundlen and storing one wide character in @uni (NOTE(review):
+ *	presumably returns the number of bytes consumed or a negative
+ *	error, as in Linux - confirm)
+ */
+struct nls_table {
+ const char *charset;
+ int (*uni2char)(wchar_t uni, unsigned char *out, int boundlen);
+ int (*char2uni)(const unsigned char *rawstring, int boundlen,
+ wchar_t *uni);
+};
+
+/**
+ * load_nls() - stub for the Linux character-set table loader
+ *
+ * No conversion tables exist in this minimal implementation, so the request
+ * always fails and callers have to cope with a NULL table.
+ *
+ * @charset: name of the requested character set (ignored)
+ * Return: always NULL
+ */
+static inline struct nls_table *load_nls(const char *charset)
+{
+ return NULL;
+}
+
+/**
+ * load_nls_default() - stub for the Linux default character-set loader
+ *
+ * There is no default conversion table in this minimal implementation.
+ *
+ * Return: always NULL
+ */
+static inline struct nls_table *load_nls_default(void)
+{
+ return NULL;
+}
+
+/**
+ * unload_nls() - stub for releasing a character-set table
+ *
+ * Does nothing: load_nls() and load_nls_default() never hand out a table,
+ * so there is nothing to release.
+ *
+ * @nls: table to release (ignored)
+ */
+static inline void unload_nls(struct nls_table *nls)
+{
+}
+
+#include <charset.h>
+
+#endif /* _LINUX_NLS_H */
@@ -10,6 +10,7 @@
#include <cp437.h>
#include <efi_loader.h>
#include <errno.h>
+#include <linux/nls.h>
#include <malloc.h>
/**
@@ -311,6 +312,34 @@ int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
return 0;
}
+/* Fetch code unit @idx of @pwcs and bring it into CPU byte order */
+static u16 utf16s_get_unit(const u16 *pwcs, int idx, enum utf16_endian endian)
+{
+	switch (endian) {
+	case UTF16_BIG_ENDIAN:
+		return __be16_to_cpu(pwcs[idx]);
+	case UTF16_LITTLE_ENDIAN:
+		return __le16_to_cpu(pwcs[idx]);
+	default:
+		/* UTF16_HOST_ENDIAN: already in CPU byte order, do not swap */
+		return pwcs[idx];
+	}
+}
+
+/**
+ * utf16s_to_utf8s() - convert a UTF-16 string of given byte order to UTF-8
+ *
+ * The code units are copied into a CPU-endian scratch buffer which is then
+ * handed to utf16_to_utf8(). Copying stops at the first null code unit,
+ * after @inlen code units, or as soon as the next character would no longer
+ * fit into the remaining part of @maxout, so the conversion is bounded by
+ * the size of @s. A surrogate pair is either taken or dropped as a whole.
+ *
+ * @pwcs: source UTF-16 string
+ * @inlen: maximum number of UTF-16 code units to convert
+ * @endian: byte order of @pwcs
+ * @s: destination buffer for the UTF-8 output
+ * @maxout: size of @s in bytes
+ * Return: number of bytes written to @s; 0 if there is nothing to convert
+ *	   or if the scratch buffer cannot be allocated
+ */
+int utf16s_to_utf8s(const u16 *pwcs, int inlen, enum utf16_endian endian,
+		    u8 *s, int maxout)
+{
+	u8 *start = s;
+	u16 *tmp;
+	int room = maxout;
+	int i = 0;
+
+	/* Also keeps a negative length from becoming a huge allocation */
+	if (inlen <= 0 || maxout <= 0)
+		return 0;
+
+	tmp = malloc((size_t)inlen * sizeof(*tmp));
+	if (!tmp)
+		return 0;
+
+	while (i < inlen) {
+		u16 unit = utf16s_get_unit(pwcs, i, endian);
+		u16 low = 0;
+		int units = 1;
+		int need;
+
+		if (!unit)
+			break;
+
+		/*
+		 * UTF-8 length of a single code unit. An unpaired surrogate
+		 * is budgeted with 3 bytes as well; this assumes that
+		 * utf16_to_utf8() never emits more than that for one.
+		 */
+		if (unit < 0x80)
+			need = 1;
+		else if (unit < 0x800)
+			need = 2;
+		else
+			need = 3;
+
+		/* High surrogate followed by low surrogate: 4 bytes in all */
+		if (unit >= 0xd800 && unit <= 0xdbff && i + 1 < inlen) {
+			low = utf16s_get_unit(pwcs, i + 1, endian);
+			if (low >= 0xdc00 && low <= 0xdfff) {
+				units = 2;
+				need = 4;
+			}
+		}
+
+		/* Stop before a character that does not fit completely */
+		if (need > room)
+			break;
+
+		tmp[i] = unit;
+		if (units == 2)
+			tmp[i + 1] = low;
+
+		room -= need;
+		i += units;
+	}
+
+	s = utf16_to_utf8(s, tmp, i);
+	free(tmp);
+
+	return (int)(s - start);
+}
+
s32 utf_to_lower(const s32 code)
{
struct capitalization_table *pos = capitalization_table;