summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- cpukit/libfs/Makefile.am 1
-rw-r--r-- cpukit/libfs/src/dosfs/dosfs.h 17
-rw-r--r-- cpukit/libfs/src/dosfs/msdos_conv_utf8.c 308
-rw-r--r-- cpukit/libfs/src/dosfs/msdos_misc.c 24
4 files changed, 340 insertions, 10 deletions
diff --git a/cpukit/libfs/Makefile.am b/cpukit/libfs/Makefile.am
index e06c8bd381..58733f739e 100644
--- a/cpukit/libfs/Makefile.am
+++ b/cpukit/libfs/Makefile.am
@@ -82,6 +82,7 @@ libdosfs_a_SOURCES += src/dosfs/msdos_create.c src/dosfs/msdos_dir.c \
src/dosfs/msdos_mknod.c src/dosfs/msdos_node_type.c \
src/dosfs/msdos_rmnod.c src/dosfs/msdos_statvfs.c \
src/dosfs/msdos_conv_default.c \
+ src/dosfs/msdos_conv_utf8.c \
src/dosfs/msdos_conv.c src/dosfs/msdos.h src/dosfs/msdos_format.c \
src/dosfs/dosfs.h src/dosfs/msdos_rename.c
endif
diff --git a/cpukit/libfs/src/dosfs/dosfs.h b/cpukit/libfs/src/dosfs/dosfs.h
index f1c3d87d51..acfc143096 100644
--- a/cpukit/libfs/src/dosfs/dosfs.h
+++ b/cpukit/libfs/src/dosfs/dosfs.h
@@ -206,7 +206,8 @@ typedef struct {
/**
* @brief Converter implementation for new filesystem instance.
*
- * @see rtems_dosfs_create_default_converter().
+ * @see rtems_dosfs_create_default_converter() and
+ * rtems_dosfs_create_utf8_converter().
*/
rtems_dosfs_convert_control *converter;
} rtems_dosfs_mount_options;
@@ -221,6 +222,20 @@ typedef struct {
*/
rtems_dosfs_convert_control *rtems_dosfs_create_default_converter(void);
+/**
+ * @brief Allocates and initializes a UTF-8 converter.
+ *
+ * The converter uses iconv() to translate between the given codepage and
+ * UTF-8 as well as between UTF-16LE and UTF-8.
+ *
+ * @param[in] codepage The iconv() identification string for the used codepage.
+ *
+ * @retval NULL Something failed, e.g. the memory allocation or one of the
+ * iconv_open() calls.
+ * @retval other Pointer to initialized converter.
+ *
+ * @see rtems_dosfs_mount_options and mount().
+ */
+rtems_dosfs_convert_control *rtems_dosfs_create_utf8_converter(
+ const char *codepage
+);
+
#define MSDOS_FMT_INFO_LEVEL_NONE (0)
#define MSDOS_FMT_INFO_LEVEL_INFO (1)
#define MSDOS_FMT_INFO_LEVEL_DETAIL (2)
diff --git a/cpukit/libfs/src/dosfs/msdos_conv_utf8.c b/cpukit/libfs/src/dosfs/msdos_conv_utf8.c
new file mode 100644
index 0000000000..a80db7e09c
--- /dev/null
+++ b/cpukit/libfs/src/dosfs/msdos_conv_utf8.c
@@ -0,0 +1,308 @@
+/**
+ * @file
+ *
+ * @ingroup DOSFS
+ *
+ * @brief UTF-8 Converter
+ */
+
+/*
+ * Copyright (c) 2013 embedded brains GmbH. All rights reserved.
+ *
+ * embedded brains GmbH
+ * Dornierstr. 4
+ * 82178 Puchheim
+ * Germany
+ * <rtems@embedded-brains.de>
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.com/license/LICENSE.
+ */
+
+#include <stddef.h>
+#include <assert.h>
+#include <errno.h>
+#include <iconv.h>
+#include <rtems/dosfs.h>
+#include <utf8proc/utf8proc.h>
+#include "msdos.h"
+
+#define INVALID_ICONV_DESC ( (iconv_t) -1 )
+
+/*
+ * State of one UTF-8 converter instance: the generic converter control
+ * followed by the iconv() descriptors and the buffer owned by the instance.
+ */
+typedef struct {
+ /*
+ * This structure must be the first field, since otherwise the cast
+ * operations later in the file are invalid.
+ */
+ rtems_dosfs_convert_control super;
+
+ /* One iconv() conversion descriptor per conversion direction. */
+ iconv_t desc_codepage_to_utf8;
+ iconv_t desc_utf8_to_codepage;
+ iconv_t desc_utf16_to_utf8;
+ iconv_t desc_utf8_to_utf16;
+ /* Storage published through super.buffer when the converter is created. */
+ uint8_t buffer[MSDOS_NAME_MAX_UTF8_LFN_BYTES];
+} msdos_utf8_convert_control;
+
+/*
+ * Performs one iconv() conversion of src_size bytes from src into dst.
+ *
+ * On entry *dst_size is the capacity of dst in bytes.  On return it is the
+ * number of bytes written to dst, also if an error is reported.
+ *
+ * Returns 0 if iconv() reports zero non-reversible conversions and EINVAL
+ * otherwise, i.e. for non-reversible conversions as well as for an iconv()
+ * failure.
+ */
+static int msdos_utf8_convert_with_iconv(
+  iconv_t     desc,
+  const void *src,
+  size_t      src_size,
+  void       *dst,
+  size_t     *dst_size
+)
+{
+  int     eno = 0;
+  size_t  inbytes_left = src_size;
+  size_t  outbytes_left = *dst_size;
+  /* iconv() wants a non-const input pointer, so the qualifier is dropped. */
+  char   *inbuf = (void *) (uintptr_t) src;
+  char   *outbuf = dst;
+  size_t  iconv_status;
+
+  iconv_status = iconv(
+    desc,
+    &inbuf,
+    &inbytes_left,
+    &outbuf,
+    &outbytes_left
+  );
+
+  *dst_size -= outbytes_left;
+
+  /*
+   * iconv() returns the unsigned type size_t and signals a failure with
+   * (size_t) -1.  A "< 0" test on it can never be true, so the former ENOMEM
+   * branch was unreachable and has been removed.  Both a failure and a
+   * non-zero count of non-reversible conversions yield EINVAL, exactly as
+   * before.
+   *
+   * NOTE(review): the removed branch suggests that a failed conversion was
+   * meant to yield ENOMEM.  Callers may rely on EINVAL by now, confirm
+   * against all users of the handler before changing the returned value.
+   */
+  if ( iconv_status != 0 ) {
+    eno = EINVAL;
+  }
+
+  return eno;
+}
+
+/*
+ * Converter handler: converts a string in the configured codepage to UTF-8
+ * using the codepage-to-UTF-8 iconv() descriptor of this converter.
+ *
+ * Size and return value semantics are those of
+ * msdos_utf8_convert_with_iconv().
+ */
+static int msdos_utf8_codepage_to_utf8(
+  rtems_dosfs_convert_control *super,
+  const char                  *src,
+  size_t                       src_size,
+  uint8_t                     *dst,
+  size_t                      *dst_size
+)
+{
+  /* Valid since the control structure is the first member of the instance. */
+  const msdos_utf8_convert_control *instance =
+    (const msdos_utf8_convert_control *) super;
+  iconv_t desc = instance->desc_codepage_to_utf8;
+
+  return msdos_utf8_convert_with_iconv( desc, src, src_size, dst, dst_size );
+}
+
+/*
+ * Converter handler: converts a UTF-8 string to the configured codepage
+ * using the UTF-8-to-codepage iconv() descriptor of this converter.
+ *
+ * Size and return value semantics are those of
+ * msdos_utf8_convert_with_iconv().
+ */
+static int msdos_utf8_utf8_to_codepage(
+  rtems_dosfs_convert_control *super,
+  const uint8_t               *src,
+  size_t                       src_size,
+  char                        *dst,
+  size_t                      *dst_size
+)
+{
+  /* Valid since the control structure is the first member of the instance. */
+  const msdos_utf8_convert_control *instance =
+    (const msdos_utf8_convert_control *) super;
+  iconv_t desc = instance->desc_utf8_to_codepage;
+
+  return msdos_utf8_convert_with_iconv( desc, src, src_size, dst, dst_size );
+}
+
+/*
+ * Converter handler: converts a UTF-16 string to UTF-8 using the
+ * UTF-16-to-UTF-8 iconv() descriptor of this converter.
+ *
+ * Size and return value semantics are those of
+ * msdos_utf8_convert_with_iconv().
+ */
+static int msdos_utf8_utf16_to_utf8(
+  rtems_dosfs_convert_control *super,
+  const uint16_t              *src,
+  size_t                       src_size,
+  uint8_t                     *dst,
+  size_t                      *dst_size
+)
+{
+  /* Valid since the control structure is the first member of the instance. */
+  const msdos_utf8_convert_control *instance =
+    (const msdos_utf8_convert_control *) super;
+  iconv_t desc = instance->desc_utf16_to_utf8;
+
+  return msdos_utf8_convert_with_iconv( desc, src, src_size, dst, dst_size );
+}
+
+/*
+ * Converter handler: converts a UTF-8 string to UTF-16 using the
+ * UTF-8-to-UTF-16 iconv() descriptor of this converter.
+ *
+ * Size and return value semantics are those of
+ * msdos_utf8_convert_with_iconv().
+ */
+static int msdos_utf8_utf8_to_utf16(
+  rtems_dosfs_convert_control *super,
+  const uint8_t               *src,
+  size_t                       src_size,
+  uint16_t                    *dst,
+  size_t                      *dst_size
+)
+{
+  /* Valid since the control structure is the first member of the instance. */
+  const msdos_utf8_convert_control *instance =
+    (const msdos_utf8_convert_control *) super;
+  iconv_t desc = instance->desc_utf8_to_utf16;
+
+  return msdos_utf8_convert_with_iconv( desc, src, src_size, dst, dst_size );
+}
+
+/*
+ * Translates a utf8proc status code into an errno value.
+ *
+ * Zero maps to 0, UTF8PROC_ERROR_NOMEM to ENOMEM, UTF8PROC_ERROR_OVERFLOW to
+ * EOVERFLOW, the invalid UTF-8, not assigned and invalid options errors to
+ * EINVAL, and every other value to ENOENT.
+ */
+static int msdos_utf8proc_errmsg_to_errno( ssize_t errcode )
+{
+  switch ( errcode ) {
+    case 0:
+      return 0;
+    case UTF8PROC_ERROR_NOMEM:
+      return ENOMEM;
+    case UTF8PROC_ERROR_OVERFLOW:
+      return EOVERFLOW;
+    case UTF8PROC_ERROR_INVALIDUTF8:
+    case UTF8PROC_ERROR_NOTASSIGNED:
+    case UTF8PROC_ERROR_INVALIDOPTS:
+      return EINVAL;
+    default:
+      return ENOENT;
+  }
+}
+
+/*
+ * Converter handler: decomposes and case folds the UTF-8 string src and
+ * stores the result as UTF-8 in dst.
+ *
+ * dst first serves as an array of 32-bit code points for
+ * utf8proc_decompose() and is then re-encoded in place by
+ * utf8proc_reencode().  On entry *dst_size is the size of dst in bytes.
+ * Whenever the re-encoding succeeds, *dst_size is set to its return value.
+ *
+ * Returns 0 on success and ENOMEM if the decomposition did not fit into dst;
+ * in the latter case the code points which fit are still re-encoded.  A
+ * utf8proc error is reported as the errno value produced by
+ * msdos_utf8proc_errmsg_to_errno().
+ */
+static int msdos_utf8_normalize_and_fold(
+  rtems_dosfs_convert_control *super,
+  const uint8_t               *src,
+  const size_t                 src_size,
+  uint8_t                     *dst,
+  size_t                      *dst_size
+)
+{
+  int32_t *code_points = (int32_t *) dst;
+  ssize_t  capacity = *dst_size / sizeof( *code_points );
+  ssize_t  decomposed;
+  ssize_t  to_reencode;
+  ssize_t  encoded;
+  int      eno = 0;
+
+  (void) super;
+
+  decomposed = utf8proc_decompose(
+    src,
+    (ssize_t) src_size,
+    code_points,
+    capacity,
+    UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_CASEFOLD
+  );
+
+  if ( decomposed < 0 ) {
+    return msdos_utf8proc_errmsg_to_errno( decomposed );
+  }
+
+  if ( decomposed >= capacity ) {
+    /*
+     * The result does not fit.  Re-encode one code point less than the
+     * buffer can hold, presumably to leave room for what
+     * utf8proc_reencode() appends; TODO confirm against utf8proc.
+     */
+    to_reencode = capacity - 1;
+    eno = ENOMEM;
+  } else {
+    to_reencode = decomposed;
+  }
+
+  encoded = utf8proc_reencode(
+    code_points,
+    to_reencode,
+    UTF8PROC_STABLE | UTF8PROC_DECOMPOSE
+  );
+
+  if ( encoded < 0 ) {
+    return msdos_utf8proc_errmsg_to_errno( encoded );
+  }
+
+  *dst_size = encoded;
+
+  return eno;
+}
+
+/*
+ * Converter handler: closes every valid iconv() descriptor of the converter
+ * and frees the converter itself.
+ *
+ * Descriptors equal to INVALID_ICONV_DESC are skipped, so this may be used
+ * for a converter whose iconv_open() calls partially failed.
+ */
+static void msdos_utf8_destroy(
+  rtems_dosfs_convert_control *super
+)
+{
+  msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super;
+  const iconv_t               descs[] = {
+    self->desc_utf16_to_utf8,
+    self->desc_codepage_to_utf8,
+    self->desc_utf8_to_codepage,
+    self->desc_utf8_to_utf16
+  };
+  size_t                      i;
+
+  for ( i = 0; i < sizeof( descs ) / sizeof( descs[0] ); ++i ) {
+    if ( descs[i] != INVALID_ICONV_DESC ) {
+      int rv = iconv_close( descs[i] );
+
+      assert( rv == 0 );
+    }
+  }
+
+  free( self );
+}
+
+/*
+ * Handler table of the UTF-8 converter.  Each converter created in this file
+ * points to this table.
+ */
+static const rtems_dosfs_convert_handler msdos_utf8_convert_handler = {
+  .codepage_to_utf8 = msdos_utf8_codepage_to_utf8,
+  .utf8_to_codepage = msdos_utf8_utf8_to_codepage,
+  .utf16_to_utf8 = msdos_utf8_utf16_to_utf8,
+  .utf8_to_utf16 = msdos_utf8_utf8_to_utf16,
+  .utf8_normalize_and_fold = msdos_utf8_normalize_and_fold,
+  .destroy = msdos_utf8_destroy
+};
+
+/*
+ * Allocates a UTF-8 converter and opens its four iconv() descriptors:
+ * codepage to UTF-8, UTF-8 to codepage, UTF-16LE to UTF-8 and UTF-8 to
+ * UTF-16LE.
+ *
+ * Returns the converter control on success.  Returns NULL if codepage is
+ * NULL, if the allocation fails, or if any iconv_open() call fails; in the
+ * last case the descriptors already opened are closed and the memory is
+ * freed.
+ */
+rtems_dosfs_convert_control *rtems_dosfs_create_utf8_converter(
+  const char *codepage
+)
+{
+  msdos_utf8_convert_control *self;
+
+  if ( codepage == NULL ) {
+    return NULL;
+  }
+
+  self = malloc( sizeof( *self ) );
+
+  /*
+   * Return NULL explicitly on failure.  Evaluating &self->super with a NULL
+   * self only produced NULL because super is the first member.
+   */
+  if ( self == NULL ) {
+    return NULL;
+  }
+
+  self->desc_codepage_to_utf8 = iconv_open( "UTF-8", codepage );
+  self->desc_utf8_to_codepage = iconv_open( codepage, "UTF-8" );
+  self->desc_utf16_to_utf8 = iconv_open( "UTF-8", "UTF-16LE" );
+  self->desc_utf8_to_utf16 = iconv_open( "UTF-16LE", "UTF-8" );
+
+  if (
+    self->desc_utf16_to_utf8 == INVALID_ICONV_DESC
+      || self->desc_utf8_to_codepage == INVALID_ICONV_DESC
+      || self->desc_codepage_to_utf8 == INVALID_ICONV_DESC
+      || self->desc_utf8_to_utf16 == INVALID_ICONV_DESC
+  ) {
+    /* Closes the descriptors which were opened and frees self. */
+    msdos_utf8_destroy( &self->super );
+
+    return NULL;
+  }
+
+  self->super.handler = &msdos_utf8_convert_handler;
+  self->super.buffer.data = &self->buffer;
+  self->super.buffer.size = sizeof( self->buffer );
+
+  return &self->super;
+}
diff --git a/cpukit/libfs/src/dosfs/msdos_misc.c b/cpukit/libfs/src/dosfs/msdos_misc.c
index 56b58c85f7..959768c286 100644
--- a/cpukit/libfs/src/dosfs/msdos_misc.c
+++ b/cpukit/libfs/src/dosfs/msdos_misc.c
@@ -42,6 +42,13 @@
#include <stdio.h>
+/*
+ * Number of bytes used by the name characters of one long file name
+ * directory entry.
+ */
+#define MSDOS_LFN_ENTRY_SIZE \
+ (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR)
+
+/*
+ * Size in bytes of the buffers which hold the normalized UTF-8 form of one
+ * long file name entry.  It accounts for one character more than an entry
+ * contains.
+ */
+#define MSDOS_LFN_ENTRY_SIZE_UTF8 \
+ ((MSDOS_LFN_LEN_PER_ENTRY + 1 ) * MSDOS_NAME_LFN_BYTES_PER_CHAR \
+ * MSDOS_NAME_MAX_UTF8_BYTES_PER_CHAR)
+
/*
* External strings. Saves space this way.
*/
@@ -1019,7 +1026,7 @@ msdos_get_utf16_string_from_long_entry (
{
ssize_t chars_in_entry;
- if (buf_size >= MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR) {
+ if (buf_size >= MSDOS_LFN_ENTRY_SIZE) {
memcpy (&entry_string_buf[0], &entry[1], 10 );
memcpy (&entry_string_buf[5], &entry[14], 12 );
memcpy (&entry_string_buf[11], &entry[28], 4 );
@@ -1195,7 +1202,7 @@ msdos_compare_entry_against_filename (
{
ssize_t size_remaining = filename_size_remaining;
int eno = 0;
- uint8_t entry_normalized[( MSDOS_LFN_LEN_PER_ENTRY + 1 ) * MSDOS_NAME_LFN_BYTES_PER_CHAR * MSDOS_NAME_MAX_UTF8_BYTES_PER_CHAR];
+ uint8_t entry_normalized[MSDOS_LFN_ENTRY_SIZE_UTF8];
size_t bytes_in_entry_normalized = sizeof ( entry_normalized );
eno = (*converter->handler->utf8_normalize_and_fold) (
@@ -1263,7 +1270,7 @@ msdos_find_file_in_directory (
bool empty_space_found = false;
uint32_t entries_per_block = bts2rd / MSDOS_DIRECTORY_ENTRY_STRUCT_SIZE;
int lfn_entry = 0;
- uint8_t entry_utf8_normalized[(MSDOS_LFN_LEN_PER_ENTRY + 1 ) * MSDOS_NAME_LFN_BYTES_PER_CHAR * MSDOS_NAME_MAX_UTF8_BYTES_PER_CHAR/*MSDOS_ENTRY_LFN_UTF8_BYTES*/];
+ uint8_t entry_utf8_normalized[MSDOS_LFN_ENTRY_SIZE_UTF8];
size_t bytes_in_entry;
bool filename_matched = false;
ssize_t filename_size_remaining = name_len_for_compare;
@@ -1800,7 +1807,7 @@ msdos_add_file (
*MSDOS_DIR_LFN_CHECKSUM(entry) = lfn_checksum;
p = entry + 1;
- n = name_converted + (fat_entries - lfn_entry) * MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR;
+ n = name_converted + (fat_entries - lfn_entry) * MSDOS_LFN_ENTRY_SIZE;
#if MSDOS_FIND_PRINT
printf ("MSFS:[11] ");
@@ -1919,8 +1926,8 @@ msdos_find_name_in_fat_file (
buffer,
buffer_size);
if (name_len_for_save > 0) {
- fat_entries = (name_len_for_save -1
- + (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR)) / (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR);
+ fat_entries = (name_len_for_save + MSDOS_LFN_ENTRY_SIZE - 1)
+ / MSDOS_LFN_ENTRY_SIZE;
name_len_for_compare = msdos_filename_utf8_to_long_name_for_compare (
converter,
name_utf8,
@@ -1984,9 +1991,8 @@ msdos_find_name_in_fat_file (
buffer,
buffer_size);
if (name_len_for_save > 0) {
- fat_entries = (name_len_for_save -1
- + (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR)) / (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR);
-
+ fat_entries = (name_len_for_save + MSDOS_LFN_ENTRY_SIZE - 1)
+ / MSDOS_LFN_ENTRY_SIZE;
}
else
retval = -1;