diff options
Diffstat (limited to 'cpukit/libfs')
-rw-r--r-- | cpukit/libfs/Makefile.am | 1 | ||||
-rw-r--r-- | cpukit/libfs/src/dosfs/dosfs.h | 17 | ||||
-rw-r--r-- | cpukit/libfs/src/dosfs/msdos_conv_utf8.c | 308 | ||||
-rw-r--r-- | cpukit/libfs/src/dosfs/msdos_misc.c | 24 |
4 files changed, 340 insertions, 10 deletions
diff --git a/cpukit/libfs/Makefile.am b/cpukit/libfs/Makefile.am index e06c8bd381..58733f739e 100644 --- a/cpukit/libfs/Makefile.am +++ b/cpukit/libfs/Makefile.am @@ -82,6 +82,7 @@ libdosfs_a_SOURCES += src/dosfs/msdos_create.c src/dosfs/msdos_dir.c \ src/dosfs/msdos_mknod.c src/dosfs/msdos_node_type.c \ src/dosfs/msdos_rmnod.c src/dosfs/msdos_statvfs.c \ src/dosfs/msdos_conv_default.c \ + src/dosfs/msdos_conv_utf8.c \ src/dosfs/msdos_conv.c src/dosfs/msdos.h src/dosfs/msdos_format.c \ src/dosfs/dosfs.h src/dosfs/msdos_rename.c endif diff --git a/cpukit/libfs/src/dosfs/dosfs.h b/cpukit/libfs/src/dosfs/dosfs.h index f1c3d87d51..acfc143096 100644 --- a/cpukit/libfs/src/dosfs/dosfs.h +++ b/cpukit/libfs/src/dosfs/dosfs.h @@ -206,7 +206,8 @@ typedef struct { /** * @brief Converter implementation for new filesystem instance. * - * @see rtems_dosfs_create_default_converter(). + * @see rtems_dosfs_create_default_converter() and + * rtems_dosfs_create_utf8_converter(). */ rtems_dosfs_convert_control *converter; } rtems_dosfs_mount_options; @@ -221,6 +222,20 @@ typedef struct { */ rtems_dosfs_convert_control *rtems_dosfs_create_default_converter(void); +/** + * @brief Allocates and initializes a UTF-8 converter. + * + * @param[in] codepage The iconv() identification string for the used codepage. + * + * @retval NULL Something failed. + * @retval other Pointer to initialized converter. + * + * @see rtems_dosfs_mount_options and mount(). + */ +rtems_dosfs_convert_control *rtems_dosfs_create_utf8_converter( + const char *codepage +); + #define MSDOS_FMT_INFO_LEVEL_NONE (0) #define MSDOS_FMT_INFO_LEVEL_INFO (1) #define MSDOS_FMT_INFO_LEVEL_DETAIL (2) diff --git a/cpukit/libfs/src/dosfs/msdos_conv_utf8.c b/cpukit/libfs/src/dosfs/msdos_conv_utf8.c new file mode 100644 index 0000000000..a80db7e09c --- /dev/null +++ b/cpukit/libfs/src/dosfs/msdos_conv_utf8.c @@ -0,0 +1,308 @@ +/** + * @file + * + * @ingroup DOSFS + * + * @brief UTF-8 Converter + */ + +/* + * Copyright (c) 2013 embedded brains GmbH. All rights reserved. + * + * embedded brains GmbH + * Dornierstr. 4 + * 82178 Puchheim + * Germany + * <rtems@embedded-brains.de> + * + * The license and distribution terms for this file may be + * found in the file LICENSE in this distribution or at + * http://www.rtems.com/license/LICENSE. + */ + +#include <stddef.h> +#include <assert.h> +#include <errno.h> +#include <iconv.h> +#include <rtems/dosfs.h> +#include <utf8proc/utf8proc.h> +#include "msdos.h" + +#define INVALID_ICONV_DESC ( (iconv_t) -1 ) + +typedef struct { + /* + * This structure must be the first field, since otherwise the cast + * operations later in the file are invalid. + */ + rtems_dosfs_convert_control super; + + iconv_t desc_codepage_to_utf8; + iconv_t desc_utf8_to_codepage; + iconv_t desc_utf16_to_utf8; + iconv_t desc_utf8_to_utf16; + uint8_t buffer[MSDOS_NAME_MAX_UTF8_LFN_BYTES]; +} msdos_utf8_convert_control; + +static int msdos_utf8_convert_with_iconv( + iconv_t desc, + const void *src, + size_t src_size, + void *dst, + size_t *dst_size +) +{ + int eno = 0; + size_t inbytes_left = src_size; + size_t outbytes_left = *dst_size; + char *inbuf = (void *) (uintptr_t) src; + char *outbuf = dst; + size_t iconv_status; + + iconv_status = iconv( + desc, + &inbuf, + &inbytes_left, + &outbuf, + &outbytes_left + ); + + *dst_size -= outbytes_left; + + if ( iconv_status > 0 ) { + eno = EINVAL; + } else if ( iconv_status < 0 ) { + eno = ENOMEM; + } + + return eno; +} + +static int msdos_utf8_codepage_to_utf8( + rtems_dosfs_convert_control *super, + const char *src, + size_t src_size, + uint8_t *dst, + size_t *dst_size +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + + return msdos_utf8_convert_with_iconv( + self->desc_codepage_to_utf8, + src, + src_size, + dst, + dst_size + ); +} + +static int msdos_utf8_utf8_to_codepage( + rtems_dosfs_convert_control *super, + const uint8_t *src, + size_t src_size, + char *dst, + size_t *dst_size +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + + return msdos_utf8_convert_with_iconv( + self->desc_utf8_to_codepage, + src, + src_size, + dst, + dst_size + ); +} + +static int msdos_utf8_utf16_to_utf8( + rtems_dosfs_convert_control *super, + const uint16_t *src, + size_t src_size, + uint8_t *dst, + size_t *dst_size +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + + return msdos_utf8_convert_with_iconv( + self->desc_utf16_to_utf8, + src, + src_size, + dst, + dst_size + ); +} + +static int msdos_utf8_utf8_to_utf16( + rtems_dosfs_convert_control *super, + const uint8_t *src, + size_t src_size, + uint16_t *dst, + size_t *dst_size +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + + return msdos_utf8_convert_with_iconv( + self->desc_utf8_to_utf16, + src, + src_size, + dst, + dst_size + ); +} + +static int msdos_utf8proc_errmsg_to_errno( ssize_t errcode ) +{ + int eno = 0; + + + switch ( errcode ) { + case 0: + eno = 0; + break; + case UTF8PROC_ERROR_NOMEM: + eno = ENOMEM; + break; + case UTF8PROC_ERROR_OVERFLOW: + eno = EOVERFLOW; + break; + case UTF8PROC_ERROR_INVALIDUTF8: + eno = EINVAL; + break; + case UTF8PROC_ERROR_NOTASSIGNED: + eno = EINVAL; + break; + case UTF8PROC_ERROR_INVALIDOPTS: + eno = EINVAL; + break; + default: + eno = ENOENT; + break; + } + + return eno; +} + +static int msdos_utf8_normalize_and_fold( + rtems_dosfs_convert_control *super, + const uint8_t *src, + const size_t src_size, + uint8_t *dst, + size_t *dst_size +) +{ + int eno = 0; + int32_t *unicode_buf = (int32_t *) dst; + ssize_t unicode_buf_size = *dst_size / sizeof( *unicode_buf ); + ssize_t unicodes_to_reencode; + ssize_t result; + + (void) super; + + result = utf8proc_decompose( + src, + (ssize_t) src_size, + unicode_buf, + unicode_buf_size, + UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_CASEFOLD + ); + + if ( result >= 0 ) { + if ( result < unicode_buf_size ) { + unicodes_to_reencode = result; + } else { + unicodes_to_reencode = unicode_buf_size - 1; + eno = ENOMEM; + } + + result = utf8proc_reencode( + unicode_buf, + unicodes_to_reencode, + UTF8PROC_STABLE | UTF8PROC_DECOMPOSE + ); + + if ( result >= 0 ) { + *dst_size = result; + } else { + eno = msdos_utf8proc_errmsg_to_errno( result ); + } + } else { + eno = msdos_utf8proc_errmsg_to_errno( result ); + } + + return eno; +} + +static void msdos_utf8_destroy( + rtems_dosfs_convert_control *super +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + int rv; + + if ( self->desc_utf16_to_utf8 != INVALID_ICONV_DESC ) { + rv = iconv_close( self->desc_utf16_to_utf8 ); + assert( rv == 0 ); + } + + if ( self->desc_codepage_to_utf8 != INVALID_ICONV_DESC ) { + rv = iconv_close( self->desc_codepage_to_utf8 ); + assert( rv == 0 ); + } + + if ( self->desc_utf8_to_codepage != INVALID_ICONV_DESC ) { + rv = iconv_close( self->desc_utf8_to_codepage ); + assert( rv == 0 ); + } + + if ( self->desc_utf8_to_utf16 != INVALID_ICONV_DESC ) { + rv = iconv_close( self->desc_utf8_to_utf16 ); + assert( rv == 0 ); + } + + free( self ); +} + +static const rtems_dosfs_convert_handler msdos_utf8_convert_handler = { + .utf8_to_codepage = msdos_utf8_utf8_to_codepage, + .codepage_to_utf8 = msdos_utf8_codepage_to_utf8, + .utf8_to_utf16 = msdos_utf8_utf8_to_utf16, + .utf16_to_utf8 = msdos_utf8_utf16_to_utf8, + .utf8_normalize_and_fold = msdos_utf8_normalize_and_fold, + .destroy = msdos_utf8_destroy +}; + +rtems_dosfs_convert_control *rtems_dosfs_create_utf8_converter( + const char *codepage +) +{ + msdos_utf8_convert_control *self = malloc( sizeof( *self ) ); + + if ( self != NULL ) { + self->desc_codepage_to_utf8 = iconv_open( "UTF-8", codepage ); + self->desc_utf8_to_codepage = iconv_open( codepage, "UTF-8" ); + self->desc_utf16_to_utf8 = iconv_open( "UTF-8", "UTF-16LE" ); + self->desc_utf8_to_utf16 = iconv_open( "UTF-16LE", "UTF-8" ); + + if ( + self->desc_utf16_to_utf8 != INVALID_ICONV_DESC + && self->desc_utf8_to_codepage != INVALID_ICONV_DESC + && self->desc_codepage_to_utf8 != INVALID_ICONV_DESC + && self->desc_utf8_to_utf16 != INVALID_ICONV_DESC + ) { + rtems_dosfs_convert_control *super = &self->super; + + super->handler = &msdos_utf8_convert_handler; + super->buffer.data = &self->buffer; + super->buffer.size = sizeof( self->buffer ); + } else { + msdos_utf8_destroy( &self->super ); + self = NULL; + } + } + + return &self->super; +} diff --git a/cpukit/libfs/src/dosfs/msdos_misc.c b/cpukit/libfs/src/dosfs/msdos_misc.c index 56b58c85f7..959768c286 100644 --- a/cpukit/libfs/src/dosfs/msdos_misc.c +++ b/cpukit/libfs/src/dosfs/msdos_misc.c @@ -42,6 +42,13 @@ #include <stdio.h> +#define MSDOS_LFN_ENTRY_SIZE \ + (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR) + +#define MSDOS_LFN_ENTRY_SIZE_UTF8 \ + ((MSDOS_LFN_LEN_PER_ENTRY + 1 ) * MSDOS_NAME_LFN_BYTES_PER_CHAR \ + * MSDOS_NAME_MAX_UTF8_BYTES_PER_CHAR) + /* * External strings. Saves space this way. */ @@ -1019,7 +1026,7 @@ msdos_get_utf16_string_from_long_entry ( { ssize_t chars_in_entry; - if (buf_size >= MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR) { + if (buf_size >= MSDOS_LFN_ENTRY_SIZE) { memcpy (&entry_string_buf[0], &entry[1], 10 ); memcpy (&entry_string_buf[5], &entry[14], 12 ); memcpy (&entry_string_buf[11], &entry[28], 4 ); @@ -1195,7 +1202,7 @@ msdos_compare_entry_against_filename ( { ssize_t size_remaining = filename_size_remaining; int eno = 0; - uint8_t entry_normalized[( MSDOS_LFN_LEN_PER_ENTRY + 1 ) * MSDOS_NAME_LFN_BYTES_PER_CHAR * MSDOS_NAME_MAX_UTF8_BYTES_PER_CHAR]; + uint8_t entry_normalized[MSDOS_LFN_ENTRY_SIZE_UTF8]; size_t bytes_in_entry_normalized = sizeof ( entry_normalized ); eno = (*converter->handler->utf8_normalize_and_fold) ( @@ -1263,7 +1270,7 @@ msdos_find_file_in_directory ( bool empty_space_found = false; uint32_t entries_per_block = bts2rd / MSDOS_DIRECTORY_ENTRY_STRUCT_SIZE; int lfn_entry = 0; - uint8_t entry_utf8_normalized[(MSDOS_LFN_LEN_PER_ENTRY + 1 ) * MSDOS_NAME_LFN_BYTES_PER_CHAR * MSDOS_NAME_MAX_UTF8_BYTES_PER_CHAR/*MSDOS_ENTRY_LFN_UTF8_BYTES*/]; + uint8_t entry_utf8_normalized[MSDOS_LFN_ENTRY_SIZE_UTF8]; size_t bytes_in_entry; bool filename_matched = false; ssize_t filename_size_remaining = name_len_for_compare; @@ -1800,7 +1807,7 @@ msdos_add_file ( *MSDOS_DIR_LFN_CHECKSUM(entry) = lfn_checksum; p = entry + 1; - n = name_converted + (fat_entries - lfn_entry) * MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR; + n = name_converted + (fat_entries - lfn_entry) * MSDOS_LFN_ENTRY_SIZE; #if MSDOS_FIND_PRINT printf ("MSFS:[11] "); @@ -1919,8 +1926,8 @@ msdos_find_name_in_fat_file ( buffer, buffer_size); if (name_len_for_save > 0) { - fat_entries = (name_len_for_save -1 - + (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR)) / (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR); + fat_entries = (name_len_for_save + MSDOS_LFN_ENTRY_SIZE - 1) + / MSDOS_LFN_ENTRY_SIZE; name_len_for_compare = msdos_filename_utf8_to_long_name_for_compare ( converter, name_utf8, @@ -1984,9 +1991,8 @@ msdos_find_name_in_fat_file ( buffer, buffer_size); if (name_len_for_save > 0) { - fat_entries = (name_len_for_save -1 - + (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR)) / (MSDOS_LFN_LEN_PER_ENTRY * MSDOS_NAME_LFN_BYTES_PER_CHAR); - + fat_entries = (name_len_for_save + MSDOS_LFN_ENTRY_SIZE - 1) + / MSDOS_LFN_ENTRY_SIZE; } else retval = -1; |