diff options
Diffstat (limited to 'cpukit/libfs/src/dosfs/msdos_conv_utf8.c')
-rw-r--r-- | cpukit/libfs/src/dosfs/msdos_conv_utf8.c | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/cpukit/libfs/src/dosfs/msdos_conv_utf8.c b/cpukit/libfs/src/dosfs/msdos_conv_utf8.c new file mode 100644 index 0000000000..a80db7e09c --- /dev/null +++ b/cpukit/libfs/src/dosfs/msdos_conv_utf8.c @@ -0,0 +1,308 @@ +/** + * @file + * + * @ingroup DOSFS + * + * @brief UTF-8 Converter + */ + +/* + * Copyright (c) 2013 embedded brains GmbH. All rights reserved. + * + * embedded brains GmbH + * Dornierstr. 4 + * 82178 Puchheim + * Germany + * <rtems@embedded-brains.de> + * + * The license and distribution terms for this file may be + * found in the file LICENSE in this distribution or at + * http://www.rtems.com/license/LICENSE. + */ + +#include <stddef.h> +#include <assert.h> +#include <errno.h> +#include <iconv.h> +#include <rtems/dosfs.h> +#include <utf8proc/utf8proc.h> +#include "msdos.h" + +#define INVALID_ICONV_DESC ( (iconv_t) -1 ) + +typedef struct { + /* + * This structure must be the first field, since otherwise the cast + * operations later in the file are invalid. + */ + rtems_dosfs_convert_control super; + + iconv_t desc_codepage_to_utf8; + iconv_t desc_utf8_to_codepage; + iconv_t desc_utf16_to_utf8; + iconv_t desc_utf8_to_utf16; + uint8_t buffer[MSDOS_NAME_MAX_UTF8_LFN_BYTES]; +} msdos_utf8_convert_control; + +static int msdos_utf8_convert_with_iconv( + iconv_t desc, + const void *src, + size_t src_size, + void *dst, + size_t *dst_size +) +{ + int eno = 0; + size_t inbytes_left = src_size; + size_t outbytes_left = *dst_size; + char *inbuf = (void *) (uintptr_t) src; + char *outbuf = dst; + size_t iconv_status; + + iconv_status = iconv( + desc, + &inbuf, + &inbytes_left, + &outbuf, + &outbytes_left + ); + + *dst_size -= outbytes_left; + + if ( iconv_status > 0 ) { + eno = EINVAL; + } else if ( iconv_status < 0 ) { + eno = ENOMEM; + } + + return eno; +} + +static int msdos_utf8_codepage_to_utf8( + rtems_dosfs_convert_control *super, + const char *src, + size_t src_size, + uint8_t *dst, + size_t *dst_size +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + + return msdos_utf8_convert_with_iconv( + self->desc_codepage_to_utf8, + src, + src_size, + dst, + dst_size + ); +} + +static int msdos_utf8_utf8_to_codepage( + rtems_dosfs_convert_control *super, + const uint8_t *src, + size_t src_size, + char *dst, + size_t *dst_size +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + + return msdos_utf8_convert_with_iconv( + self->desc_utf8_to_codepage, + src, + src_size, + dst, + dst_size + ); +} + +static int msdos_utf8_utf16_to_utf8( + rtems_dosfs_convert_control *super, + const uint16_t *src, + size_t src_size, + uint8_t *dst, + size_t *dst_size +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + + return msdos_utf8_convert_with_iconv( + self->desc_utf16_to_utf8, + src, + src_size, + dst, + dst_size + ); +} + +static int msdos_utf8_utf8_to_utf16( + rtems_dosfs_convert_control *super, + const uint8_t *src, + size_t src_size, + uint16_t *dst, + size_t *dst_size +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + + return msdos_utf8_convert_with_iconv( + self->desc_utf8_to_utf16, + src, + src_size, + dst, + dst_size + ); +} + +static int msdos_utf8proc_errmsg_to_errno( ssize_t errcode ) +{ + int eno = 0; + + + switch ( errcode ) { + case 0: + eno = 0; + break; + case UTF8PROC_ERROR_NOMEM: + eno = ENOMEM; + break; + case UTF8PROC_ERROR_OVERFLOW: + eno = EOVERFLOW; + break; + case UTF8PROC_ERROR_INVALIDUTF8: + eno = EINVAL; + break; + case UTF8PROC_ERROR_NOTASSIGNED: + eno = EINVAL; + break; + case UTF8PROC_ERROR_INVALIDOPTS: + eno = EINVAL; + break; + default: + eno = ENOENT; + break; + } + + return eno; +} + +static int msdos_utf8_normalize_and_fold( + rtems_dosfs_convert_control *super, + const uint8_t *src, + const size_t src_size, + uint8_t *dst, + size_t *dst_size +) +{ + int eno = 0; + int32_t *unicode_buf = (int32_t *) dst; + ssize_t unicode_buf_size = *dst_size / sizeof( *unicode_buf ); + ssize_t unicodes_to_reencode; + ssize_t result; + + (void) super; + + result = utf8proc_decompose( + src, + (ssize_t) src_size, + unicode_buf, + unicode_buf_size, + UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_CASEFOLD + ); + + if ( result >= 0 ) { + if ( result < unicode_buf_size ) { + unicodes_to_reencode = result; + } else { + unicodes_to_reencode = unicode_buf_size - 1; + eno = ENOMEM; + } + + result = utf8proc_reencode( + unicode_buf, + unicodes_to_reencode, + UTF8PROC_STABLE | UTF8PROC_DECOMPOSE + ); + + if ( result >= 0 ) { + *dst_size = result; + } else { + eno = msdos_utf8proc_errmsg_to_errno( result ); + } + } else { + eno = msdos_utf8proc_errmsg_to_errno( result ); + } + + return eno; +} + +static void msdos_utf8_destroy( + rtems_dosfs_convert_control *super +) +{ + msdos_utf8_convert_control *self = (msdos_utf8_convert_control *) super; + int rv; + + if ( self->desc_utf16_to_utf8 != INVALID_ICONV_DESC ) { + rv = iconv_close( self->desc_utf16_to_utf8 ); + assert( rv == 0 ); + } + + if ( self->desc_codepage_to_utf8 != INVALID_ICONV_DESC ) { + rv = iconv_close( self->desc_codepage_to_utf8 ); + assert( rv == 0 ); + } + + if ( self->desc_utf8_to_codepage != INVALID_ICONV_DESC ) { + rv = iconv_close( self->desc_utf8_to_codepage ); + assert( rv == 0 ); + } + + if ( self->desc_utf8_to_utf16 != INVALID_ICONV_DESC ) { + rv = iconv_close( self->desc_utf8_to_utf16 ); + assert( rv == 0 ); + } + + free( self ); +} + +static const rtems_dosfs_convert_handler msdos_utf8_convert_handler = { + .utf8_to_codepage = msdos_utf8_utf8_to_codepage, + .codepage_to_utf8 = msdos_utf8_codepage_to_utf8, + .utf8_to_utf16 = msdos_utf8_utf8_to_utf16, + .utf16_to_utf8 = msdos_utf8_utf16_to_utf8, + .utf8_normalize_and_fold = msdos_utf8_normalize_and_fold, + .destroy = msdos_utf8_destroy +}; + +rtems_dosfs_convert_control *rtems_dosfs_create_utf8_converter( + const char *codepage +) +{ + msdos_utf8_convert_control *self = malloc( sizeof( *self ) ); + + if ( self != NULL ) { + self->desc_codepage_to_utf8 = iconv_open( "UTF-8", codepage ); + self->desc_utf8_to_codepage = iconv_open( codepage, "UTF-8" ); + self->desc_utf16_to_utf8 = iconv_open( "UTF-8", "UTF-16LE" ); + self->desc_utf8_to_utf16 = iconv_open( "UTF-16LE", "UTF-8" ); + + if ( + self->desc_utf16_to_utf8 != INVALID_ICONV_DESC + && self->desc_utf8_to_codepage != INVALID_ICONV_DESC + && self->desc_codepage_to_utf8 != INVALID_ICONV_DESC + && self->desc_utf8_to_utf16 != INVALID_ICONV_DESC + ) { + rtems_dosfs_convert_control *super = &self->super; + + super->handler = &msdos_utf8_convert_handler; + super->buffer.data = &self->buffer; + super->buffer.size = sizeof( self->buffer ); + } else { + msdos_utf8_destroy( &self->super ); + self = NULL; + } + } + + return &self->super; +} |