/**
* @file
*
* @ingroup DOSFS
*
* @brief UTF-8 Converter
*/
/*
* Copyright (c) 2013 embedded brains GmbH. All rights reserved.
*
* The license and distribution terms for this file may be
* found in the file LICENSE in this distribution or at
* http://www.rtems.org/license/LICENSE.
*/
#include <stddef.h>
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <rtems/dosfs.h>
#include <utf8proc/utf8proc.h>
#include "msdos.h"
/*
 * Sentinel value the stored iconv descriptors are compared against in this
 * file to detect a descriptor that was not opened successfully.
 */
#define INVALID_ICONV_DESC ( (iconv_t) -1 )
/*
 * Instance data of the UTF-8 converter: the generic converter control
 * followed by the four iconv descriptors and the buffer which is published
 * through the generic control.
 */
typedef struct {
/*
 * This structure must be the first field, since otherwise the cast
 * operations later in the file are invalid.
 */
rtems_dosfs_convert_control super;
/* Descriptor opened with iconv_open( "UTF-8", codepage ) */
iconv_t desc_codepage_to_utf8;
/* Descriptor opened with iconv_open( codepage, "UTF-8" ) */
iconv_t desc_utf8_to_codepage;
/* Descriptor opened with iconv_open( "UTF-8", "UTF-16LE" ) */
iconv_t desc_utf16_to_utf8;
/* Descriptor opened with iconv_open( "UTF-16LE", "UTF-8" ) */
iconv_t desc_utf8_to_utf16;
/* Storage which the create function exposes via super.buffer */
uint8_t buffer[MSDOS_NAME_MAX_UTF8_LFN_BYTES];
} msdos_utf8_convert_control;
/*
 * Run a single iconv() conversion from src to dst.
 *
 * desc      Conversion descriptor to use.
 * src       Input bytes; not modified through this function.
 * src_size  Number of input bytes.
 * dst       Output buffer.
 * dst_size  On entry the capacity of dst in bytes, on return the number of
 *           bytes iconv() has written to dst (updated on every path).
 *
 * Returns 0 if everything was converted reversibly, ENOMEM if iconv()
 * reported an error, and EINVAL if iconv() converted at least one character
 * in a non-reversible way.
 */
static int msdos_utf8_convert_with_iconv(
  iconv_t     desc,
  const void *src,
  size_t      src_size,
  void       *dst,
  size_t     *dst_size
)
{
  /* iconv() takes a non-const input pointer, hence the cast via uintptr_t */
  char   *in_cursor = (void *) (uintptr_t) src;
  char   *out_cursor = dst;
  size_t  in_remaining = src_size;
  size_t  out_remaining = *dst_size;
  size_t  irreversible;

  irreversible = iconv(
    desc,
    &in_cursor,
    &in_remaining,
    &out_cursor,
    &out_remaining
  );

  /* Turn the capacity into the number of bytes actually produced */
  *dst_size -= out_remaining;

  if ( irreversible == (size_t) -1 ) {
    /*
     * iconv() failed.  Every failure is reported as ENOMEM, since a too
     * small output buffer seems to be the most likely reason.
     */
    return ENOMEM;
  }

  /*
   * A non-zero count means some characters were converted in a
   * non-reversible way.  Only reversible conversions are accepted;
   * characters permitted within DOSFS names seem to be reversible.
   */
  return irreversible == 0 ? 0 : EINVAL;
}
/*
 * Handler: convert a string in the configured codepage to UTF-8.
 *
 * super must be the control embedded in a msdos_utf8_convert_control.
 * src_size is the input size in bytes; *dst_size is the capacity of dst on
 * entry and the number of bytes written on return.
 *
 * Returns the status of msdos_utf8_convert_with_iconv().
 */
static int msdos_utf8_codepage_to_utf8(
  rtems_dosfs_convert_control *super,
  const char                  *src,
  size_t                       src_size,
  uint8_t                     *dst,
  size_t                      *dst_size
)
{
  iconv_t desc =
    ( (msdos_utf8_convert_control *) super )->desc_codepage_to_utf8;

  return msdos_utf8_convert_with_iconv( desc, src, src_size, dst, dst_size );
}
/*
 * Handler: convert a UTF-8 string to the configured codepage.
 *
 * super must be the control embedded in a msdos_utf8_convert_control.
 * src_size is the input size in bytes; *dst_size is the capacity of dst on
 * entry and the number of bytes written on return.
 *
 * Returns the status of msdos_utf8_convert_with_iconv().
 */
static int msdos_utf8_utf8_to_codepage(
  rtems_dosfs_convert_control *super,
  const uint8_t               *src,
  size_t                       src_size,
  char                        *dst,
  size_t                      *dst_size
)
{
  iconv_t desc =
    ( (msdos_utf8_convert_control *) super )->desc_utf8_to_codepage;

  return msdos_utf8_convert_with_iconv( desc, src, src_size, dst, dst_size );
}
/*
 * Handler: convert a UTF-16 (little endian) string to UTF-8.
 *
 * super must be the control embedded in a msdos_utf8_convert_control.
 * src_size is the input size in bytes; *dst_size is the capacity of dst on
 * entry and the number of bytes written on return.
 *
 * Returns the status of msdos_utf8_convert_with_iconv().
 */
static int msdos_utf8_utf16_to_utf8(
  rtems_dosfs_convert_control *super,
  const uint16_t              *src,
  size_t                       src_size,
  uint8_t                     *dst,
  size_t                      *dst_size
)
{
  iconv_t desc =
    ( (msdos_utf8_convert_control *) super )->desc_utf16_to_utf8;

  return msdos_utf8_convert_with_iconv( desc, src, src_size, dst, dst_size );
}
/*
 * Handler: convert a UTF-8 string to UTF-16 (little endian).
 *
 * super must be the control embedded in a msdos_utf8_convert_control.
 * src_size is the input size in bytes; *dst_size is the capacity of dst on
 * entry and the number of bytes written on return.
 *
 * Returns the status of msdos_utf8_convert_with_iconv().
 */
static int msdos_utf8_utf8_to_utf16(
  rtems_dosfs_convert_control *super,
  const uint8_t               *src,
  size_t                       src_size,
  uint16_t                    *dst,
  size_t                      *dst_size
)
{
  iconv_t desc =
    ( (msdos_utf8_convert_control *) super )->desc_utf8_to_utf16;

  return msdos_utf8_convert_with_iconv( desc, src, src_size, dst, dst_size );
}
/*
 * Translate a utf8proc status code into an errno value.
 *
 * Zero maps to 0, UTF8PROC_ERROR_NOMEM to ENOMEM, UTF8PROC_ERROR_OVERFLOW to
 * EOVERFLOW, and the invalid UTF-8, not assigned and invalid options errors
 * to EINVAL.  Every other value yields ENOENT.
 */
static int msdos_utf8proc_errmsg_to_errno( ssize_t errcode )
{
  switch ( errcode ) {
    case 0:
      return 0;

    case UTF8PROC_ERROR_NOMEM:
      return ENOMEM;

    case UTF8PROC_ERROR_OVERFLOW:
      return EOVERFLOW;

    case UTF8PROC_ERROR_INVALIDUTF8:
    case UTF8PROC_ERROR_NOTASSIGNED:
    case UTF8PROC_ERROR_INVALIDOPTS:
      return EINVAL;

    default:
      return ENOENT;
  }
}
/*
 * Handler: decompose and case fold a UTF-8 string with utf8proc.
 *
 * super     Unused.
 * src       UTF-8 input.
 * src_size  Size of the input in bytes.
 * dst       Output buffer.  It is first used as an int32_t array receiving
 *           the decomposed code points and then re-encoded in place.
 * dst_size  On entry the capacity of dst in bytes.  On return the value
 *           delivered by utf8proc_reencode(), provided re-encoding succeeded
 *           (this includes the truncated ENOMEM case); otherwise unchanged.
 *
 * Returns 0 on success, ENOMEM if the decomposition needs more code points
 * than fit into dst (the code points that did fit are still re-encoded), or
 * the errno value mapped from a utf8proc error.
 *
 * NOTE(review): dst is accessed through an int32_t pointer, so callers
 * presumably have to pass a suitably aligned buffer -- confirm.
 * NOTE(review): the utf8proc documentation asks for one spare byte beyond
 * the input data of utf8proc_reencode() -- verify that callers leave this
 * headroom.
 */
static int msdos_utf8_normalize_and_fold(
  rtems_dosfs_convert_control *super,
  const uint8_t               *src,
  const size_t                 src_size,
  uint8_t                     *dst,
  size_t                      *dst_size
)
{
  int32_t *codepoints = (int32_t *) dst;
  ssize_t  capacity = *dst_size / sizeof( *codepoints );
  ssize_t  to_encode;
  ssize_t  status;
  int      eno = 0;

  (void) super;

  status = utf8proc_decompose(
    src,
    (ssize_t) src_size,
    codepoints,
    capacity,
    UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_CASEFOLD
  );

  if ( status < 0 ) {
    return msdos_utf8proc_errmsg_to_errno( status );
  }

  if ( status > capacity ) {
    /* More code points required than available: encode what fits */
    to_encode = capacity;
    eno = ENOMEM;
  } else {
    to_encode = status;
  }

  status = utf8proc_reencode(
    codepoints,
    to_encode,
    UTF8PROC_STABLE | UTF8PROC_DECOMPOSE
  );

  if ( status < 0 ) {
    return msdos_utf8proc_errmsg_to_errno( status );
  }

  *dst_size = status;

  return eno;
}
/*
 * Handler: release a converter.
 *
 * Closes every iconv descriptor which is not INVALID_ICONV_DESC, asserting
 * that each close succeeds, and then frees the converter memory.  super must
 * be the control embedded in a msdos_utf8_convert_control.
 */
static void msdos_utf8_destroy(
  rtems_dosfs_convert_control *super
)
{
  msdos_utf8_convert_control *ctl = (msdos_utf8_convert_control *) super;
  iconv_t                     descs[] = {
    ctl->desc_utf16_to_utf8,
    ctl->desc_codepage_to_utf8,
    ctl->desc_utf8_to_codepage,
    ctl->desc_utf8_to_utf16
  };
  size_t                      i;

  for ( i = 0; i < sizeof( descs ) / sizeof( descs[ 0 ] ); ++i ) {
    if ( descs[ i ] != INVALID_ICONV_DESC ) {
      int status = iconv_close( descs[ i ] );

      assert( status == 0 );
    }
  }

  free( ctl );
}
/*
 * Handler table which binds the generic converter operations to the
 * iconv/utf8proc based implementations of this file.
 */
static const rtems_dosfs_convert_handler msdos_utf8_convert_handler = {
  /* Codepage <-> UTF-8 */
  .codepage_to_utf8 = msdos_utf8_codepage_to_utf8,
  .utf8_to_codepage = msdos_utf8_utf8_to_codepage,
  /* UTF-16 <-> UTF-8 */
  .utf16_to_utf8 = msdos_utf8_utf16_to_utf8,
  .utf8_to_utf16 = msdos_utf8_utf8_to_utf16,
  /* Normalization and life cycle */
  .utf8_normalize_and_fold = msdos_utf8_normalize_and_fold,
  .destroy = msdos_utf8_destroy
};
/*
 * Create a UTF-8 converter for the given codepage.
 *
 * codepage  Name of the codepage as understood by iconv_open().
 *
 * Opens the four iconv descriptors (codepage <-> UTF-8 and UTF-16LE <->
 * UTF-8) and publishes the handler table and the internal buffer through the
 * returned control.
 *
 * Returns the converter control, or NULL if codepage is NULL, the memory
 * allocation failed, or one of the iconv descriptors could not be opened.
 * In the latter case the descriptors opened so far are closed and the memory
 * is freed again.  A returned converter is released via its destroy handler.
 */
rtems_dosfs_convert_control *rtems_dosfs_create_utf8_converter(
  const char *codepage
)
{
  msdos_utf8_convert_control *self;

  /* Do not hand a NULL encoding name to iconv_open() */
  if ( codepage == NULL ) {
    return NULL;
  }

  self = malloc( sizeof( *self ) );
  if ( self == NULL ) {
    /*
     * Return NULL explicitly instead of forming &self->super from a NULL
     * pointer, which would only work by virtue of super being at offset 0.
     */
    return NULL;
  }

  self->desc_codepage_to_utf8 = iconv_open( "UTF-8", codepage );
  self->desc_utf8_to_codepage = iconv_open( codepage, "UTF-8" );
  self->desc_utf16_to_utf8 = iconv_open( "UTF-8", "UTF-16LE" );
  self->desc_utf8_to_utf16 = iconv_open( "UTF-16LE", "UTF-8" );

  if (
    self->desc_utf16_to_utf8 == INVALID_ICONV_DESC
      || self->desc_utf8_to_codepage == INVALID_ICONV_DESC
      || self->desc_codepage_to_utf8 == INVALID_ICONV_DESC
      || self->desc_utf8_to_utf16 == INVALID_ICONV_DESC
  ) {
    /* Closes the descriptors which were opened and frees self */
    msdos_utf8_destroy( &self->super );

    return NULL;
  }

  self->super.handler = &msdos_utf8_convert_handler;
  self->super.buffer.data = &self->buffer;
  self->super.buffer.size = sizeof( self->buffer );

  return &self->super;
}