/**
* @file
*
* @ingroup libfs_msdos MSDOS FileSystem
*
* @brief MSDOS Date Conversion
*/
/*
* Written by Paul Popelka (paulp@uts.amdahl.com)
*
* You can do anything you want with this software, just don't say you wrote
* it, and don't remove this notice.
*
* This software is provided "as is".
*
* The author supplies this software to be publicly redistributed on the
* understanding that the author is not responsible for the correct
* functioning of this software in any circumstances and is not liable for
* any damages caused by this software.
*
* Adaptation of NetBSD code for RTEMS by Victor V. Vengerov <vvv@oktet.ru>
* $NetBSD: msdosfs_conv.c,v 1.10 1994/12/27 18:36:24 mycroft Exp $
*
* October 1992
*
* Modifications to support UTF-8 in the file system are
* Copyright (c) 2013 embedded brains GmbH.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <ctype.h>
#include <rtems.h>
#include "msdos.h"
/* Number of seconds in one day, i.e. (24 * 60 * 60). */
#define SECONDSPERDAY ((uint32_t) 86400)
/*
* Special characters used by the file name conversions below, together
* with their sizes. The *_SIZE values are used in this file as byte
* counts when stepping through names.
*/
#define UTF8_MAX_CHAR_SIZE 4
#define UTF8_NULL 0x00
#define UTF8_NULL_SIZE 1
#define UTF8_BLANK 0x20
#define UTF8_BLANK_SIZE 1
#define UTF8_FULL_STOP 0x2e
#define UTF8_FULL_STOP_SIZE 1
/*
* The UTF-16 variants are passed through CT_LE_W(), which is defined
* outside this file (presumably a CPU to little-endian 16-bit conversion
* - confirm in msdos.h).
*/
#define UTF16_MAX_CHAR_SIZE 4
#define UTF16_NULL CT_LE_W( 0x0000 )
#define UTF16_NULL_SIZE 2
#define UTF16_BLANK CT_LE_W( 0x0020 )
#define UTF16_BLANK_SIZE 2
#define UTF16_FULL_STOP CT_LE_W( 0x002e )
#define UTF16_FULL_STOP_SIZE 2
/*
* Days in each month of a regular (non-leap) year, indexed from
* 0 (January) to 11 (December).
*/
static uint16_t regyear[] = {
31, 28, 31, 30, 31, 30,
31, 31, 30, 31, 30, 31
};
/*
* Days in each month of a leap year, indexed from 0 (January) to
* 11 (December). Differs from the regular table only in February.
*/
static uint16_t leapyear[] = {
31, 29, 31, 30, 31, 30,
31, 31, 30, 31, 30, 31
};
/*
* Variables used to remember parts of the last unix to dos time
* conversion, so that a full conversion can be avoided when the same
* second or the same day is converted again.
*
* lasttime - unix time passed to the last conversion
* lastday - number of whole days since 1970 of the last conversion
* lastddate - packed DOS date produced by the last date computation
* lastdtime - packed DOS time produced by the last conversion
*
* All four start out as zero. No locking is done in this file.
* NOTE(review): callers presumably serialise access - confirm.
*/
static uint32_t lasttime;
static uint32_t lastday;
static uint16_t lastddate;
static uint16_t lastdtime;
/*
 * Convert the unix version of time to dos's idea of time to be used in
 * file timestamps.  The passed in unix time is assumed to be in GMT.
 *
 * t    seconds since 1970-01-01 00:00:00
 * ddp  receives the packed DOS date (day, month, years since 1980)
 * dtp  receives the packed DOS time (2 second units, minutes, hours)
 *
 * The result of the previous call is kept in the file scope variables
 * lasttime, lastday, lastddate and lastdtime, so converting the same
 * second, or another second of the same day, skips most of the work.
 */
void
msdos_date_unix2dos(unsigned int t, uint16_t *ddp,
                    uint16_t *dtp)
{
    uint32_t        days;
    uint32_t        inc;
    uint32_t        year;
    uint32_t        month;
    const uint16_t *months;
    int             leap;

    /*
     * If the time from the last conversion is the same as now, then
     * skip the computations and use the saved result.
     *
     * The cache variables start out as zero, which is indistinguishable
     * from "the last conversion was for t == 0".  A computed DOS date is
     * never zero because day and month are stored starting at 1, so
     * lastddate == 0 is used to detect a cache that was never filled.
     */
    if (lasttime != t || lastddate == 0) {
        lasttime = t;
        lastdtime = (((t % 60) >> 1) << MSDOS_DT_2SECONDS_SHIFT)
                  + (((t / 60) % 60) << MSDOS_DT_MINUTES_SHIFT)
                  + (((t / 3600) % 24) << MSDOS_DT_HOURS_SHIFT);

        /*
         * If the number of days since 1970 is the same as the last
         * time we did the computation then skip all this leap year
         * and month stuff.
         */
        days = t / (SECONDSPERDAY);
        if (days != lastday || lastddate == 0) {
            lastday = days;

            /*
             * Peel off whole years.  The full Gregorian rule is used
             * because a 32 bit unix time reaches beyond the year 2100,
             * which is divisible by four but not a leap year.
             */
            for (year = 1970;; year++) {
                leap = (year % 4 == 0)
                    && (year % 100 != 0 || year % 400 == 0);
                inc = leap ? 366 : 365;
                if (days < inc)
                    break;
                days -= inc;
            }

            /* leap still describes the year the loop stopped in. */
            months = leap ? leapyear : regyear;
            for (month = 0; month < 12; month++) {
                if (days < months[month])
                    break;
                days -= months[month];
            }
            lastddate = ((days + 1) << MSDOS_DD_DAY_SHIFT)
                      + ((month + 1) << MSDOS_DD_MONTH_SHIFT);

            /*
             * Remember dos's idea of time is relative to 1980.
             * unix's is relative to 1970.  If somehow we get a
             * time before 1980 then don't give totally crazy
             * results: the year field is simply left at zero.
             */
            if (year > 1980)
                lastddate += (year - 1980) << MSDOS_DD_YEAR_SHIFT;
        }
    }
    *dtp = lastdtime;
    *ddp = lastddate;
}
/*
* The number of days between Jan 1, 1970 and Jan 1, 1980. In that
* interval there were 8 regular years and 2 leap years, i.e.
* (8 * 365) + (2 * 366) = 3652 days.
*/
#define DAYSTO1980 ((uint32_t) 3652)
/*
* Cache for the dos to unix conversion: the DOS date converted last and
* the number of seconds between the unix epoch and the start of that day.
* Both start out as zero.
*/
static uint16_t lastdosdate;
static uint32_t lastseconds;
/*
 * Convert from dos' idea of time to unix'.  This will probably only be
 * called from the stat(), and fstat() system calls and so probably need
 * not be too efficient.
 *
 * dd  packed DOS date (day, month, years since 1980)
 * dt  packed DOS time (2 second units, minutes, hours)
 *
 * Returns the number of seconds since 1970-01-01 00:00:00.  The date part
 * of the previous conversion is cached in lastdosdate and lastseconds.
 */
unsigned int
msdos_date_dos2unix(unsigned int dd, unsigned int dt)
{
    uint32_t        seconds;
    uint32_t        m, month;
    uint32_t        y, year;
    uint32_t        cal_year;
    uint32_t        days;
    const uint16_t *months;
    int             leap;

    seconds = 2 * ((dt & MSDOS_DT_2SECONDS_MASK) >> MSDOS_DT_2SECONDS_SHIFT)
            + ((dt & MSDOS_DT_MINUTES_MASK) >> MSDOS_DT_MINUTES_SHIFT) * 60
            + ((dt & MSDOS_DT_HOURS_MASK) >> MSDOS_DT_HOURS_SHIFT) * 3600;

    /*
     * If the year, month, and day from the last conversion are the
     * same then use the saved value.
     *
     * The cache starts out as all zero, which would wrongly match a DOS
     * date of 0 on the very first call.  A computed lastseconds always
     * includes the days up to 1980 and cannot be 0 for any date the DOS
     * fields can encode, so 0 marks a cache that was never filled.
     */
    if (lastdosdate != dd || lastseconds == 0) {
        lastdosdate = dd;
        days = 0;
        year = (dd & MSDOS_DD_YEAR_MASK) >> MSDOS_DD_YEAR_SHIFT;

        /*
         * Add up the whole years since 1980.  The full Gregorian rule
         * is used because the year field reaches beyond 2100, which is
         * divisible by four but not a leap year.  On exit leap
         * describes the year of the date itself.
         */
        for (y = 0;; y++) {
            cal_year = 1980 + y;
            leap = (cal_year % 4 == 0)
                && (cal_year % 100 != 0 || cal_year % 400 == 0);
            if (y == year)
                break;
            days += leap ? 366 : 365;
        }
        months = leap ? leapyear : regyear;

        /*
         * Keep the month inside 1..12.  Zero would make the loop bound
         * below wrap from 0 to 0xffffffff, and anything above 12 (only
         * possible with a damaged date) would read past the end of the
         * twelve entry month tables.
         */
        month = (dd & MSDOS_DD_MONTH_MASK) >> MSDOS_DD_MONTH_SHIFT;
        if (month == 0) {
            month = 1;
        } else if (month > 12) {
            month = 12;
        }
        for (m = 0; m < month - 1; m++)
            days += months[m];

        /*
         * The day field is 1 based.  A day field of 0 steps back one
         * day through unsigned wrap-around, exactly as before.
         */
        days += ((dd & MSDOS_DD_DAY_MASK) >> MSDOS_DD_DAY_SHIFT) - 1;
        lastseconds = (days + DAYSTO1980) * SECONDSPERDAY;
    }
    return seconds + lastseconds;
}
/*
* Lookup table indexed by a single byte character value (0x00 - 0xff).
* An entry holds the character itself when the table accepts it and 0
* when it does not. The lower case letters 0x61 - 0x7a are 0 here: the
* UTF-16 validator below accepts them separately and the short name
* conversion upper-cases its input before consulting the table.
*/
static const uint8_t codepage_valid_char_map[] = {
0, 0, 0, 0, 0, 0, 0, 0, /* 00-07 */
0, 0, 0, 0, 0, 0, 0, 0, /* 08-0f */
0, 0, 0, 0, 0, 0, 0, 0, /* 10-17 */
0, 0, 0, 0, 0, 0, 0, 0, /* 18-1f */
0x20, 0x21, 0, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */
0x28, 0x29, 0, 0, 0, 0x2d, 0, 0, /* 28-2f */
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */
0x38, 0x39, 0, 0, 0, 0, 0, 0, /* 38-3f */
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */
0x58, 0x59, 0x5a, 0, 0, 0, 0x5e, 0x5f, /* 58-5f */
0x60, 0, 0, 0, 0, 0, 0, 0, /* 60-67 */
0, 0, 0, 0, 0, 0, 0, 0, /* 68-6f */
0, 0, 0, 0, 0, 0, 0, 0, /* 70-77 */
0, 0, 0, 0x7b, 0, 0x7d, 0x7e, 0, /* 78-7f */
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */
0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */
0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* c0-c7 */
0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* c8-cf */
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* d0-d7 */
0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* d8-df */
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff /* f8-ff */
};
/*
 * Check one UTF-16 code unit of a long file name.
 *
 * The argument is passed through CF_LE_W() before it is inspected and the
 * result is passed through CT_LE_W() before it is returned.  The result
 * is the code unit itself when it is accepted and 0 when it is rejected.
 *
 * Accepted are all values above 0x00ff, the lower case letters 'a' - 'z',
 * the characters + , . ; = [ ] and whatever codepage_valid_char_map
 * accepts.
 */
static uint16_t
msdos_get_valid_utf16_filename_character (const uint16_t utf16_character)
{
  const uint16_t code_unit = CF_LE_W( utf16_character );
  uint16_t       accepted;

  if ( code_unit > 0x00ff ) {
    /* Outside the range of the lookup table: taken as is */
    accepted = code_unit;
  } else if ( code_unit >= 0x0061 && code_unit <= 0x007a ) {
    /* 'a' - 'z' */
    accepted = code_unit;
  } else if ( code_unit == 0x002b      /* '+' */
              || code_unit == 0x002c   /* ',' */
              || code_unit == 0x002e   /* '.' */
              || code_unit == 0x003b   /* ';' */
              || code_unit == 0x003d   /* '=' */
              || code_unit == 0x005b   /* '[' */
              || code_unit == 0x005d   /* ']' */ ) {
    accepted = code_unit;
  } else {
    /* Everything else is decided by the table (0 means rejected) */
    accepted = codepage_valid_char_map[code_unit];
  }

  return CT_LE_W( accepted );
}
/*
 * Map one code page character of a short file name to the character that
 * gets stored: the character itself when codepage_valid_char_map accepts
 * it, an underscore otherwise.
 */
static char
msdos_get_valid_codepage_filename_character (const uint8_t character)
{
  const char mapped = codepage_valid_char_map[character];

  return ( mapped != 0 ) ? mapped : '_';
}
/*
 * Handle the file names "." and "..", which don't follow dos filename
 * rules and are copied verbatim.
 *
 * src_name / src_size    name to inspect and its size in bytes
 * dest_name / dest_size  buffer receiving the name and its size in bytes
 *
 * Returns the number of bytes copied (1 or 2) when the name is "." or
 * "..", 0 when the name is anything else (nothing is copied), and -1
 * with errno set to ENAMETOOLONG when the destination is too small.
 */
static ssize_t
msdos_filename_process_dot_names (const uint8_t *src_name,
                                  const size_t   src_size,
                                  uint8_t       *dest_name,
                                  const size_t   dest_size)
{
  size_t dot_count = 0;
  size_t needed;
  size_t i;

  /*
   * The size is checked before any byte is looked at, so an empty name
   * never causes a read from src_name.
   */
  if ( src_size == UTF8_FULL_STOP_SIZE
         && src_name[0] == UTF8_FULL_STOP ) {
    dot_count = 1;
  }
  else if ( src_size == ( 2 * UTF8_FULL_STOP_SIZE )
              && src_name[0] == UTF8_FULL_STOP
              && src_name[1] == UTF8_FULL_STOP ) {
    dot_count = 2;
  }

  if ( dot_count == 0 ) {
    /* Not a dot name, the caller has to do the regular conversion */
    return 0;
  }

  needed = dot_count * UTF8_FULL_STOP_SIZE;
  if ( dest_size < needed ) {
    errno = ENAMETOOLONG;
    return -1;
  }

  for ( i = 0; i < dot_count; ++i ) {
    dest_name[i] = UTF8_FULL_STOP;
  }

  return (ssize_t) needed;
}
/*
 * Determine the size of a file name without its trailing dots.
 *
 * filename_utf8  the name; it is only read, never modified
 * filename_size  size of the name in bytes
 *
 * Returns the size in bytes that remains once every '.' at the end of the
 * name is dropped.  This is 0 for an empty name and for a name that
 * consists of dots only.
 */
static ssize_t
msdos_filename_delete_trailing_dots (const uint8_t *filename_utf8,
                                     const size_t   filename_size)
{
  size_t remaining = filename_size;

  /*
   * The last remaining byte is addressed through the remaining size
   * itself, so no separate, narrower index has to be kept in step and
   * nothing is read once the size has reached zero.
   */
  while ( remaining >= UTF8_FULL_STOP_SIZE
            && filename_utf8[remaining - UTF8_FULL_STOP_SIZE]
                 == UTF8_FULL_STOP ) {
    remaining -= UTF8_FULL_STOP_SIZE;
  }

  return (ssize_t) remaining;
}
/*
 * Convert a UTF-8 file name into the form used for comparing long names.
 *
 * "." and ".." are copied unchanged.  For every other name the trailing
 * dots are dropped and the rest is handed to the converter's
 * utf8_normalize_and_fold handler, which writes to long_name.
 *
 * Returns the size of the result in bytes, or -1 with errno set:
 * ENAMETOOLONG when a dot name does not fit, EINVAL when nothing is left
 * after dropping the trailing dots, otherwise the handler's error code.
 */
ssize_t
msdos_filename_utf8_to_long_name_for_compare (
  rtems_dosfs_convert_control *converter,
  const uint8_t               *utf8_name,
  const size_t                 utf8_name_size,
  uint8_t                     *long_name,
  const size_t                 long_name_size)
{
  size_t  folded_size = long_name_size;
  size_t  trimmed_size;
  ssize_t dot_size;
  int     status;

  dot_size = msdos_filename_process_dot_names (
    utf8_name,
    utf8_name_size,
    long_name,
    long_name_size);
  if ( dot_size != 0 ) {
    /* Either a dot name was copied or the helper failed and set errno */
    return dot_size;
  }

  trimmed_size = msdos_filename_delete_trailing_dots (
    utf8_name,
    utf8_name_size);
  if ( trimmed_size == 0 ) {
    errno = EINVAL;
    return -1;
  }

  status = (*converter->handler->utf8_normalize_and_fold) (
    converter,
    utf8_name,
    trimmed_size,
    long_name,
    &folded_size);
  if ( status != 0 ) {
    errno = status;
    return -1;
  }

  return (ssize_t) folded_size;
}
/*
 * Convert a UTF-8 file name into the UTF-16 form that gets saved as a
 * long file name.
 *
 * Trailing dots are dropped, the rest is converted with the converter's
 * utf8_to_utf16 handler and every resulting code unit is checked with
 * msdos_get_valid_utf16_filename_character().  If there is room, the
 * result is terminated with a UTF-16 null.
 *
 * converter       conversion control providing the utf8_to_utf16 handler
 * utf8_name       the name to convert
 * utf8_name_size  size of utf8_name in bytes
 * long_name       receives the UTF-16 name
 * long_name_size  size of long_name in bytes
 *
 * Returns the number of bytes accepted into long_name, or -1 with errno
 * set: EINVAL when nothing is left after dropping the trailing dots or a
 * rejected character is found before the last code unit, ENAMETOOLONG
 * when the converted name exceeds the long file name limit, otherwise the
 * handler's error code.
 */
ssize_t
msdos_filename_utf8_to_long_name_for_save (
  rtems_dosfs_convert_control *converter,
  const uint8_t               *utf8_name,
  const size_t                 utf8_name_size,
  uint16_t                    *long_name,
  const size_t                 long_name_size)
{
  ssize_t      returned_size = 0;
  int          eno           = 0;
  size_t       name_size;
  size_t       name_size_tmp;
  int          i;
  uint16_t     c             = UTF16_NULL;
  unsigned int chars_written;

  name_size_tmp = long_name_size;
  name_size = msdos_filename_delete_trailing_dots (
    &utf8_name[0],
    utf8_name_size);
  if (name_size > 0) {
    /*
     * Finally convert from UTF-8 to UTF-16
     */
    eno = (*converter->handler->utf8_to_utf16) (
      converter,
      utf8_name,
      name_size,
      &long_name[0],
      &name_size_tmp);
    if (eno == 0) {
      if (name_size_tmp <= (MSDOS_NAME_MAX_LNF_LEN * MSDOS_NAME_LFN_BYTES_PER_CHAR))
        name_size = name_size_tmp;
      else
        eno = ENAMETOOLONG;
    }

    if ( eno == 0 )
    {
      /*
       * Validate the characters and assign them to the UTF-16 file name.
       * The loop stops at the first code unit for which the validator
       * returns 0.
       */
      for ( i = 0;
               name_size
            && (c = msdos_get_valid_utf16_filename_character ( long_name[i]) );
            ++i ) {
        long_name[i] = c;
        returned_size += MSDOS_NAME_LFN_BYTES_PER_CHAR;
        name_size     -= MSDOS_NAME_LFN_BYTES_PER_CHAR;
      }

      /*
       * A validator result of 0 for the very last code unit is taken as
       * the terminating null and counted; a 0 anywhere earlier makes the
       * name invalid.
       * NOTE(review): a rejected last character also yields 0 here and
       * is therefore stored as a null - confirm this is intended.
       */
      if ( name_size == UTF16_NULL_SIZE && c == UTF16_NULL ) {
        long_name[i]   = c;
        returned_size += MSDOS_NAME_LFN_BYTES_PER_CHAR;
      }
      else if ( name_size != 0 )
        eno = EINVAL;

      /*
       * Terminate the name if it is not terminated yet and there is
       * room.  With nothing accepted there is no last code unit to look
       * at, so that case must not index long_name with -1.
       */
      chars_written = returned_size / MSDOS_NAME_LFN_BYTES_PER_CHAR;
      if (   (    chars_written == 0
               || long_name [chars_written - 1] != UTF16_NULL )
          && (returned_size + UTF16_NULL_SIZE ) <= long_name_size ) {
        long_name[chars_written] = UTF16_NULL;
      }
    }
  }
  else
    eno = EINVAL;

  if ( eno != 0 ) {
    errno         = eno;
    returned_size = -1;
  }

  return returned_size;
}
/*
 * Skip the dots at the start of a file name.
 *
 * On return *name_utf8 points at the first byte that is not a '.' (or just
 * past the name if it holds dots only) and *name_size is reduced by the
 * number of bytes skipped.  The name itself is not modified.
 */
static void msdos_filename_remove_prepended_dots (const uint8_t **name_utf8,
                                                  size_t         *name_size)
{
  const uint8_t *cursor    = *name_utf8;
  size_t         remaining = *name_size;

  while ( remaining >= UTF8_FULL_STOP_SIZE && *cursor == UTF8_FULL_STOP ) {
    cursor    += UTF8_FULL_STOP_SIZE;
    remaining -= UTF8_FULL_STOP_SIZE;
  }

  *name_utf8 = cursor;
  *name_size = remaining;
}
/*
 * Convert a UTF-8 file name into the form used for comparing short names.
 *
 * "." and ".." are copied unchanged.  For every other name the leading
 * dots are skipped and the rest is normalized and case folded by the
 * converter's utf8_normalize_and_fold handler into a local buffer, which
 * is then copied to short_name.
 *
 * Returns the size of the result in bytes, or -1 with errno set:
 * ENAMETOOLONG when a dot name does not fit, EINVAL when nothing is left
 * after skipping the leading dots, otherwise the handler's error code.
 * An ENOMEM result of the handler is not treated as an error.
 */
ssize_t
msdos_filename_utf8_to_short_name_for_compare (
  rtems_dosfs_convert_control *converter,
  const uint8_t               *utf8_name,
  const size_t                 utf8_name_size,
  void                        *short_name,
  const size_t                 short_name_size)
{
  uint8_t        normalized[(MSDOS_SHORT_NAME_LEN +1) * MSDOS_NAME_MAX_UTF8_BYTES_PER_CHAR];
  size_t         normalized_size = sizeof(normalized);
  const uint8_t *stripped        = utf8_name;
  size_t         stripped_size   = utf8_name_size;
  ssize_t        dot_size;
  int            status;

  dot_size = msdos_filename_process_dot_names (
    utf8_name,
    utf8_name_size,
    short_name,
    short_name_size);
  if ( dot_size != 0 ) {
    /* Either a dot name was copied or the helper failed and set errno */
    return dot_size;
  }

  msdos_filename_remove_prepended_dots (&stripped, &stripped_size);
  if ( stripped_size == 0 ) {
    errno = EINVAL;
    return -1;
  }

  /*
   * Normalize the name and convert to lower case
   */
  status = (*converter->handler->utf8_normalize_and_fold) (
    converter,
    stripped,
    stripped_size,
    &normalized[0],
    &normalized_size);

  /* ENOMEM is deliberately accepted, whatever was produced gets used */
  if ( status != 0 && status != ENOMEM ) {
    errno = status;
    return -1;
  }

  /*
   * NOTE(review): the copy is bounded by the size of the local buffer
   * only, short_name_size is not consulted here - confirm that every
   * caller provides a large enough buffer.
   */
  memcpy ((char*)short_name, &normalized[0], normalized_size);

  return (ssize_t) normalized_size;
}
/*
 * Convert a UTF-8 file name into the blank padded short name that gets
 * saved in a directory entry.
 *
 * "." and ".." are copied unchanged.  For every other name the leading
 * dots are skipped, the rest is converted to the code page by the
 * converter's utf8_to_codepage handler, upper-cased and laid out as up to
 * eight name characters followed by up to three extension characters.
 * Characters rejected by msdos_get_valid_codepage_filename_character()
 * are stored as underscores.
 *
 * Returns the number of bytes written to short_name, 0 when the handler
 * produced no characters, or -1 with errno set: ENAMETOOLONG when a dot
 * name does not fit, EINVAL when nothing is left after skipping the
 * leading dots.
 */
ssize_t
msdos_filename_utf8_to_short_name_for_save (
  rtems_dosfs_convert_control *converter,
  const uint8_t               *utf8_name,
  const size_t                 utf8_name_size,
  void                        *short_name,
  const size_t                 short_name_size)
{
  char           codepage_buf[MSDOS_SHORT_NAME_LEN +1];
  size_t         codepage_size = sizeof ( codepage_buf );
  const uint8_t *stripped      = utf8_name;
  size_t         stripped_size = utf8_name_size;
  char          *out           = (char*)short_name;
  const uint8_t *in;
  size_t         in_left;
  unsigned int   pos;
  ssize_t        dot_size;

  dot_size = msdos_filename_process_dot_names (
    utf8_name,
    utf8_name_size,
    short_name,
    short_name_size);
  if ( dot_size != 0 ) {
    /* Either a dot name was copied or the helper failed and set errno */
    return dot_size;
  }

  msdos_filename_remove_prepended_dots (&stripped, &stripped_size);
  if ( stripped_size == 0 ) {
    errno = EINVAL;
    return -1;
  }

  /*
   * Finally convert from UTF-8 to codepage.
   *
   * The status of the handler is deliberately ignored.  The UTF-8 name
   * may well be a long name for which the corresponding short name is
   * generated here, so a failure likely just means that the name is
   * longer than 11 characters or contains characters which can not be
   * converted to the code page in a reversible way.  Such characters get
   * replaced by underscores below.
   */
  (void) (*converter->handler->utf8_to_codepage) (
    converter,
    stripped,
    stripped_size,
    &codepage_buf[0],
    &codepage_size);

  for ( pos = 0; pos < codepage_size; ++pos ) {
    codepage_buf[pos] = toupper ( (unsigned char)(codepage_buf[pos]) );
  }

  if ( codepage_size == 0 ) {
    /* Nothing to lay out, nothing gets written */
    return 0;
  }

  in      = (const uint8_t *)(&codepage_buf[0]);
  in_left = codepage_size;

  /*
   * The first character needs some special treatment
   */
  switch ( *in ) {
    case 0x20:
      out[0] = '_';
      break;
    case 0xE5:
      out[0] = 0x05;
      break;
    default:
      out[0] = msdos_get_valid_codepage_filename_character(*in);
      break;
  }
  ++in;
  --in_left;
  pos = 1;

  /*
   * Validate and assign all other characters of the name part
   */
  while ( pos <= 7 && in_left != 0 && *in != '.' ) {
    out[pos] = msdos_get_valid_codepage_filename_character(*in);
    ++pos;
    ++in;
    --in_left;
  }

  /*
   * Step over the '.' if the name part is directly followed by one.
   * NOTE(review): characters of an over-long name part are not skipped
   * here, they end up in the extension - confirm this is intended.
   */
  if ( in_left > 0 && *in == '.' ) {
    ++in;
    --in_left;
  }

  /* Blank pad the name part to eight characters */
  while ( pos < 8 ) {
    out[pos] = ' ';
    ++pos;
  }

  /*
   * Copy in the extension part of the name, if any.
   */
  while ( pos <= 10 && in_left != 0 ) {
    out[pos] = msdos_get_valid_codepage_filename_character(*in);
    ++pos;
    ++in;
    --in_left;
  }

  /*
   * Fill up with blanks. These are DOS's pad characters.
   */
  while ( pos < short_name_size ) {
    out[pos] = ' ';
    ++pos;
  }

  /* Every position up to pos has been written exactly once */
  return (ssize_t) pos;
}