summaryrefslogblamecommitdiffstats
path: root/cpukit/libfs/src/dosfs/msdos_conv.c
blob: 98f2e9be816242b46fec49c0b5f5f3b6d9a68592 (plain) (tree)
1
2
3
4
5
6
7
8
9




                                        
   
 

                                                 
  

                                                                            
  
                                     
  



                                                                          
  


                                                                           
               


                                                        

   



                   
                  

                  



                                          















                                               


                                        
                             






                                     
                              







                                                                          



                          





                                                                       

                                                  
 




                         















                                                                           
                                           






















                                                                           
                                                             







                                                                 
                                                                  

                                                        

                                                          
 

                            








                                                                        




                          
 
                                                                                























                                                                             
                                                                  



                                     

                                                  



                                                               



























                                                               
  


                                                                         
 

                                                  
 












































                                                   
 
























































                                                                     
 
    
                                                 
     





                                                 
 

                       
 











                                                    
 










































                                                              

                               








































                                                                                      

















































































































































































                                                                                                     
                                                  








































                                                                       
 

 
/**
 * @file
 *
 * @brief MSDOS Date and File Name Conversion
 * @ingroup libfs_msdos MSDOS FileSystem
 */

/*
 * Written by Paul Popelka (paulp@uts.amdahl.com)
 *
 * You can do anything you want with this software, just don't say you wrote
 * it, and don't remove this notice.
 *
 * This software is provided "as is".
 *
 * The author supplies this software to be publicly redistributed on the
 * understanding that the author is not responsible for the correct
 * functioning of this software in any circumstances and is not liable for
 * any damages caused by this software.
 *
 * Adaptation of NetBSD code for RTEMS by Victor V. Vengerov <vvv@oktet.ru>
 * $NetBSD: msdosfs_conv.c,v 1.10 1994/12/27 18:36:24 mycroft Exp $
 *
 * October 1992
 *
 * Modifications to support UTF-8 in the file system are
 * Copyright (c) 2013 embedded brains GmbH.
 */

#if HAVE_CONFIG_H
#include "config.h"
#endif

#include <ctype.h>
#include <rtems.h>
#include "msdos.h"

/* #define SECONDSPERDAY (24 * 60 * 60) */
#define SECONDSPERDAY ((uint32_t) 86400)

#define UTF8_MAX_CHAR_SIZE    4
#define UTF8_NULL             0x00
#define UTF8_NULL_SIZE        1
#define UTF8_BLANK            0x20
#define UTF8_BLANK_SIZE       1
#define UTF8_FULL_STOP        0x2e
#define UTF8_FULL_STOP_SIZE   1

#define UTF16_MAX_CHAR_SIZE   4
#define UTF16_NULL            CT_LE_W( 0x0000 )
#define UTF16_NULL_SIZE       2
#define UTF16_BLANK           CT_LE_W( 0x0020 )
#define UTF16_BLANK_SIZE      2
#define UTF16_FULL_STOP       CT_LE_W( 0x002e )
#define UTF16_FULL_STOP_SIZE  2

/*
 * Days in each month in a regular year.
 *
 * Indexed by zero-based month number (0 = January ... 11 = December).
 * Not declared const because the conversion routines in this file select
 * between this table and leapyear through a plain uint16_t pointer.
 */
static uint16_t regyear[] = {
	31, 28, 31, 30, 31, 30,
	31, 31, 30, 31, 30, 31
};

/*
 * Days in each month in a leap year.
 *
 * Same layout as regyear; only the February entry (index 1) differs.
 */
static uint16_t leapyear[] = {
	31, 29, 31, 30, 31, 30,
	31, 31, 30, 31, 30, 31
};

/*
 * Variables used to remember parts of the last time conversion.  Maybe we
 * can avoid a full conversion.
 */
static uint32_t lasttime;
static uint32_t lastday;
static uint16_t lastddate;
static uint16_t lastdtime;

/*
 * Convert the unix version of time to dos's idea of time to be used in
 * file timestamps. The passed in unix time is assumed to be in GMT.
 *
 * t   - seconds since Jan 1, 1970
 * ddp - receives the packed DOS date (day, month, years since 1980)
 * dtp - receives the packed DOS time (hours, minutes, seconds / 2)
 *
 * Every year divisible by four is treated as a leap year; there is no
 * century rule.  For years up to and including 1980 the year field of
 * the DOS date is left at zero.
 *
 * The result of the previous conversion is cached in the file scope
 * variables lasttime, lastday, lastddate and lastdtime.  This function
 * does no locking of its own around that cache.
 */
void
msdos_date_unix2dos(unsigned int t, uint16_t *ddp,
                    uint16_t *dtp)
{
	/*
	 * The cache variables start out as zero, which is indistinguishable
	 * from "t == 0" and "day number 0".  Without this flag a first call
	 * with a time on Jan 1, 1970 would be taken as a cache hit and
	 * return a DOS date of 0 that was never computed.
	 */
	static int cache_primed;

	uint32_t days;
	uint32_t inc;
	uint32_t year;
	uint32_t month;
	uint16_t *months;

	/*
	 * If the time from the last conversion is the same as now, then
	 * skip the computations and use the saved result.
	 */
	if (!cache_primed || lasttime != t) {
		lasttime = t;
		lastdtime = (((t % 60) >> 1) << MSDOS_DT_2SECONDS_SHIFT)
		    + (((t / 60) % 60) << MSDOS_DT_MINUTES_SHIFT)
		    + (((t / 3600) % 24) << MSDOS_DT_HOURS_SHIFT);

		/*
		 * If the number of days since 1970 is the same as the last
		 * time we did the computation then skip all this leap year
		 * and month stuff.
		 */
		days = t / (SECONDSPERDAY);
		if (!cache_primed || days != lastday) {
			lastday = days;
			/* Peel off whole years until days is a day-of-year. */
			for (year = 1970;; year++) {
				inc = year & 0x03 ? 365 : 366;
				if (days < inc)
					break;
				days -= inc;
			}
			/* Peel off whole months until days is a day-of-month. */
			months = year & 0x03 ? regyear : leapyear;
			for (month = 0; month < 12; month++) {
				if (days < months[month])
					break;
				days -= months[month];
			}
			/* DOS counts days and months from one, not zero. */
			lastddate = ((days + 1) << MSDOS_DD_DAY_SHIFT)
			    + ((month + 1) << MSDOS_DD_MONTH_SHIFT);
			/*
			 * Remember dos's idea of time is relative to 1980.
			 * unix's is relative to 1970.  If somehow we get a
			 * time before 1980 then don't give totally crazy
			 * results.
			 */
			if (year > 1980)
				lastddate += (year - 1980) <<
				             MSDOS_DD_YEAR_SHIFT;
		}
		cache_primed = 1;
	}
	*dtp = lastdtime;
	*ddp = lastddate;
}

/*
 * The number of days between Jan 1, 1970 and Jan 1, 1980. In that
 * interval there were 8 regular years and 2 leap years.
 */
/* #define	DAYSTO1980	((8 * 365) + (2 * 366)) */
#define DAYSTO1980   ((uint32_t) 3652)

static uint16_t lastdosdate;
static uint32_t lastseconds;

/*
 * Convert from dos' idea of time to unix'. This will probably only be
 * called from the stat(), and fstat() system calls and so probably need
 * not be too efficient.
 *
 * dd - packed DOS date (day, month, years since 1980)
 * dt - packed DOS time (hours, minutes, seconds / 2)
 *
 * Returns the number of seconds since Jan 1, 1970.
 *
 * The seconds value for the most recently converted date is cached in
 * the file scope variables lastdosdate and lastseconds.  This function
 * does no locking of its own around that cache.
 */
unsigned int
msdos_date_dos2unix(unsigned int dd, unsigned int dt)
{
	/*
	 * lastdosdate starts out as zero, so without this flag a first call
	 * with dd == 0 would be taken as a cache hit and use a lastseconds
	 * value that was never computed.
	 */
	static int cache_primed;

	uint32_t seconds;
	uint32_t m, month;
	uint32_t y, year;
	uint32_t days;
	uint16_t *months;

	seconds = 2 * ((dt & MSDOS_DT_2SECONDS_MASK) >> MSDOS_DT_2SECONDS_SHIFT)
	    + ((dt & MSDOS_DT_MINUTES_MASK) >> MSDOS_DT_MINUTES_SHIFT) * 60
	    + ((dt & MSDOS_DT_HOURS_MASK) >> MSDOS_DT_HOURS_SHIFT) * 3600;
	/*
	 * If the year, month, and day from the last conversion are the
	 * same then use the saved value.
	 */
	if (!cache_primed || lastdosdate != dd) {
		lastdosdate = dd;
		days = 0;
		/* year is relative to 1980, which itself is a leap year. */
		year = (dd & MSDOS_DD_YEAR_MASK) >> MSDOS_DD_YEAR_SHIFT;
		for (y = 0; y < year; y++)
			days += y & 0x03 ? 365 : 366;
		months = year & 0x03 ? regyear : leapyear;
		/*
		 * The month comes straight from a directory entry and may
		 * hold any value the field can encode.  Zero would make the
		 * loop bound below wrap to 0xffffffff, and anything above
		 * twelve would index past the end of the month table, so
		 * force it into the range 1..12.
		 */
		month = (dd & MSDOS_DD_MONTH_MASK) >> MSDOS_DD_MONTH_SHIFT;
		if (month == 0) {
			month = 1;
		} else if (month > 12) {
			month = 12;
		}
		for (m = 0; m < month - 1; m++)
			days += months[m];
		/*
		 * A day field of zero wraps in unsigned arithmetic and so
		 * yields the day before the first of the month.
		 */
		days += ((dd & MSDOS_DD_DAY_MASK) >> MSDOS_DD_DAY_SHIFT) - 1;
		lastseconds = (days + DAYSTO1980) * SECONDSPERDAY;
		cache_primed = 1;
	}
	return seconds + lastseconds;
}


/*
 * Validity map for single byte (code page) file name characters.
 *
 * The table is indexed by the character value.  An entry of 0 marks the
 * character as not acceptable; any other entry is the character to use,
 * which for every non-zero entry in this table is the index itself.
 *
 * Rejected are the control characters 0x00-0x1f, the punctuation
 * characters " * + , . / : ; < = > ? [ \ ] |, the lower case letters
 * 'a'-'z' and 0x7f.  All values from 0x80 upwards are accepted.
 */
static const uint8_t codepage_valid_char_map[] = {
    0,    0,    0,    0,    0,    0,    0,    0,    /* 00-07 */
    0,    0,    0,    0,    0,    0,    0,    0,    /* 08-0f */
    0,    0,    0,    0,    0,    0,    0,    0,    /* 10-17 */
    0,    0,    0,    0,    0,    0,    0,    0,    /* 18-1f */
    0x20, 0x21, 0,    0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */
    0x28, 0x29, 0,    0,    0,    0x2d,  0,    0,   /* 28-2f */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */
    0x38, 0x39, 0,    0,    0,    0,    0,    0,    /* 38-3f */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */
    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */
    0x58, 0x59, 0x5a, 0,    0,    0,    0x5e, 0x5f, /* 58-5f */
    0x60, 0,    0,    0,    0,    0,    0,    0,    /* 60-67 */
    0,    0,    0,    0,    0,    0,    0,    0,    /* 68-6f */
    0,    0,    0,    0,    0,    0,    0,    0,    /* 70-77 */
    0,    0,    0,    0x7b, 0,    0x7d, 0x7e, 0,    /* 78-7f */
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* c0-c7 */
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* c8-cf */
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* d0-d7 */
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* d8-df */
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff  /* f8-ff */
};

/*
 * Check a single UTF-16 code unit of a long file name.
 *
 * The argument is passed through CF_LE_W() before it is examined and the
 * result is passed through CT_LE_W() before it is returned (presumably
 * little endian <-> CPU byte order; see the macro definitions).
 *
 * Returns the code unit itself if it is acceptable in a long file name,
 * otherwise 0.  Acceptable are all values above 0x00ff, the lower case
 * letters 'a'-'z', the characters + , . ; = [ ] and everything the code
 * page validity map accepts.
 */
static uint16_t
msdos_get_valid_utf16_filename_character (const uint16_t utf16_character)
{
  const uint16_t code_unit = CF_LE_W( utf16_character );
  uint16_t       accepted;

  if ( code_unit > 0x00ff ) {
    /* Outside the range covered by the code page map: always accepted */
    accepted = code_unit;
  } else if ( code_unit >= 0x0061 && code_unit <= 0x007a ) {
    /* 'a' - 'z' are fine in long names, although the map rejects them */
    accepted = code_unit;
  } else if (    code_unit == 0x002b    /* '+' */
              || code_unit == 0x002c    /* ',' */
              || code_unit == 0x002e    /* '.' */
              || code_unit == 0x003b    /* ';' */
              || code_unit == 0x003d    /* '=' */
              || code_unit == 0x005b    /* '[' */
              || code_unit == 0x005d) { /* ']' */
    /* Punctuation which is permitted in long names only */
    accepted = code_unit;
  } else {
    /* Everything else follows the rules for code page names */
    accepted = codepage_valid_char_map[code_unit];
  }

  return CT_LE_W( accepted );
}

/*
 * Look up a single byte character in the code page validity map.
 *
 * Returns the table entry for the character: 0 if the character is not
 * acceptable, otherwise the character to use.
 */
static char
msdos_get_valid_codepage_filename_character (const uint8_t character)
{
  const uint8_t map_entry = codepage_valid_char_map[character];

  return map_entry;
}

/*
 * Handle the file names "." and "..".
 *
 * These two names are copied verbatim, since they don't follow dos
 * filename rules.
 *
 * src_name  - name to examine (src_size bytes, not necessarily terminated)
 * src_size  - number of bytes in src_name
 * dest_name - buffer receiving the dot name
 * dest_size - capacity of dest_name in bytes
 *
 * Returns 0 if the source is neither "." nor ".." (nothing is written),
 * the number of bytes written to dest_name if it is one of them, or -1
 * with errno set to ENAMETOOLONG if dest_name is too small for it.
 */
static ssize_t
msdos_filename_process_dot_names (const uint8_t *src_name,
                                  const size_t   src_size,
                                  uint8_t       *dest_name,
                                  const size_t   dest_size)
{
  size_t dots_size;
  size_t i;

  /*
   * The size is tested before any byte of the name is looked at, so that
   * an empty or one byte name is never read beyond its end.
   */
  if (    src_size    == UTF8_FULL_STOP_SIZE
       && src_name[0] == UTF8_FULL_STOP) {
    dots_size = UTF8_FULL_STOP_SIZE;
  }
  else if (    src_size    == ( 2 * UTF8_FULL_STOP_SIZE )
            && src_name[0] == UTF8_FULL_STOP
            && src_name[1] == UTF8_FULL_STOP) {
    dots_size = 2 * UTF8_FULL_STOP_SIZE;
  }
  else {
    /* An ordinary name: left to the caller */
    return 0;
  }

  if (dest_size < dots_size) {
    errno = ENAMETOOLONG;
    return -1;
  }

  for (i = 0; i < dots_size; ++i)
    dest_name[i] = UTF8_FULL_STOP;

  return (ssize_t) dots_size;
}

/*
 * Determine the length of a file name without its trailing dots.
 *
 * The name itself is not modified.
 *
 * filename_utf8 - name to examine
 * filename_size - number of bytes in filename_utf8
 *
 * Returns the number of leading bytes of the name which remain once all
 * full stops at its end are dropped.  This is 0 for a name which consists
 * of full stops only.
 */
static ssize_t
msdos_filename_delete_trailing_dots (const uint8_t *filename_utf8,
                                     const size_t   filename_size)
{
  ssize_t remaining = filename_size;

  while ( remaining >= UTF8_FULL_STOP_SIZE ) {
    /* Index of the last byte which is still part of the name */
    const unsigned int last = remaining - UTF8_FULL_STOP_SIZE;

    if ( filename_utf8[last] != UTF8_FULL_STOP )
      break;

    remaining -= UTF8_FULL_STOP_SIZE;
  }

  return remaining;
}

/*
 * Bring a UTF-8 file name into the form used for comparing long names.
 *
 * "." and ".." are copied unchanged.  For any other name the trailing dots
 * are dropped and the rest is handed to the converter's
 * utf8_normalize_and_fold handler, which writes its result to long_name.
 *
 * converter      - conversion control providing the handler
 * utf8_name      - name to convert
 * utf8_name_size - number of bytes in utf8_name
 * long_name      - buffer receiving the result
 * long_name_size - capacity of long_name in bytes
 *
 * Returns the number of bytes placed in long_name, or -1 with errno set.
 * errno is EINVAL if nothing but dots was given, otherwise it is whatever
 * the dot name handling or the handler reported.
 */
ssize_t
msdos_filename_utf8_to_long_name_for_compare (
    rtems_dosfs_convert_control     *converter,
    const uint8_t                   *utf8_name,
    const size_t                     utf8_name_size,
    uint8_t                         *long_name,
    const size_t                     long_name_size)
{
  ssize_t dot_name_size;
  size_t  trimmed_size;
  size_t  folded_size = long_name_size;
  int     handler_eno;

  dot_name_size = msdos_filename_process_dot_names (
    utf8_name,
    utf8_name_size,
    long_name,
    long_name_size);

  /* Either a dot name was copied, or copying it failed (errno is set) */
  if (dot_name_size != 0)
    return dot_name_size;

  trimmed_size = msdos_filename_delete_trailing_dots (
    &utf8_name[0],
    utf8_name_size);

  if (trimmed_size == 0) {
    errno = EINVAL;
    return -1;
  }

  handler_eno = (*converter->handler->utf8_normalize_and_fold) (
    converter,
    utf8_name,
    trimmed_size,
    long_name,
    &folded_size);

  if (handler_eno != 0) {
    errno = handler_eno;
    return -1;
  }

  return (ssize_t)folded_size;
}

/*
 * Convert a UTF-8 file name to the UTF-16 long name stored on the medium.
 *
 * Trailing dots are dropped, the rest is converted with the converter's
 * utf8_to_utf16 handler and every resulting UTF-16 code unit is checked
 * with msdos_get_valid_utf16_filename_character().
 *
 * converter      - conversion control providing the handler
 * utf8_name      - name to convert
 * utf8_name_size - number of bytes in utf8_name
 * long_name      - buffer receiving the UTF-16 name
 * long_name_size - capacity of long_name in bytes
 *
 * Returns the number of bytes of the UTF-16 name, or -1 with errno set:
 * EINVAL if the name is empty after dropping the dots or contains a
 * character which is not allowed, ENAMETOOLONG if the converted name is
 * longer than a long file name may be, otherwise the handler's error.
 */
ssize_t
msdos_filename_utf8_to_long_name_for_save (
    rtems_dosfs_convert_control     *converter,
    const uint8_t                   *utf8_name,
    const size_t                     utf8_name_size,
    uint16_t                        *long_name,
    const size_t                     long_name_size)
{
    ssize_t      returned_size = 0;
    int          eno           = 0;
    size_t       name_size;
    size_t       name_size_tmp;
    int          i;
    uint16_t     c             = UTF16_NULL;
    unsigned int chars_written;

    name_size_tmp = long_name_size;
    name_size = msdos_filename_delete_trailing_dots (
      &utf8_name[0],
      utf8_name_size);
    if (name_size > 0) {
      /*
       * Finally convert from UTF-8 to UTF-16
       */
      eno = (*converter->handler->utf8_to_utf16) (
          converter,
          utf8_name,
          name_size,
          &long_name[0],
          &name_size_tmp);
      if (eno == 0) {
        if (name_size_tmp <= (MSDOS_NAME_MAX_LNF_LEN * MSDOS_NAME_LFN_BYTES_PER_CHAR))
          name_size = name_size_tmp;
        else
          eno = ENAMETOOLONG;
      }

      if ( eno == 0 )
      {
        /*
         * Validate the characters and assign them to the UTF-16 file name.
         * The loop ends at the end of the name or at the first character
         * which is rejected (c == 0).
         */
        for ( i = 0;
                 name_size
              && (c = msdos_get_valid_utf16_filename_character ( long_name[i]) );
              ++i ) {
          long_name[i]   = c;
          returned_size += MSDOS_NAME_LFN_BYTES_PER_CHAR;
          name_size     -= MSDOS_NAME_LFN_BYTES_PER_CHAR;
        }
        /*
         * A rejected character is tolerated only if it is a terminating
         * null at the very end of the converted name.
         */
        if ( name_size == UTF16_NULL_SIZE && c == UTF16_NULL ) {
          long_name[i]   = c;
          returned_size += MSDOS_NAME_LFN_BYTES_PER_CHAR;
        }
        else if ( name_size != 0 )
          eno = EINVAL;
        /*
         * Terminate the name if it is not terminated yet and there is room
         * for it.  If not a single character was accepted there is no last
         * character to look at; without the check on chars_written the
         * index below would wrap around and access memory far outside of
         * long_name.
         */
        chars_written = returned_size / MSDOS_NAME_LFN_BYTES_PER_CHAR;
        if (   chars_written > 0
            && long_name [chars_written - 1] != UTF16_NULL
            && (returned_size + UTF16_NULL_SIZE ) <= long_name_size ) {
          long_name[chars_written] = UTF16_NULL;
        }
      }
    }
    else
      eno = EINVAL;

    if ( eno != 0 ) {
      errno         = eno;
      returned_size = -1;
    }

    return returned_size;
}

/*
 * Skip all full stops at the start of a file name.
 *
 * name_utf8 - in: start of the name, out: first byte which is no full stop
 * name_size - in: size of the name, out: size without the skipped bytes
 *
 * The name itself is not modified.  For a name which consists of full
 * stops only the size becomes 0.
 */
static void msdos_filename_remove_prepended_dots (const uint8_t **name_utf8,
                                                  size_t         *name_size)
{
  const uint8_t *cursor    = *name_utf8;
  size_t         remaining = *name_size;

  for (;;) {
    if (remaining < UTF8_FULL_STOP_SIZE)
      break;
    if (*cursor != UTF8_FULL_STOP)
      break;
    cursor    += UTF8_FULL_STOP_SIZE;
    remaining -= UTF8_FULL_STOP_SIZE;
  }

  *name_utf8 = cursor;
  *name_size = remaining;
}

/*
 * Bring a UTF-8 file name into the form used for comparing short names.
 *
 * "." and ".." are copied unchanged.  For any other name the leading dots
 * are skipped and the rest is handed to the converter's
 * utf8_normalize_and_fold handler.  The handler writes into a local buffer
 * with room for MSDOS_SHORT_NAME_LEN + 1 characters; an ENOMEM from the
 * handler is not treated as an error and whatever the handler produced is
 * used.
 *
 * converter       - conversion control providing the handler
 * utf8_name       - name to convert
 * utf8_name_size  - number of bytes in utf8_name
 * short_name      - buffer receiving the result
 * short_name_size - capacity of short_name, used for dot names only
 *
 * Returns the number of bytes placed in short_name, or -1 with errno set.
 * errno is EINVAL if nothing but dots was given, otherwise it is whatever
 * the dot name handling or the handler reported.
 *
 * NOTE(review): the copy of the normalized name is limited by the size of
 * the local buffer, not by short_name_size.  Callers apparently have to
 * provide a buffer which is large enough for that - confirm.
 */
ssize_t
msdos_filename_utf8_to_short_name_for_compare (
    rtems_dosfs_convert_control     *converter,
    const uint8_t                   *utf8_name,
    const size_t                     utf8_name_size,
    void                            *short_name,
    const size_t                     short_name_size)
{
  uint8_t        normalized[(MSDOS_SHORT_NAME_LEN +1) * MSDOS_NAME_MAX_UTF8_BYTES_PER_CHAR];
  size_t         normalized_size = sizeof(normalized);
  const uint8_t *stripped_name   = utf8_name;
  size_t         stripped_size   = utf8_name_size;
  ssize_t        dot_name_size;
  int            handler_eno;

  dot_name_size = msdos_filename_process_dot_names (
    utf8_name,
    utf8_name_size,
    short_name,
    short_name_size);

  /* Either a dot name was copied, or copying it failed (errno is set) */
  if (dot_name_size != 0)
    return dot_name_size;

  msdos_filename_remove_prepended_dots (&stripped_name,
                                        &stripped_size);
  if (stripped_size == 0) {
    errno = EINVAL;
    return -1;
  }

  /*
   * Normalize the name and convert to lower case
   */
  handler_eno = (*converter->handler->utf8_normalize_and_fold) (
    converter,
    stripped_name,
    stripped_size,
    &normalized[0],
    &normalized_size);

  /* Running out of space in the local buffer is accepted silently */
  if (handler_eno != 0 && handler_eno != ENOMEM) {
    errno = handler_eno;
    return -1;
  }

  memcpy (short_name, &normalized[0], normalized_size);

  return normalized_size;
}

/*
 * Map a code page character to the character stored in a short name:
 * the character itself if the validity map accepts it, otherwise '_'.
 */
static char
msdos_codepage_character_or_underscore (const uint8_t character)
{
  const char valid = msdos_get_valid_codepage_filename_character ( character );

  if (valid != 0)
    return valid;

  return '_';
}

/*
 * Convert a UTF-8 file name to the short name stored on the medium.
 *
 * "." and ".." are copied unchanged and not padded.  For any other name
 * the leading dots are skipped, the rest is converted with the
 * converter's utf8_to_codepage handler into a local buffer of
 * MSDOS_SHORT_NAME_LEN + 1 bytes, converted to upper case with toupper()
 * and then laid out as a name part of eight characters followed by an
 * extension of up to three characters, both padded with blanks.
 * Characters which the code page validity map rejects are replaced by '_'.
 *
 * converter       - conversion control providing the handler
 * utf8_name       - name to convert
 * utf8_name_size  - number of bytes in utf8_name
 * short_name      - buffer receiving the short name
 * short_name_size - capacity of short_name in bytes
 *
 * Returns the number of bytes written to short_name (0 if the handler
 * produced no characters at all), or -1 with errno set.  errno is EINVAL
 * if nothing but dots was given, or whatever the dot name handling
 * reported.
 *
 * NOTE(review): short_name_size is only used for dot names and for the
 * final padding.  The name part is always padded up to eight bytes, so the
 * buffer apparently has to hold at least MSDOS_SHORT_NAME_LEN bytes -
 * confirm with the callers.
 */
ssize_t
msdos_filename_utf8_to_short_name_for_save (
    rtems_dosfs_convert_control     *converter,
    const uint8_t                   *utf8_name,
    const size_t                     utf8_name_size,
    void                            *short_name,
    const size_t                     short_name_size)
{
  char           codepage_buf[MSDOS_SHORT_NAME_LEN +1];
  size_t         codepage_size = sizeof ( codepage_buf );
  const uint8_t *src           = utf8_name;
  size_t         remaining     = utf8_name_size;
  char          *dest          = (char*)short_name;
  unsigned int   pos;
  ssize_t        dot_name_size;

  dot_name_size = msdos_filename_process_dot_names (
    utf8_name,
    utf8_name_size,
    short_name,
    short_name_size);

  /* Either a dot name was copied, or copying it failed (errno is set) */
  if (dot_name_size != 0)
    return dot_name_size;

  msdos_filename_remove_prepended_dots (&src,
                                        &remaining);
  if (remaining == 0) {
    errno = EINVAL;
    return -1;
  }

  /*
   * Finally convert from UTF-8 to codepage.
   *
   * The UTF-8 name may well be a long name, for which we now want to
   * generate the corresponding short name.  An error from the handler
   * then likely just means that the name is longer than the local buffer
   * or that it contains unicode characters which can not be converted to
   * the code page in a reversible way.  The error is ignored on purpose
   * and whatever the handler produced is used; characters the validity
   * map rejects are replaced by underline characters below.
   */
  (void) (*converter->handler->utf8_to_codepage) (
    converter,
    src,
    remaining,
    &codepage_buf[0],
    &codepage_size);

  src       = (const uint8_t *)(&codepage_buf[0]);
  remaining = codepage_size;

  for (pos = 0; pos < remaining; ++pos)
    codepage_buf[pos] = toupper ( (unsigned char)(codepage_buf[pos]) );

  /* Nothing was converted: nothing is written */
  if (remaining == 0)
    return 0;

  /*
   * The first character needs some special treatment: a blank becomes an
   * underline and 0xE5 is stored as 0x05.
   */
  if ( 0x20 == *src )
    dest[0] = '_';
  else if ( 0xE5 == *src )
    dest[0] = 0x05;
  else
    dest[0] = msdos_codepage_character_or_underscore ( *src );
  ++src;
  --remaining;
  pos = 1;

  /*
   * Validate and assign all other characters of the name part
   */
  while (pos <= 7 && remaining != 0 && *src != '.') {
    dest[pos] = msdos_codepage_character_or_underscore ( *src );
    ++pos;
    ++src;
    --remaining;
  }

  /*
   * Skip the '.' if the name part ended at one.
   *
   * NOTE(review): characters of an over-long name part are not skipped
   * here; they end up in the extension below.
   */
  if ( remaining > 0 && *src == '.' ) {
    ++src;
    --remaining;
  }

  /*
   * Pad the name part with blanks
   */
  while (pos < 8) {
    dest[pos] = ' ';
    ++pos;
  }

  /*
   * Copy in the extension part of the name, if any.
   */
  while (pos <= 10 && remaining != 0) {
    dest[pos] = msdos_codepage_character_or_underscore ( *src );
    ++pos;
    ++src;
    --remaining;
  }

  /*
   * Fill up with blanks. These are DOS's pad characters.
   */
  while (pos < short_name_size) {
    dest[pos] = ' ';
    ++pos;
  }

  /* One byte was written for every position passed */
  return (ssize_t) pos;
}