UTF-8 - ASCII-256

Standard

Description

Converts a string from extended ASCII to UTF-8 and vice versa.


Source

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Decode a UTF-8 byte string into a single-byte ("extended ASCII",
 * ISO-8859-1 layout) string.
 *
 * Only the two-byte sequences led by 0xC2 and 0xC3 (U+0080..U+00FF) are
 * folded into one byte; every other byte is copied through unchanged.
 * A 0xC2/0xC3 lead byte that is not followed by a continuation byte
 * (0x80..0xBF) is dropped and the following byte is handled on its own.
 *
 * string: input bytes; must not be NULL.
 * len:    in:  number of input bytes to read; a value < 1 means
 *              "use strlen (string)".
 *         out: number of bytes stored in the result, not counting the
 *              terminating NUL.
 *
 * Returns a malloc'ed, NUL-terminated buffer owned by the caller (release
 * it with free), or NULL if an argument is NULL or the allocation fails.
 */
unsigned char *utf8_to_extended_ascii (char *string, int *len)
{
 const unsigned char *src = (const unsigned char *) string;
 unsigned char *ext_string = NULL;
 size_t in_len, i = 0, out = 0;

 if ( !string || !len )
      return ext_string;

 in_len = ( *len < 1 ) ? strlen (string) : (size_t) *len;

 /* Decoding never grows the data, so one allocation is always enough. */
 ext_string = malloc (in_len + 1);
 if ( !ext_string )
      return NULL;

 while ( i < in_len ) {
     unsigned char byte = src[i++];

     if ( byte == 0xC2 || byte == 0xC3 ) {
          /* Look at the next byte only if it lies inside the input. */
          if ( i < in_len && (src[i] & 0xC0) == 0x80 ) {
               unsigned char cont = src[i++];

               /* C2 xx -> xx (0x80..0xBF), C3 xx -> xx + 0x40 (0xC0..0xFF) */
               ext_string[out++] = ( byte == 0xC3 )
                                 ? (unsigned char) (cont + 0x40)
                                 : cont;
          }
          continue;
     }

     ext_string[out++] = byte;
 }
 ext_string[out] = 0x00;
 *len = (int) out;

 return ext_string;
}

/*
 * Upper-case a single-byte (ISO-8859-1 layout) buffer in place.
 *
 * Converts a..z and the accented lower-case range 0xE0..0xFE by clearing
 * bit 0x20.  0xF7 (division sign) is skipped because clearing the bit would
 * turn it into 0xD7 (multiplication sign); 0xFF has no single-byte capital
 * and is left alone.
 *
 * string: address of the buffer pointer; the pointer itself is not changed.
 * length: number of bytes to process; values < 1 process nothing.
 */
void lower_to_upper (unsigned char **string, int length)
{
 unsigned char *buf;
 int j;

 if ( !string || !*string )
      return;

 buf = *string;
 for ( j = 0; j < length; j++ ) {
      unsigned char c = buf[j];

      if ( (c >= 0x61 && c <= 0x7a)
        || (c >= 0xe0 && c <= 0xfe && c != 0xf7) )
           buf[j] = (unsigned char) (c & 0xdf);
 }
}

/*
 * Encode a single-byte (ISO-8859-1 layout) buffer as UTF-8.
 *
 * Bytes below 0x80 are copied unchanged; bytes 0x80..0xFF become the
 * two-byte sequence (0xC0 | byte >> 6), (0x80 | byte & 0x3F), i.e. a 0xC2
 * lead for 0x80..0xBF and a 0xC3 lead for 0xC0..0xFF.
 *
 * ext_string: input bytes; must not be NULL.
 * length:     number of input bytes; values < 1 yield an empty string.
 *
 * Returns a malloc'ed, NUL-terminated string owned by the caller (release
 * it with free), or NULL if ext_string is NULL or the allocation fails.
 */
char *extended_ascii_to_utf8 (unsigned char *ext_string, int length)
{
 unsigned char *dst;
 size_t count, out = 0;
 int j;

 if ( !ext_string )
      return NULL;

 /* Worst case every input byte expands to two output bytes. */
 count = ( length > 0 ) ? (size_t) length : 0;
 dst = malloc (2 * count + 1);
 if ( !dst )
      return NULL;

 for ( j = 0; j < length; j++ ) {
      unsigned char c = ext_string[j];

      if ( c < 0x80 )
           dst[out++] = c;
      else {
           dst[out++] = (unsigned char) (0xC0 | (c >> 6));
           dst[out++] = (unsigned char) (0x80 | (c & 0x3F));
      }
 }
 dst[out] = 0x00;

 return (char *) dst;
}

/*
 * Command-line driver: takes one string argument, converts it to the
 * single-byte form, upper-cases it, converts it back and prints the result
 * followed by a newline on stdout.
 *
 * Exit status is 0 on success and 1 when the argument is missing or either
 * conversion returns NULL.
 */
int main (int argc, char *argv[])
{
 /* A negative length asks the decoder to measure the argument itself. */
 int ext_length = -1;
 unsigned char *encoded;
 char *result;

 if ( argc < 2 ) {
      fprintf (stderr, "Usage: %s <string>\n", argv[0]);
      return 1;
 }

 encoded = utf8_to_extended_ascii (argv[1], &ext_length);
 if ( encoded == NULL ) {
      fprintf (stderr, "E: string not valid.\n");
      exit (1);
 }

 lower_to_upper (&encoded, ext_length);

 result = extended_ascii_to_utf8 (encoded, ext_length);
 if ( result == NULL ) {
      fprintf (stderr, "E: extended string not valid.\n");
      exit (1);
 }

 /* The single-byte buffer is no longer needed once re-encoded. */
 free (encoded);

 fprintf (stdout, "%s\n", result);
 free (result);

 return 0;
}