#!/usr/bin/perl -w

#---------------------------------------------------
#
# $RCSfile: convchar.pl,v $
# $Revision: 1.2 $
# $Date: 1999/01/29 15:50:51 $
# $Author: fog.stflin09 $
# $Locker:  $
#
#	Script to convert an 8-bit character to an 8-bit character in
#	another character set. Reads characters from stdin, writes to
#	stdout.
#
#		convchar <from-set-table> <to-set-table>
#	The command takes two arguments: filenames of the character set
#	to unicode conversion tables. These are used to convert the input
#	character from the input charset to unicode, then convert it from
#	unicode back to the output charset.
#
#	Suitable tables can be found at
#		ftp://dkuug.dk/i18n/charmaps
#	for just about any charset you could be interested in.
#---------------------------------------------------

use integer;

# Both charset table filenames are required. Testing the second argument is
# enough: when it is present on the command line, so is the first.
if (!defined($ARGV[1]) || $ARGV[1] eq "")
{
	print STDERR 'Usage: cat infile | convchar.pl <in-charset-file> <out-charset-file> >outfile', "\n";
	# Leave with a failure status so that shells, pipelines and makefiles can
	# detect the misuse; a bare "exit" would report success.
	exit 1;
}

# Remember the two charset table filenames given on the command line.
$IN_CHAR_SET = $ARGV[0];
$OUT_CHAR_SET = $ARGV[1];

# Open both tables for reading. The three-argument form of open takes the
# filename literally, so characters such as '>', '|' or surrounding blanks in
# a name are never interpreted as an open mode or a command. The failure
# message names the file and carries the system error text ($!).
open(IN_CHAR_SET, '<', $IN_CHAR_SET)
	or die "Bad input charset filename '$IN_CHAR_SET': $!\n";
open(OUT_CHAR_SET, '<', $OUT_CHAR_SET)
	or die "Bad output charset filename '$OUT_CHAR_SET': $!\n";

# Read IN_CHAR_SET and build its unicode conversion table (an array).
# $in2unicode[N] holds the unicode key (e.g. "U0041") of input byte N, or
# "nochar" when the table has no entry for that byte.
$in_table = 0;	# true while between the CHARMAP and END CHARMAP lines
@in2unicode = ("nochar") x 256;

while (defined($line = <IN_CHAR_SET>))
{
	# Entry layout: <symbolic-name> <escape>xHH <Uxxxx> [description]
	@fields = split /\s+/, $line, 4;

	# Defensive: a line that yields no fields is neither marker nor entry.
	next unless defined($fields[0]);

	if ($fields[0] eq "CHARMAP")
	{
		$in_table = 1;
		next;
	}
	elsif ($fields[0] eq "END" && defined($fields[1]) && $fields[1] eq "CHARMAP")
	{
		$in_table = 0;
		next;
	}

	next unless $in_table;

	# Skip anything that is not a complete single-byte entry. Without this
	# check blank, comment or truncated lines would decode to code 0 and
	# overwrite the entry for byte 0, and multi-byte encodings such as
	# /x81/x40 would be stored under their lead byte.
	next unless defined($fields[2]) && length($fields[2]) > 1;
	next unless $fields[1] =~ /^.[xX]([0-9A-Fa-f]{2})$/;

	$char8 = hex($1);
	# Strip the leading "<"; the next five characters ("Uxxxx") are the key.
	$char_uni = substr($fields[2], 1, 5);
	$in2unicode[$char8] = $char_uni;
}

# DEBUG - print conv table
#for ($i=0; $i<256; $i++)
#{
#	if ($i % 4 == 0)
#	{
#		print STDERR "\n";
#	}
#	printf STDERR "in8 %02xH=%s. ", $i, $in2unicode[$i];
#}
#print STDERR "\n";

# Read OUT_CHAR_SET and build its unicode conversion table (a hash).
# $unicode2out{KEY} is the output byte value (a number) for the unicode key
# KEY (e.g. "U0041"). Keys absent from the table stay absent from the hash.
%unicode2out = ();
$in_table = 0;	# true while between the CHARMAP and END CHARMAP lines
while (defined($line = <OUT_CHAR_SET>))
{
	# Entry layout: <symbolic-name> <escape>xHH <Uxxxx> [description]
	@fields = split /\s+/, $line, 4;

	# Defensive: a line that yields no fields is neither marker nor entry.
	next unless defined($fields[0]);

	if ($fields[0] eq "CHARMAP")
	{
		$in_table = 1;
		next;
	}
	elsif ($fields[0] eq "END" && defined($fields[1]) && $fields[1] eq "CHARMAP")
	{
		$in_table = 0;
		next;
	}

	next unless $in_table;

	# Skip anything that is not a complete single-byte entry. Without this
	# check blank, comment or truncated lines would add junk keys mapped to
	# byte 0, and multi-byte encodings such as /x81/x40 would be recorded as
	# their lead byte only.
	next unless defined($fields[2]) && length($fields[2]) > 1;
	next unless $fields[1] =~ /^.[xX]([0-9A-Fa-f]{2})$/;

	$char8 = hex($1);
	# Strip the leading "<"; the next five characters ("Uxxxx") are the key.
	$char_uni = substr($fields[2], 1, 5);
	$unicode2out{$char_uni} = $char8;
}

# Both tables have been read completely; the files are no longer needed.
close IN_CHAR_SET;
close OUT_CHAR_SET;

# Combine the two tables into a direct 8-bit conversion array:
# $in2out[N] is the output byte value (a number 0..255) for input byte N.
# The entries are later handed to pack("C..."), which needs numbers, so the
# fallback for unmappable bytes is stored as the character code of "?".
# Storing the string "?" itself would be numified to 0 and written as NUL.
my $unmapped = ord("?");
for my $inchar (0 .. 255)
{
	my $uni = $in2unicode[$inchar];
	if (defined($uni) && defined($unicode2out{$uni}))
	{
		$in2out[$inchar] = $unicode2out{$uni};
	}
	else
	{
		$in2out[$inchar] = $unmapped;
	}
}

# Now start passing STDIN to STDOUT converting characters.
# Both streams carry raw 8-bit data, so remove any newline translation or
# encoding layer before touching them.
binmode(STDIN);
binmode(STDOUT);

while (1)
{
	# read returns undef on error and 0 at end of input; only the latter is
	# a normal way to finish.
	my $nread = read(STDIN, my $chunk, 256);
	die "Error reading input: $!\n" unless defined($nread);
	last if $nread == 0;

	# unpack turns the bytes into numbers 0..255, which index @in2out;
	# pack turns the converted numbers back into bytes. Building a fresh
	# list per chunk means nothing is carried over from an earlier chunk.
	my $converted = pack("C*", map { $in2out[$_] } unpack("C*", $chunk));

	# syswrite may write fewer bytes than requested (e.g. into a pipe), so
	# continue from the reached offset until the whole chunk is written.
	my $length = length($converted);
	my $offset = 0;
	while ($offset < $length)
	{
		my $written = syswrite(STDOUT, $converted, $length - $offset, $offset);
		die "Error writing output: $!\n" unless defined($written);
		$offset += $written;
	}
}
#
# $Log: convchar.pl,v $
# Revision 1.2  1999/01/29 15:50:51  fog.stflin09
# Changed Source keyword to RCSfile
#
# Revision 1.1  1998/08/26 14:56:32  fog
# Initial revision
#
# Revision 6.1  1998/05/06 20:54:16  sfd
# initial version
#
#
