#!/usr/bin/perl -w

use strict;

use Getopt::Long;

# Encoding names used when -i / -o are not given on the command line.
my $InputEncStr = "8bit";
my $OutputEncStr = "macros";

# "bundling" lets single-letter options be clustered and take attached values.
Getopt::Long::Configure ("bundling");

# GetOptions returns false when it meets an unknown or malformed option (it
# has already printed its own diagnostic to STDERR by then). Stop in that
# case instead of silently converting with encodings the user did not ask for.
GetOptions(
  'input|i=s'  => \$InputEncStr,
  'output|o=s' => \$OutputEncStr,
  'help|h'     => sub {usage_message(); exit(0)}
) or do {
  print STDERR "Try '$0 --help' for more information.\n";
  exit(1);
};

# Print the command-line help text to standard output.
#
# Takes no arguments. The text interpolates the script name ($0) and the
# values $InputEncStr / $OutputEncStr hold at the time of the call.
#
# Deliberately declared WITHOUT a prototype: this sub is called from the
# option handler that appears earlier in the file, and under -w a prototyped
# sub that is called before its definition produces the warning
# "called too early to check prototype".
sub usage_message {
  print "
$0: translates between various encodings of hebrew latex

Usage:

  transheb.pl [options] [<input_file>]

    If no input file given, reads from standard input.
    Prints out the input after a conversion.
  
    options:
    
      -i <encoding> |--input=<encoding>
        The input encoding. Default: $InputEncStr
        
      -o <encoding> |--output=<encoding>
      The output encoding. Default: $OutputEncStr
    
    Valid Encodings:
    
      8bit 224 iso8 8859-8 8859_8 cp1255
        Standard 8bit encoding (chars 224-250)
        
      dos 128 cp862 
        cp862 dos hebrew
      
      7bit 96 si960
        Obsolete 7bit encoding (chars 96-122)
        
      macros macros_new
        New heblatex macros (\\hebalef, \\hebbet, etc.)
        
      macros_orig
        Original heblatex macros (\\alef, \\bet, etc.)
";
  return;
}

# Single-byte encodings. Each one is a run of 27 consecutive character codes,
# so a table is fully described by the code of its first character.
# Within a table, In and Out are the very same array: a single byte is both
# the pattern that matches it and the text that is written for it.
my (%CODE_7BIT, %CODE_DOS, %CODE_8BIT);

for my $layout ([\%CODE_7BIT, 96], [\%CODE_DOS, 128], [\%CODE_8BIT, 224]) {
  my ($table, $first_code) = @$layout;
  my $chars = [ map { chr } $first_code .. $first_code + 26 ];
  @{$table}{qw(In Out)} = ($chars, $chars);
}

# Names of the 27 letter forms, in table order. The macro spelling of a
# letter is this name behind a backslash, optionally prefixed with "heb".
my @HEBREW_LETTER = qw(
  alef bet gimel dalet he vav zayin het tet yod
  finalkaf kaf lamed finalmem mem finalnun nun samekh ayin
  finalpe pe finaltsadi tsadi qof resh shin tav
);

# Macro encodings. Out holds the macro exactly as it is written in a LaTeX
# file (\alef, \hebalef); In holds the same text with the backslash escaped,
# so that it can be interpolated into a regular expression.
my %CODE_LETTERS_ORIG = (
  In  => [ map(quotemeta("\\$_"), @HEBREW_LETTER) ],
  Out => [ map("\\$_",            @HEBREW_LETTER) ],
);
my %CODE_LETTERS_NEW = (
  In  => [ map(quotemeta("\\heb$_"), @HEBREW_LETTER) ],
  Out => [ map("\\heb$_",            @HEBREW_LETTER) ],
);

# Map every accepted encoding name to its table. Names listed on the same
# row are aliases and resolve to the very same table.
my %EncodingName;

for my $alias_group (
  [ \%CODE_8BIT,         qw(8bit 224 iso8 8859-8 8859_8 cp1255) ],
  [ \%CODE_7BIT,         qw(7bit 96 si960)                      ],
  [ \%CODE_DOS,          qw(dos 128 cp862)                      ],
  [ \%CODE_LETTERS_NEW,  qw(macros macros_new)                  ],
  [ \%CODE_LETTERS_ORIG, qw(macros_orig)                        ],
) {
  my ($table, @names) = @$alias_group;
  @EncodingName{@names} = ($table) x @names;
}

# Resolve the encoding names given on the command line; an unknown name is
# reported on STDERR and ends the program with exit status 1.
if (!exists $EncodingName{$InputEncStr}){
  print STDERR "ERROR: undefined input encoding \"$InputEncStr\"\n";
  exit(1);
}

if (!exists $EncodingName{$OutputEncStr}){
  print STDERR "ERROR: undefined output encoding \"$OutputEncStr\"\n";
  exit(1);
}

# In every table, {Out} holds the literal spelling of each letter (a single
# byte, or a macro such as \hebalef), and entry i of every table stands for
# the same letter. The literal spelling is needed on both sides here: as the
# hash key a matched piece of input is looked up under, and as the text that
# replaces it.
my $InputEnc  = $EncodingName{$InputEncStr}{Out};
my $OutputEnc = $EncodingName{$OutputEncStr}{Out};

my %OutputFor;
@OutputFor{@$InputEnc} = @$OutputEnc;

# Build ONE pattern that matches any input token. All tokens are translated
# in a single left-to-right pass, so freshly written output is never scanned
# again. Translating letter by letter in separate passes corrupts the text
# whenever an output token contains an input token: with 7bit input the
# letters inside an inserted "\hebalef" would be translated once more.
my $Alternation = join '|', map {
  my $pattern = quotemeta $_;
  # A macro name ends at the first non-letter: without this check "\he"
  # would also match the beginning of "\het" or of LaTeX's own "\height".
  $pattern .= '(?![A-Za-z])' if /\A\\[A-Za-z]+\z/;
  $pattern;
} sort { length($b) <=> length($a) } @$InputEnc;   # longest token first

my $Matcher = qr/($Alternation)/;

# Go over the input (files named on the command line, or standard input)
# and print every line after translation.
while (my $line = <>){
  $line =~ s/$Matcher/$OutputFor{$1}/g;
  print $line;
}

