#! /bin/ksh

#################  preprocessor for the Dutoit synthesizer
#  skips empty lines
#  allows comments starting with //   or /*  or ;
#  transcodes SAMPA codes   -> MBROLA v1.02 codes
#  writes the Dutoit "pho" file format
#######################################################


# Name this script was invoked as, with any leading directory part of $0
# stripped; the usage texts print it as the command name.
commande=${0##*/}

#########
# usage: write the French help text to standard error.
# Globals:   commande (read) - command name shown in the synopsis lines
# Arguments: none
# Outputs:   six lines on stderr, nothing on stdout
usage()
{
  # ">&2" reuses the stderr descriptor that is already open.  Reopening
  # /dev/stderr by name would truncate a log file that stderr has been
  # redirected to, and fails on systems without that device node.
  # printf is used instead of the ksh-only "print" so the function also
  # works in other POSIX shells; the emitted text is unchanged.
  printf '%s\n' \
    "usage: $commande [-trace|-v] [fichier_sampa|-]   >" \
    "       $commande [-trace|-v] <   >" \
    "    transcode le fichier_sampa ou l'entree standard" \
    '    code de retour ($?) == 2 si code(s) inconnu(s)' \
    '    -trace ou -v : ecrit sur stderr nbre de recodage effectues' \
    '                   et nbre de codes inconnus' >&2
}

# use: write the English help text to standard error.
# Globals:   commande (read) - command name shown in the synopsis lines
# Arguments: none
# Outputs:   six lines on stderr, nothing on stdout
use()
{
  # ">&2" reuses the stderr descriptor that is already open.  Reopening
  # /dev/stderr by name would truncate a log file that stderr has been
  # redirected to, and fails on systems without that device node.
  # printf is used instead of the ksh-only "print" so the function also
  # works in other POSIX shells; the emitted text is unchanged.
  printf '%s\n' \
    "usage: $commande [-trace|-v] [sampa_file|-]   >" \
    "       $commande [-trace|-v] <   >" \
    "    transcodes the sampa file or the standard input" \
    '    exit code  ($?) == 2 if unknown code(s) ' \
    '    -trace or -v : writes on stderr the # of translated codes' \
    '                   and the  # of unknown codes ' >&2
}



########################### maitre ###################

# Defaults: recoding table enabled, tracing off, no input file named.
sampa=1
trace=0
fic_sampa=''

# Consume the positional parameters one by one; the loop stops when none
# is left or when an empty argument is met.
until [ "X$1" = "X" ]
do
  case "$1" in
    -ignore | -nosampa | -nonsampa)
      # the awk program then builds no recoding table
      sampa=0
      ;;
    -v* | -trace)
      # any argument beginning with -v turns tracing on
      trace=1
      ;;
    -usage | -aide)
      # French help
      usage
      exit 0
      ;;
    -help | -use)
      # English help
      use
      exit 0
      ;;
    *)
      # anything else is kept as the input file name; the last one wins
      fic_sampa=$1
      ;;
  esac
  shift
done

nawk  ' 
# Fill the global array mbpr with the SAMPA -> MBROLA recoding table.
# The table is written as a flat list of "source target" pairs; all the
# work variables are locals (extra parameters) so nothing else leaks out.
function remplir_table(    paires, mot, nb, k)
{
   paires =        "i i  e e1  E e2  a a  A a  O o  o au  u ou  y u  2 eu  9 e3  @ e"
   paires = paires "  e~ in  a~ an  o~ on  9~ un"
   paires = paires "  j y  w w  H u1"
   paires = paires "  p p  t t  k k  b b  d d  g g"
   paires = paires "  f f  s s  S ch  v v  z z  Z j"
   paires = paires "  l l  R r  m m  n n  N ng"

   # "J" has no counterpart in the target code set.  Its entry only marks
   # the symbol as known: the dedicated "J" rule further down handles such
   # lines before the general recoding rule can use this value.
   paires = paires "  J vu"

   # the pause
   paires = paires "  # _"

   nb = split(paires, mot, " ")
   for (k = 1; k < nb; k += 2)
      mbpr[mot[k]] = mot[k + 1]

   # Local extras: symbols carrying a "!" suffix recode like the bare symbol.
   nb = split("k p t d n @", mot, " ")
   for (k = 1; k <= nb; k++)
      mbpr[mot[k] "!"] = mbpr[mot[k]]

   # Local extras: keywords that recode like the pause symbol "#".
   nb = split("Pause pause fin Coupdeglotte coupdeglotte", mot, " ")
   for (k = 1; k <= nb; k++)
      mbpr[mot[k]] = mbpr["#"]
}

BEGIN {
   # Settings handed over by the shell through the breaks in the quoting.
   sampa = "'$sampa'"
   trace = "'$trace'"

   # Counters reported by the END rule.
   nb_recode    = 0
   nb_nonrecode = 0

   # Regular expressions for the three comment openers:  //   /*   ;
   commentaire1 = "^//"
   commentaire2 = "^/\\*"
   commentaire3 = "^;"

   # Share, in percent, of the duration of a "J" that goes to its "n" part.
   n_poids = 50

   if (sampa != 0)
      remplir_table()
   else
      # Recoding disabled: a single dummy key that no input is expected to
      # match, so that mbpr is still used as an array.
      mbpr["XXXXXX_____XXXXXX"] = 0
}
########### fin du BEGIN


# Empty lines and comment lines are both written out as a bare ";" so that
# the line numbering of the output stays the same as that of the input.
# A line counts as a comment when its first field starts with one of the
# three openers held in commentaire1, commentaire2 and commentaire3.
NF == 0 || $1 ~ commentaire1 || $1 ~ commentaire2 || $1 ~ commentaire3 {
   print ";"
   next
}

### Coding of "J"

# Express the time t as a percentage of a segment of length seg_len,
# rounded to the nearest integer and clamped to the range 0..100.
# A segment of zero or negative length yields 0; this avoids the fatal
# division by zero that very short durations would otherwise trigger.
function segment_percent(t, seg_len,    pct)
{
   if (seg_len <= 0)
      return 0
   pct = int(t / seg_len * 100 + 0.5)
   if (pct < 0)
      return 0
   if (pct > 100)
      return 100
   return pct
}

# A "J" line is replaced by an "n" line followed by a "y" line.
#   $2      total duration; n_poids percent of it (rounded) goes to "n",
#           the remainder to "y"
#   $3..$NF read two by two as (position in percent, target value); the
#           original note calls these the f0 targets
# A target placed at or before n_poids percent is attached to "n", any
# later one to "y", and its position is re-expressed as a percentage of
# the part that receives it.
# The rule only fires when "J" is a key of mbpr, which is not the case
# when the recoding table has been switched off.
$1 == "J" && $1 in mbpr {
   # Start from empty lists; without this reset the targets of an earlier
   # "J" line would be repeated on every later one.
   liste_n = ""
   liste_y = ""

   duree   = $2
   duree_n = int(duree * n_poids / 100 + 0.5)
   duree_y = duree - duree_n

   for (i = 3; i <= NF; i += 2)
   {
      if ($i <= n_poids)
         liste_n = liste_n segment_percent($i / 100 * duree, duree_n) " " $(i + 1) "  "
      else
         liste_y = liste_y segment_percent($i / 100 * duree - duree_n, duree_y) " " $(i + 1) "  "
   }

   print "n", duree_n, liste_n
   print "y", duree_y, liste_y
   nb_recode++
   next
}


### General recoding.
### A line whose first field is a key of the mbpr table has that field
### replaced by the table value and is counted as recoded.  Every other
### line is written out unchanged and counted as not recoded; with tracing
### on, the line number, the line and the unknown code go to stderr.
{
   if ($1 in mbpr)
   {
      $1 = mbpr[$1]
      nb_recode++
      print
      next
   }

   nb_nonrecode++
   if (trace > 0)
   {
      print "ligne", NR ":", $0      > "/dev/stderr"
      print "non recodage de:", $1   > "/dev/stderr"
   }
   print
}


# End of input: optional summary on stderr, then the exit status.
#   exit 2  when at least one line could not be recoded
#   exit 0  otherwise
END {
   # The summary is a single print statement; the list is wrapped after
   # commas only, because a string literal cannot contain a line break.
   if (trace > 0)
      print "phonemes total:", (nb_recode + nb_nonrecode),
            "   recodes:", nb_recode,
            "   non_recodes:", nb_nonrecode   > "/dev/stderr"

   exit ((nb_nonrecode > 0) ? 2 : 0)
}
