#!/usr/bin/perl

################################################################################
#
# File: HERest.pl
# A Perl script for parallel processing of the command 'HERest' from HTK
# 
# It seems that this parallel implementation of 'HERest' does NOT produce
#   the same result. According to the HTK book, it seems that we need to put
#   an equal number of data files onto each processor, which is not always
#   possible. So it's still experimental.
#
# Usage:
#	HERest.pl [options] hmmList dataFiles...
#
# For details about the usage, check it by typing "HERest" without any options.
#	
# This perl script is designed to run transparently, e.g., you can run this
#   script as if you run 'HERest'.
#
# This script submits parallel jobs through the SGE (Sun Grid Engine) using
#   the SGE command 'qsub' and checks the job progress using 'qstat'.
# 
# It returns when all the parallel jobs are finished.
#
# Written by Bowon Lee, 02/22/2006
#
# Department of the Electrical and Computer Engineering
# University of Illinois at Urbana-Champaign
#
################################################################################

# Enforce declarations and surface mistakes (e.g. reads from closed handles).
use strict;
use warnings;
use File::Glob qw(bsd_glob);

# Specify the number of processors
my $NP = 16;    # Number of processors (upper bound on the number of SGE jobs)

# Specify the command to be executed
my $COMMAND = "HERest";

# Several progress messages are printed without a trailing newline, so flush
#   standard output immediately to keep them visible.
$| = 1;

# Quote a single argument so that it survives, unchanged, as one word of a
#   bash command line. Plain words are returned as they are; anything else is
#   wrapped in single quotes (embedded single quotes are escaped).
#   $arg    : the argument to protect
#   returns : the shell-safe text
sub shell_quote {
    my ($arg) = @_;
    return $arg if $arg =~ m{\A[A-Za-z0-9_./:=+,-]+\z};
    (my $quoted = $arg) =~ s/'/'\\''/g;
    return "'$quoted'";
}

# Build the name of the n-th partial script file from the input script name.
#   "_n" is inserted before the extension of the last path component
#   (train.scp -> train_1.scp). When there is no extension, "_n" is appended,
#   so the result never equals the input name and the cleanup step cannot
#   delete the user's own script file.
#   $path   : name of the input script file
#   $n      : job index
#   returns : name of the partial script file
sub split_script_name {
    my ($path, $n) = @_;
    if ($path =~ m{\A(.*[^/])(\.[^./]*)\z}s) {
        return "${1}_${n}$2";
    }
    return "${path}_${n}";
}

# Check my user ID (the trailing newline of 'whoami' must not reach qstat)
my $USERID = readpipe("whoami");
$USERID = "" unless defined $USERID;
chomp $USERID;
die "Cannot determine the user ID with 'whoami'\n" if $USERID eq "";

# Check for the input script file following the option '-S'
#   and output HMM model file following the option '-M'
my @ARGIN = @ARGV;
my ($NSCP, $NMMF);
foreach my $n (0..$#ARGIN) {
    $NSCP = $n + 1 if $ARGIN[$n] eq "-S";
    $NMMF = $n + 1 if $ARGIN[$n] eq "-M";
}

# The data set can only be divided when it is given as a script file, and the
#   last argument is taken to be the HMM list.
die "Usage: HERest.pl [options] -S scriptFile [-M outDir] hmmList\n"
    unless defined $NSCP && $NSCP < $#ARGIN;

my $scpi    = $ARGIN[$NSCP];
my $hmmlist = $ARGIN[$#ARGIN];

# Without '-M' the accumulators are looked for in the current directory.
#   NOTE(review): assumes HERest dumps them there when '-M' is absent -
#   confirm against the HTK book.
my $mmfi = (defined $NMMF && $NMMF < $#ARGIN) ? $ARGIN[$NMMF] : ".";

# Read the input script once; it is both counted and divided from memory
open(my $scp_in, '<', $scpi) or die "Cannot open $scpi: $!";
my @lines = <$scp_in>;
close($scp_in);
my $NLINES = scalar @lines;
die "No data files listed in $scpi\n" if $NLINES == 0;

# Never start more jobs than there are data files, otherwise some jobs would
#   be handed a script file that does not exist.
my $njobs   = $NLINES < $NP ? $NLINES : $NP;
my $SCPSIZE = int($NLINES / $njobs);

# Create a list of divided data set
my @scpn = map { split_script_name($scpi, $_) } 1..$njobs;

# Divide the data set and write them into each script file.
#   Every job gets $SCPSIZE lines; the last one also takes the remainder.
foreach my $n (0..$njobs-1) {
    my $first = $SCPSIZE * $n;
    my $last  = ($n == $njobs - 1) ? $#lines : $first + $SCPSIZE - 1;
    open(my $scp_out, '>', $scpn[$n]) or die "Cannot open $scpn[$n]: $!";
    print {$scp_out} @lines[$first..$last];
    close($scp_out) or die "Cannot write $scpn[$n]: $!";
}

# Create command for each processor: the original arguments with the script
#   file replaced by the partial one and '-p n' placed before the HMM list
my @commands = ();
foreach my $n (1..$njobs) {
    my @words = ($COMMAND);
    foreach my $narg (0..$#ARGIN-1) {
        push @words,
            shell_quote($narg == $NSCP ? $scpn[$n-1] : $ARGIN[$narg]);
    }
    push @words, "-p", $n, shell_quote($hmmlist);
    push @commands, join(" ", @words);
}

# Write script for each processor and submit the job
my $submit_failures = 0;
foreach my $n (0..$njobs-1) {
    my $scps = "${COMMAND}_$n.sh";
    open(my $sge, '>', $scps) or die "Cannot open $scps: $!";
    print {$sge} '#!/bin/bash',     "\n",
                 '#$ -S /bin/bash', "\n",
                 '#$ -cwd',         "\n",
                 "\n",
                 "$commands[$n]\n";
    # The file must be complete on disk before qsub reads it
    close($sge) or die "Cannot write $scps: $!";
    if (system("qsub", $scps) != 0) {
        warn "qsub failed for $scps (status $?)\n";
        $submit_failures += 1;
    }
}

# Wait until all the jobs are completed
while (1) {
    sleep 30;
    print "Checking job progress: ";
    my @jobs = readpipe("qstat -u " . shell_quote($USERID));
    warn "qstat failed (status $?); assuming no jobs remain\n" if $? != 0;
    my $nproc = grep { /\Q$COMMAND\E/ } @jobs;
    last if $nproc == 0;
    print "$nproc jobs are still running...\n";
}
print "Done\n";

# Check any errors: collect whatever the jobs wrote to their stderr files
print "Checking any errors: ";
my @errors = ();
foreach my $errfile (glob("${COMMAND}*.sh.e*")) {
    open(my $err_in, '<', $errfile) or next;
    push @errors, <$err_in>;
    close($err_in);
}
my $errorcheck = scalar @errors;
if ($errorcheck) {
    if (open(my $err_out, '>', "${COMMAND}_errors")) {
        print {$err_out} @errors;
        close($err_out);
    }
    else {
        warn "Cannot open ${COMMAND}_errors: $!\n";
    }
}
print "Done\n";

# Merge the results: run the command once more with '-p 0' on the
#   accumulators, without the '-S scriptFile' pair
print "Merging results: ";
my @merge_args = ();
foreach my $narg (0..$#ARGIN-1) {
    next if $narg == $NSCP || $narg == $NSCP - 1;
    push @merge_args, $ARGIN[$narg];
}
my @accs = bsd_glob("$mmfi/*.acc");
my $merge_failed = 0;
if (@accs) {
    # List form: the arguments reach the command exactly as given, no shell
    if (system($COMMAND, @merge_args, "-p", 0, $hmmlist, @accs) != 0) {
        $merge_failed = 1;
    }
}
else {
    $merge_failed = 1;
}
print "Done\n";

# Clean temporary files (partial scripts, job scripts and their SGE output)
print "Cleaning temporary files: ";
foreach my $n (0..$njobs-1) {
    unlink($scpn[$n]);
    unlink(glob("${COMMAND}_$n.sh*"));
}
print "Done\n";

# If error occurred, then print this message
if ($errorcheck) {
    print STDERR "Error occured: Please check ${COMMAND}_errors\n";
}
if ($submit_failures) {
    print STDERR "$submit_failures job(s) could not be submitted: "
               . "the merged result does not cover the whole data set\n";
}
if ($merge_failed) {
    print STDERR "Merging failed: no accumulators in $mmfi "
               . "or $COMMAND returned an error\n";
}
