#!/usr/bin/perl

################################################################################
#
# File: HCopy.pl
# A Perl script for parallel processing of the command 'HCopy' from HTK
# 
# Usage:
#	HCopy.pl [options] src [ + src ...] tgt ...
#
# For details about the usage, check it by typing "HCopy" without any options.
#	
# This perl script is designed to run transparently, e.g., you can run this
#   script as if you run 'HCopy'.
#
# This script submits parallel jobs through the SGE (Sun Grid Engine) using
#   the SGE command 'qsub' and checks the job progress using 'qstat'.
# 
# It returns when all the parallel jobs are finished.
#
# If the script size is not large enough, this script submits a single job.
# In that case it does not wait for the job to complete, so that the caller
#   can keep sending further jobs to the cluster. The caller (main routine)
#   therefore has to check the job progress itself.
#
# Written by Bowon Lee, 02/22/2006
#
# Department of the Electrical and Computer Engineering
# University of Illinois at Urbana-Champaign
#
################################################################################

# Number of processors, i.e. the number of pieces the input script is split
# into and the number of SGE jobs submitted in the parallel case
$NP = 32;

# Name of the HTK command executed by every job
$COMMAND = "HCopy";

# Check my user ID (passed to 'qstat -u' later on). 'whoami' terminates its
# output with a newline, which must not be carried into a command line.
$USERID = readpipe("whoami");
chomp($USERID);

# Check for the input script file following the option '-S'.
# $NSCP is the index of the script file name within @ARGIN; it stays
# undefined when '-S' is absent. If '-S' is repeated, the last one wins.
@ARGIN = @ARGV;
foreach $n (0..$#ARGIN)  {
	$NSCP = $n+1 if($ARGIN[$n] eq "-S");
}

# Count the lines of the input script and compute the script size (number of
# lines) for each processor.
$NLINES = 0;
if(defined($NSCP)) {
	# '-S' given as the very last argument: there is no file name to read
	die "Option -S requires a script file name\n" if($NSCP > $#ARGIN);
	$scpi = "$ARGIN[$NSCP]";

	open(SCP, '<', $scpi) || die "Cannot open $scpi: $!";
	# Read line by line; there is no need to hold the whole file in memory
	while(my $line = <SCP>) { $NLINES += 1; }
	close(SCP);
}
else {
	# Without '-S' there is nothing to split. Leaving $NLINES at 0 yields
	# $SCPSIZE == 0, so the arguments are passed on unchanged as a single job
	# instead of mistaking $ARGIN[0] for the script file.
	$scpi = "";
}
$SCPSIZE = int($NLINES/$NP);

# When the script size for each processor is at least 32 lines, split the
# input script into $NP pieces, submit one SGE job per piece, wait for all of
# them to finish, collect their error output and remove the temporary files.
if($SCPSIZE >= 32) {

    # Create the list of per-processor script files by inserting "_<n>" in
    # front of the file extension (list.scp -> list_0.scp, list_1.scp, ...).
    # Only a dot inside the last path component counts as an extension, and a
    # name without extension simply gets "_<n>" appended, so that a piece can
    # never carry the same name as (and overwrite) the input script itself.
    my ($stem, $ext) = $scpi =~ m{\A(.*?)(\.[^./]*)?\z}s;
    $ext = "" unless defined($ext);
    @scpn = map { $stem . "_" . $_ . $ext } 0..$NP-1;

    # Divide the data and write them into each script file. A new piece is
    # started every $SCPSIZE lines until $NP pieces exist; the lines left over
    # by the integer division all end up in the last piece.
    open(SCP, '<', $scpi) || die "Cannot open $scpi: $!";
    my $piece  = 0;	# number of pieces opened so far
    my $nlines = 0;	# number of lines copied so far
    my $out;
    while(my $line = <SCP>) {
        if( ($piece != $NP) && ($nlines == $SCPSIZE * $piece) ) {
            if($out) { close($out) || die "Cannot write $scpn[$piece-1]: $!"; }
            open($out, '>', $scpn[$piece]) || die "Cannot open $scpn[$piece]: $!";
            $piece += 1;
        }
        print {$out} $line;
        $nlines += 1;
    }
    if($out) { close($out) || die "Cannot write $scpn[$piece-1]: $!"; }
    close(SCP);

    # Create the command for each processor: the original argument list with
    # the script file name replaced by the name of the corresponding piece.
    # Arguments containing '*' are single-quoted because the command is run
    # by bash, which would otherwise expand them as file name patterns.
    @commands = ();
    foreach my $n (0..$NP-1)  {
        my @args = map {
              ($_ == $NSCP)           ? $scpn[$n]
            : ($ARGIN[$_] =~ m/\*/)   ? "'$ARGIN[$_]'"
            :                           $ARGIN[$_]
        } 0..$#ARGIN;
        $commands[$n] = join(" ", $COMMAND, @args);
    }

    # Write the SGE job script for each processor and submit it
    foreach my $n (0..$NP-1)  {
        my $scps = "${COMMAND}_$n.sh";
        open(my $sge, '>', $scps) || die "Cannot open $scps: $!";
        print {$sge} join("\n",
            '#!/bin/bash',
            '#$ -S /bin/bash',
            '#$ -cwd',
            '',
            $commands[$n],
        ), "\n";
        # Close before submitting so that the buffered text is guaranteed to
        # be on disk by the time qsub reads the file
        close($sge) || die "Cannot write $scps: $!";
        system("qsub $scps") == 0
            || warn "qsub $scps failed (status $?)\n";
    }

    # Wait until all the jobs are completed: poll 'qstat' every 30 seconds
    # and count the listed jobs whose line contains the command name
    while(1) {
        sleep 30;
        print "Checking job progress: ";
        @jobs = readpipe("qstat -u $USERID");
        $nproc = grep { /$COMMAND/ } @jobs;
        last if($nproc == 0);
        print "$nproc jobs are still running...\n";
    }
    print "Done\n";

    # Check any errors: any text found in the jobs' error files
    # (<script>.e<jobid>) is gathered into a single file <command>_errors
    print "Checking any errors: ";
    @errors = readpipe("cat $COMMAND*.sh.e*");
    $errorcheck = scalar(@errors);
    if($errorcheck) {
        system("cat $COMMAND*.sh.e* > ${COMMAND}_errors");
    }

    # Cleaning temporary files: the script pieces, the job scripts and the
    # output/error files whose names start with the job script name
    print "Cleaning temporary files: ";
    foreach my $n (0..$NP-1)  {
        unlink($scpn[$n]);
        unlink(glob("${COMMAND}_$n.sh*"));
    }
    print "Done\n";

    # If error occurred, then print this message and exit with failure
    if($errorcheck) {
        die "Error occured: Please check ${COMMAND}_errors\n";
    }
}  # if($SCPSIZE >= 32)  {

# When the script size is not large enough, then submit a single job.
# The job is only submitted here; this branch does not wait for it to finish.
else  {
	print "Script size is too small for parellel processing: ";
	print "Sending a single job...\n";

	# Rebuild the command line from the original arguments. Arguments
	# containing '*' are single-quoted (as in the parallel case) because the
	# command is run by bash, which would otherwise expand them as file name
	# patterns.
	$command = join(" ", $COMMAND, map { m/\*/ ? "'$_'" : $_ } @ARGIN);

	# The braces are required: "$COMMAND_single.sh" would interpolate the
	# (undefined) variable $COMMAND_single, name the job script ".sh" and
	# make the cleanup below remove every file matching ".sh*".
	$scp = "${COMMAND}_single.sh";
	open(SGESCP, '>', $scp) || die "Cannot open $scp: $!";
	print SGESCP join("\n",
		'#!/bin/bash',
		'#$ -S /bin/bash',
		'#$ -cwd',
		'',
		$command,
	), "\n";
	close(SGESCP) || die "Cannot write $scp: $!";
	system("qsub $scp") == 0
		|| warn "qsub $scp failed (status $?)\n";

	# Remove the job script (and anything else starting with its name) right
	# away. NOTE(review): this relies on qsub having taken its own copy of
	# the script at submission time - confirm for the local SGE setup.
	unlink(glob("$scp*"));
}
