/* -------------------------------------------------------------------------------- */ /* VApplySvms.c: Compute SVM discriminants for every frame in an input file Copyright 2005, Trustees of the University of Illinois Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Revision history: May 2005, Sarah Borys: Fixed bugs in the handling of long multi-file scripts August 2004, Mark Hasegawa-Johnson: First revision created Likely future revisions: PVTK should be cleaned up, made independent of HTK, merged with SpeechLib, and rewritten from the bottom up to be compatible with LAPACK and STL. /* -------------------------------------------------------------------------------- */ char *vapplysvms_version = "$Id: VApplySvms.c,v 1.4 2004/08/10 04:50:14 mhasegaw Exp mhasegaw $"; #include "PVTK.h" /* ---------------------- Global Variables, same names in every file ----------------------- */ static int trace = 0; /* Trace level */ static ConfParam *configs[MAXGLOBS]; static int nParm = 0; /* total num params */ /*FileFormat srcFF = HTK; /* Default source file format */ /*FileFormat tgtFF = HTK; */ static BufferInfo info; /* Define a global array of SVM definitions */ static SVMDef Transform[MAXTRANS]; static DMatrix NormalizationStats[MAXTRANS]; static IntVec RelativeFrames[MAXTRANS]; static int nTrans=0; /* number of transforms */ static MemHeap mStack; /* matrix stack, also for i/o */ static MemHeap xfStack; /* Transform stack -- not reset in every loop*/ /* ---------------- Process Command Line ------------------------- */ static char *USAGE="\n\ USAGE: VApplySvms [opts] src tgt\n\ \n\ Apply all SVMs specified on the command line (in consecutive -s \n\ options). For each SVM, compute the nonlinear discriminant corresponding\n\ to every frame or row in the input file. Output a vector of the \n\ corresponding SVM discriminants.\n\ \n\ If src is in HTK format, tgt is also in HTK format. If src is in \n\ svm_light format, tgt is also in svm_light format, with the label\n\ of each row in tgt equal to the corresponding label in src.\n\ \n\ The -t option specifies that each output row should be computed by\n\ concatenating several frames of the input (at offsets\n\ t1,t2,... relative to the row number of the output), and then\n\ applying all SVMs to the resulting super-row. Row numbers that fall\n\ before row 1 are filled by repeating row 1; likewise, rows past the\n\ last row repeat the last row. This is similar to the concatenation performed by VExtract.\n\ \n\ The -g option specifies a statsfile (e.g., created using VExtract)\n\ that will be used to normalize inputs after concatenation. \n\ \n\ WARNING: ORDER MATTERS. -g and -t must be specified BEFORE -s.\n\ In this way, it is possible to specify many different -s files\n\ with different -g and -t options; you simply need to observe\n\ the sequence.\n\ Example: suppose that you want to apply two SVMs to \n\ every MFC file in the directory mydata. Both foo.svm\n\ and bar.svm should be applied to every three-frame sequence\n\ in the input data. 
Data should be normalized, prior to applying \n\ the SVMs, by the normalization files foo.stats and bar.stats.\n\ The file train.toks contains training vectors that have been\n\ correctly concatenated together, but not normalized.\n\ \n\ (1) Use foo.traintoks and bar.traintoks to learn parameters\n\ of the posterior PDF:\n\ \n\ VApplySvms -T 1 -g foo.stats -s foo.svm -g bar.stats \n\ -s bar.svm -d discrim.stats -p posterior\n\ -h /-2:0.125:2/ -i 1 train.toks train.out\n\ \n\ (2) Create a script file ``mydata.scp'' with pairs\n\ of the form ``mydata/src.MFC mydata/tgt.prob''\n\ \n\ (3) For every mydata/src.MFC: concatenate three consecutive\n\ frames, normalize by foo.stats, apply the SVM in foo.svm,\n\ calculate a sigmoid posterior probability estimate (using\n\ info in discrim.stats), and write to the first column in \n\ mydata/tgt.prob. Repeat same steps with bar.svm, and write\n\ resulting probabilities to second column in mydata/tgt.prob:\n\ \n\ VApplySvms -T 1 -t /-1:1:1/ -g foo.stats -s foo.svm -g bar.stats \n\ -s bar.svm -e discrim.stats -i 1 -p posterior -S mydata.scp\n\ \n\ Recommendation: always use -T 1 to get a list of files being \n\ processed. If you really want a detailed trace, use -T 15 or -T 31.\n\ -T 15 or -T 31 is currently the only way to see the normalized \n\ posterior histogram.\n\ \n\ SVM definitions are assumed to be roughly in SVM-light v5.00 format.\n\ The first word in the file should contain the characters svm or\n\ SVM. A line of the form `number number:number number:number ...' is\n\ assumed to be a support vector; the first number is its alpha. A\n\ line of the form `number # remainder' is assumed to be a parameter.\n\ Other lines are ignored. All parameters must be specified before the\n\ first support vector is read. Parameter definitions with the\n\ following remainders are read; others are ignored: 'kernel parameter\n\ -d', 'kernel parameter -g', 'kernel parameter -s', 'kernel parameter\n\ -r', 'highest feature index', 'number of support vectors plus 1',\n\ 'threshold b', 'kernel type'.\n\ \n\ 'kernel type': the following kernel types are understood.\n\ 0=linear K(x,y)=x'*y\n\ 1=polynomial K(x,y)=(s*x'*y+r)^d\n\ 2=rbf K(x,y)=exp(-g*|x-y|^2)\n\ 3=tanh K(x,y)=tanh(s*x'*y+r)\n\ \n\ If -p is specified, the output file will contain probability estimates\n\ rather than raw SVM discriminant outputs. Probability estimates are\n\ based on the discrimstats file, which may be computed (if -d is specified)\n\ or read from an external file (if -e is specified). If -d is specified,\n\ one of the class labels in the input must be '1' or '+1'; all other classes\n\ will be contrasted with class 1, regardless of their label. If -e is \n\ specified, the file 'discrimstats' must have 3*N lines for some N; the \n\ first N are global stats, the next N are for class 1, the next N are for \n\ class ~1 (class not-one). The first three lines in each class must be \n\ mu, sd, and number of tokens; remaining lines give the histograms.\n\ Histogram entries must be integers -- they are presumed to represent\n\ the count in each bin. Histogram entries are normalized differently\n\ depending on whether you ask for ``likelihood'' or ``posterior'' histogram\n\ estimates. If -i is specified, ``mincount'' is added to each histogram\n\ bin after reading or writing discrimstats, but before normalizing to\n\ create a likelihood or posterior histogram. 
-i 1 is recommended.\n\ The type specified by -p can be one of:\n\ \n\ Option Output in col n Output in col n+N Estimator\n\ -p sigmoid p(class 1|d[n]) p(class ~1|d[n]) 1/(1+exp(-a*(d[n]-b)))\n\ -p posterior p(class 1|d[n]) p(class ~1|d[n]) histogram\n\ -p gaussian p(d[n]|class 1) p(d[n]|class ~1) Gaussian\n\ -p likelihood p(d[n]|class 1) p(d[n]|class ~1) histogram\n\ \n\ Options Meaning \n\ \n\ -d discrimstats Save stats of SVM discriminants in discrimstats\n\ -e discrimstats Load stats of SVM discriminants from discrimstats\n\ -g statsfile Normalize input features by mean and SD from statsfile\n\ -h /th1,th2,th3/ Compute a histogram with given thresholds\n\ -h /b:s:e/ Compute a histogram with thresholds b,b+s,b+2s,...,e\n\ -i mincount Smooth the histogram by adding mincount to every bin\n\ -p type Write probabilities, not discriminants.\n\ -t /t1,t2,t3,t4/ Concatenate frames t+t1,t+t2,t+t3,t+t4 \n\ -t /b:s:e/ Concatenate frames t+b,t+b+s,t+b+2*s,...,t+e\n\ -s path SVM definition file\n\ -A Print command line arguments\n\ -R Print RCS version information\n\ -S f Use script file f\n\ -T n Set trace level to n (meaningful: 1,3,7,15,31)\n\ "; void ReportUsage(char *fmt, char *s) { printf(fmt, s); printf(USAGE); printf("\n"); Exit(0); } /* SetConfParms: set conf parms relevant to this tool */ void SetConfParms(void) { int i; Boolean b; char buf[MAXSTRLEN]; nParm = GetConfig("VAPPLYSVMS", TRUE, configs, MAXGLOBS); if (nParm>0){ if (GetConfInt(configs,nParm,"TRACE",&i)) trace = i; /* if (GetConfStr(configs,nParm,"SOURCEFORMAT",buf)) srcFF = Str2Format(buf); */ /* if (GetConfStr(configs,nParm,"TARGETFORMAT",buf)) tgtFF = Str2Format(buf); */ } } /* ----------- Apply Linear and Nonlinear Transforms -------------- */ int ApplySVM(DMatrix Input, DVector Output, SVMDef *s, DMatrix stats, IntVec relfr, char* filename) { int nrows, ncols, nsv=0; int i, j, m, n, t, sv, nContext,c; static DMatrix A; double accum; FILE *f; /* Construct the backup value of relfr */ if(relfr==NULL) { relfr=CreateIntVec(&xfStack,1); relfr[1]=0; } /* Check matrix sizes */ nrows = NumDRows(Input); nContext = IntVecSize(relfr); ncols = NumDCols(Input)*nContext; if(s->SV != NULL) nsv = NumDRows(s->SV); if(s->kerneltype != K_LINEAR) { if(NumDCols(s->SV) != ncols) { if ((f=fopen("deadfiles.log", "a"))==NULL) { printf("VApplySvms: Unable to open deadfiles.log\n"); exit(-1); } fprintf(f, "VApplySvms: Unable to apply nonlinear SVM of dim %d to data of dim %d (%d*%d) to file %s\n", NumDCols(s->SV),ncols,nContext,NumDCols(Input), filename); c = fclose(f); if (c == EOF) { printf("VApplySvms: Unable to close deadfiles.log\n"); exit(-1); } /*Don't exit if VApplySvms can't process a file, but write to deadfiles.log instead. Then go onto process the next file. 
*/ return(-1); /* Exit(-1); */ } } if(s->kerneltype == K_LINEAR) { if(DVectorSize(s->w) != ncols) { fprintf(stderr, "VApplySvms: Unable to apply linear SVM of dim %d to data of dim %d (%d*%d)\n", DVectorSize(s->w),ncols,nContext,NumDCols(Input)); Exit(-1); } } if(DVectorSize(Output) != nrows) { fprintf(stderr,"VApplySvms: Output array has length %d, but input has length %d\n",DVectorSize(Output),nrows); Exit(-1); } if (trace & T_VERBOSE) fprintf(stderr,"ApplySVM type %d to Input (%d,%d * %d)\n", s->kerneltype,NumDRows(Input),nContext, NumDCols(Input)); /* Create the input data array, and zero the output array */ A = CreateDMatrix(&mStack, nrows, ncols); ZeroDVector(Output); /* Concatenate frames as requested by relfr */ for(t=1; t<=nrows; t++) { /* Find all of the frames to be concatenated for this output frame */ for(m=1; m<=nContext; m++) { /* Compute the frame number */ n = t + relfr[m]; n = (n<1)?1:((n>nrows)?nrows:n); /* printf("Concatenating row %d (%d+%d) to row %d at offset %d, length %d\n", n,t,relfr[m],t,m*NumDCols(Input),NumDCols(Input)); */ CopySubVectorDD(Input[n], A[t], 0, (m-1)*NumDCols(Input), NumDCols(Input)); } } /* Normalize if so requested by stats */ if(stats != NULL) { if(ncols != NumDCols(stats)) { fprintf(stderr,"VApplySvms: Input length %d (%d*%d) can't be normalized using stats of length %d\n", ncols,nContext,NumDCols(Input),NumDCols(stats)); ZeroDVector(Output); return(-1); } for(t=1; t<=nrows; t++) for(n=1; n<=ncols; n++) { if(stats[2][n] > 1e-8) A[t][n] = (A[t][n]-stats[1][n])/stats[2][n]; else A[t][n] -= stats[1][n]; } } /* Switch based on the SVM kernel type */ switch(s->kerneltype) { case K_LINEAR: /* Linear SVM */ for(m=1; m<=nrows; m++) for(n=1; n<=ncols; n++) Output[m] += s->w[n]*A[m][n]; break; case K_POLY: /* Polynomial Kernel */ for(m=1; m<=nrows; m++) { Output[m] = 0; for(sv=1; sv<=nsv; sv++) { /* accum = s->SV[sv]'*A[m] */ accum=s->SV[sv][1]*A[m][1]; for(n=2; n<=ncols; n++) accum += s->SV[sv][n]*A[m][n]; /* K(s->SV[sv],A[m]) = (s*accum+r)^d */ /* Output accumulates alpha times the kernel value */ Output[m] += s->alpha[sv] * pow(s->s*accum + s->r, s->d); } } break; case K_RBF: /* RBF kernel */ for(m=1; m<=nrows; m++) { Output[m] = 0; for(sv=1; sv<=nsv; sv++) { /* accum = |SV[sv]-A[m]|^2 */ accum=(s->SV[sv][1]-A[m][1])*(s->SV[sv][1]-A[m][1]); for(n=2; n<=ncols; n++) accum += (s->SV[sv][n]-A[m][n])*(s->SV[sv][n]-A[m][n]); /* K(s->SV[sv],A[m]) = exp(-(s->g)*accum) */ /* Output accumulates alpha times the kernel value */ Output[m] += s->alpha[sv] * exp(-(s->g)*accum); } } break; case K_SIGMOID: /* tanh kernel */ for(m=1; m<=nrows; m++) { Output[m] = 0; for(sv=1; sv<=nsv; sv++) { /* accum = s->SV[sv]'*A[m] */ accum=s->SV[sv][1]*A[m][1]; for(n=2; n<=ncols; n++) accum += s->SV[sv][n]*A[m][n]; /* K(SV[sv],A[m]) = tanh(s->s*accum+ s->r) */ /* Output accumulates alpha times the kernel value */ Output[m] += s->alpha[sv] * tanh(s->s*accum+ s->r); } } break; default: fprintf(stderr,"VApplySvms: Don't know how to apply a kernel of type %d\n",s->kerneltype); } /* Subtract the offset, and return */ for(m=1; m<=nrows; m++) Output[m] -= s->b; return(0); } /******************************************************************/ /* ----------- main -------------- */ int main(int argc, char *argv[]) { char *s, *filename, *infile, *outfile; /* command line option, input files, output file */ FILE *fid; /* A stream for i/o */ DMatrix B; /* Concatenated input files, and output file */ int nfr=0, ifr; /* Frame numbers, relative to the current frame, to be concatenated */ IntVec
relfr; int nrows, iTrans=0; /* number of rows */ char *arg, *arg2; /* Arbitrary argument string pointers */ int m,n,i,j,k,d,t; /* Counters */ int start_frame,end_frame; /* Start and end for the -t option */ char *statsoutfile=NULL; /* File for output statistics */ int statsblocksize=3,normblocksize=3; /* Size of a block in the output statistics */ char *probability_type=NULL; /* Probability type for output conversion */ DVector HThresh=NULL; /* Threshold levels for histogram */ SVMDef Input,Output; /* SVM definition struct for the input, output, and statistics */ SVMDef PDF,Stats; /* SVM definition structs for normalization, and output stats */ SVMDef InputNorm; /* Normalization statistics for input feature file */ char **FileList; /* List of files, read from the command line */ int NumFiles,IFile; /* number of files, and file counter */ DMatrix *D; /* Storage for SVM discriminants computed on all inputs */ DVector *Alphas; /* Storage for the class labels read from all inputs */ HParm HTKDATA; /* Data for input and output to and from HTK files */ int *sampPeriods; /* Sample periods of the HTK files */ short *HTKCodes; /* HTK Codes of the HTK files */ double variance,a; /* variable for holding variance estimate, and normalized Gauss distance */ int mincount=0; /* Minimum histogram bincount: used for backoff */ IntVec iClass; /* Class index of token #m: iClass[m]==1 for class 1, ==2 for class ~1 */ int fileOK,fileSeen=0; /*A check to make sure the SVM's can be applied to the input*/ if(InitShell(argc,argv,vapplysvms_version,"")= MAXTRANS) HError(1014,"VApplySvms: Max number of transforms exceeded"); /* Read in the offset vector and transform matrix */ if (NextArg() != STRINGARG) ReportUsage("VApplySvms: SVM filename expected after -s\n",""); filename = CopyString(&mStack,GetStrArg()); ReadSVMFile(filename, &xfStack, &(Transform[nTrans]), trace); /* Copy the relfr vector, if available */ RelativeFrames[nTrans] = CreateIntVec(&xfStack,IntVecSize(relfr)); CopyIntVec(relfr,RelativeFrames[nTrans]); /* Copy the stats matrix, if available */ if(InputNorm.SV != NULL) { NormalizationStats[nTrans] = CreateDMatrix(&xfStack,2,NumDCols(InputNorm.SV)); CopyDVector(InputNorm.SV[1],NormalizationStats[nTrans][1]); CopyDVector(InputNorm.SV[2],NormalizationStats[nTrans][2]); } else NormalizationStats[nTrans]=NULL; nTrans++; if(trace & T_VERBOSE) { fprintf(stderr,"Read SVMDef number %d\n",nTrans); PrintSVMDef(stderr, &(Transform[nTrans-1])); if(RelativeFrames[nTrans-1] != NULL) { fprintf(stderr," .. 
input concatenated from frames"); for(n=1; n<=IntVecSize(RelativeFrames[nTrans-1]); n++) fprintf(stderr, " %d",RelativeFrames[nTrans-1][n]); fprintf(stderr,"\n"); } if(NormalizationStats[nTrans-1] != NULL) { fprintf(stderr," ..input shifted by mu[1]=%g, mu size=%d\n", NormalizationStats[nTrans-1][1][1],NumDCols(NormalizationStats[nTrans-1])); fprintf(stderr," ..input divided by sd[1]=%g, sd size=%d\n", NormalizationStats[nTrans-1][2][1],NumDCols(NormalizationStats[nTrans-1])); } } break; case 't': /* Specify relative frame numbers that will be concatenated */ if (NextArg() != STRINGARG) ReportUsage("VApplySvms: -t should be followed by a vector of offsets\n",""); arg=CopyString(&xfStack, GetStrArg()); /* Check for colons in the input, indicating start:step:end syntax */ if((arg2=strchr(arg, ':')) != NULL && arg2>arg) { /* Work backward through all digits and '-' signs to find beginning of the vector */ arg2--; while(arg2 >= arg && (isdigit(*arg2) || *arg2=='-')) arg2--; /* Try to read start:step:end; count the number of fields we get to read */ m=sscanf(arg2+1, "%d:%d:%d", &start_frame, &n, &end_frame); if(m<2) HError(1,"VApplySvms: -t should be followed by /frame1,frame2,.../ or /startframe:skip:endframe/, not %s",arg); if(m==2) { end_frame=n; n=1; } /* Free the previous relfr, and create a new one, filled with the step elements */ FreeIntVec(&mStack,relfr); nfr=floor((end_frame-start_frame)/n)+1; relfr = CreateIntVec(&mStack,nfr); for(i=1; i<=nfr; i++) relfr[i]=start_frame+(i-1)*n; } else { /* Otherwise: check for commas */ nfr=1; for(arg2=arg; *arg2!='\0'; arg2++) if(*arg2==',') nfr++; /* Free the previous relfr, and create a new one, filled with the step elements */ FreeIntVec(&mStack,relfr); relfr = CreateIntVec(&mStack,nfr); /* Read the elements of the relative frame vector */ ifr=1; for(arg2=arg; *arg2!='\0' && ifr<=nfr; ) { while(isdigit(*arg2)==0 && *arg2 != '-' && *arg2 != '\0') arg2++; relfr[ifr++] = atoi(arg2); while((isdigit(*arg2)!=0 || *arg2 == '-') && *arg2 != '\0') arg2++; } } if ((trace & T_OPTS) && nfr>=1) { fprintf(stderr,"Output lines will contain %d frames: [%d",IntVecSize(relfr),relfr[1]); for(ifr=2;ifr<=nfr;ifr++) fprintf(stderr,",%d",relfr[ifr]); fprintf(stderr,"]\n"); } break; case 'A': /* Repeat the arguments to stderr */ for(m=0; m<argc; m++) fprintf(stderr,"%s ",argv[m]); break; case 'R': fprintf(stderr,"%s\n",vapplysvms_version); PrintRCSIdentifier(stderr); Exit(0); case 'T': trace = GetChkedInt(0,255,s); break; default: ReportUsage("VApplySvms: Unknown switch %s",s); } } /* Initialize the output Stats arrays, if they are needed */ /* Stats are needed (1) if -d specified, or (2) if probability_type != NULL (-p) and no -e given */ if((probability_type != NULL && PDF.SV==NULL) || statsoutfile != NULL) { ZeroDMatrix((Stats.SV = CreateDMatrix(&xfStack,3*statsblocksize,nTrans))); if(trace & T_VERBOSE) fprintf(stderr,"Created Stats.SV, %d, %d\n",NumDRows(Stats.SV),NumDCols(Stats.SV)); Stats.alpha = CreateDVector(&xfStack,3*statsblocksize); /* Label the stats rows. mu,SD, and N are labeled with class label. 
Histogram labeled with threshold */ for(i=0; i<=2; i++) { for(m=1; m<=3; m++) Stats.alpha[i*statsblocksize+m] = (i<2)?i:-1; if(HThresh!=NULL) { for(m=1; m<=DVectorSize(HThresh); m++) Stats.alpha[i*statsblocksize+3+m] = HThresh[m]; } } } /* Read all of the filenames from the command line */ if (NumArgs() <= 1) ReportUsage("VApplySvms: Source file expected",""); FileList = (char **)malloc(MAX_FILES * sizeof(char *)); NumFiles=0; while(NumArgs() > 0) { FileList[NumFiles++] = CopyString(&xfStack,GetStrArg()); printf("Read %d'th file from cmdline, %s\n",NumFiles,FileList[NumFiles-1]); } if(NumFiles<=0) ReportUsage("VApplySvms: No input files specified!!\n",""); /**************************************************************************/ /* ----------- Read each input file, apply SVMs, and accumulate statistics ---------*/ D=(DMatrix *)calloc(1,sizeof(DMatrix)); Alphas=(DVector *)calloc(1,sizeof(DVector)); sampPeriods=(int *)calloc(1,sizeof(int)); HTKCodes=(short *)calloc(1,sizeof(short)); recover: if (fileSeen == 0) { IFile = 0; } else { fileSeen = 0; fileOK = 0; IFile+=2; } while (IFile < NumFiles-1) { /* Before reading next infile&outfile: reset I/O stack, but NOT xfStack */ ResetHeap(&mStack); /* First, try reading it as an svmlight file */ infile=FileList[IFile]; if(ReadSVMFile(infile, &mStack, &Input, trace) == NULL) { /* If ReadSVMFile returned NULL, try to read it as an HTK file */ ReadHParm(&mStack,infile, &HTKDATA, trace); HParm2SVMDef(&mStack, &HTKDATA, &Input, &(sampPeriods[0]), &(HTKCodes[0])); if(Input.SV != NULL) Input.kerneltype = K_HTK; /* A code to mark HTK input */ /* If input is from an HTK file, the only way to apply output probability map is with -e option specified */ if(PDF.SV == NULL && probability_type != NULL) { fprintf(stderr,"VExtract: %s probabilities can only be applied to HTK files if -e option used\n", probability_type); fprintf(stderr," Outputs will not be converted to probabilities\n"); probability_type=NULL; } } printf("Reading %s\n",FileList[IFile]); /* If reading was successful, apply the SVMs to the input, and write to output */ if(Input.SV==NULL) fprintf(stderr,"VApplySvms: %s not readable as HTK or SVMTOKS file\n",infile); else { if(trace & T_VERBOSE ) fprintf(stderr,"Read input (%d,%d) from %s\n", NumDRows(Input.SV),NumDCols(Input.SV),infile); /* Create the output matrix --- one row for each SVM output, one column per frame */ D[0] = CreateDMatrix(&mStack, nTrans, NumDRows(Input.SV)); Alphas[0] = CreateDVector(&mStack, DVectorSize(Input.alpha)); CopyDVector(Input.alpha, Alphas[0]); /* Read the class labels --- class "1" is first, all others are class ~1 */ iClass = CreateIntVec(&mStack,DVectorSize(Input.alpha)); /* iClass stores class indices for each frame */ for(m=1; m<=DVectorSize(Input.alpha); m++) { iClass[m] = ((int)Input.alpha[m]==1) ? 
1 : 2; } /* ----------- Apply the Transforms -------------- */ for(iTrans=0; iTransHThresh[k]; k++); Stats.SV[i*statsblocksize+3+k][iTrans+1]++; } } } if(trace & T_VERBOSE && iTrans==0) { fprintf(stderr,"Computing stats for class 1, SVM 1: SUM=%g, SSQ=%g, N=%g\n", Stats.SV[statsblocksize+1][1],Stats.SV[statsblocksize+2][1],Stats.SV[statsblocksize+3][1]); fprintf(stderr,"Computing stats for class ~1, SVM 1: SUM=%g, SSQ=%g, N=%g\n", Stats.SV[2*statsblocksize+1][1],Stats.SV[2*statsblocksize+2][1],Stats.SV[2*statsblocksize+3][1]); } } } } /**************************************************************************/ /* ----------- Compute mean and variance in stats, and save --------------*/ if(Stats.SV!=NULL) { /* Convert the accumulators into mean and standard deviation */ for(i=0; i<=2; i++) for(d=1; d<=NumDCols(Stats.SV); d++) { /* If there are at least two tokens, find the mean and SD, o.w. SD=0 and mean=sum */ if(Stats.SV[i*statsblocksize+3][d] >= 2) { t=i*statsblocksize; /* The H-J fast variance formula: Var = (SSQ-Sum*Sum/N) / (N-1) */ Stats.SV[t+2][d]= (Stats.SV[t+2][d] - Stats.SV[t+1][d]*Stats.SV[t+1][d]/Stats.SV[t+3][d])/(Stats.SV[t+3][d]-1); Stats.SV[t+1][d] /= Stats.SV[t+3][d]; /* Mean = Sum/N */ if(Stats.SV[t+2][d] > 0) Stats.SV[t+2][d] = sqrt(Stats.SV[t+2][d]); /* SD=sqrt(Var) usually... */ else Stats.SV[t+2][d] = 0; /* If numerical errors set Var<0, then SD=0 */ } else Stats.SV[i*statsblocksize+2][d] = 0; /* If N<2, SD=0 */ } if(trace & T_VERBOSE) fprintf(stderr,"VApplySvms stats[:,1]: Universal:%g+/-%g(%g). Class 1:%g+/-%g(%g). Class ~1:%g+/-%g(%g)\n", Stats.SV[1][1],Stats.SV[2][1],Stats.SV[3][1], Stats.SV[statsblocksize+1][1],Stats.SV[statsblocksize+2][1],Stats.SV[statsblocksize+3][1], Stats.SV[2*statsblocksize+1][1],Stats.SV[2*statsblocksize+2][1],Stats.SV[2*statsblocksize+3][1]); if(statsoutfile != NULL) { /* Print out the statistics file */ if((fid=fopen(statsoutfile,"w"))==NULL) fprintf(stderr,"VApplySvms: Unable to write to %s\n",statsoutfile); else { if(trace & T_VERBOSE) fprintf(stderr,"Writing discriminant stats size (%d,%d) to %s\n", NumDRows(Stats.SV),NumDCols(Stats.SV),statsoutfile); SaveSVMVectors(fid,&Stats); fclose(fid); } } /* If normalization not previously defined, copy Stats to PDF */ if(PDF.SV == NULL && probability_type != NULL) { if(trace & T_VERBOSE) fprintf(stderr,"No -e option specified; prob. 
estimation will use computed stats, (%dx%d)\n", NumDRows(Stats.SV), NumDCols(Stats.SV)); CopySVMDef(&PDF, &Stats, &xfStack); normblocksize=statsblocksize; } } /* ----------- Compute parameters for output probability map in PDF--------*/ if(probability_type != NULL && PDF.SV!=NULL) { normblocksize=(int)NumDRows(PDF.SV)/3; for(m=1; m<=NumDCols(PDF.SV); m++) { switch(probability_type[0]) { case 's': /* Sigmoid conversion: alpha and beta */ variance=(PDF.SV[normblocksize+2][m] * PDF.SV[normblocksize+2][m] * (PDF.SV[normblocksize+3][m]-1) + PDF.SV[2*normblocksize+2][m] * PDF.SV[2*normblocksize+2][m] * (PDF.SV[2*normblocksize+3][m]-1)); n=(int) (PDF.SV[3][m]-2); if(variance > n*VARIANCE_FLOOR && n>0) { variance /= n; if(trace & T_VERBOSE) fprintf(stderr,"Variance estimate for the sigmoid: ((%g)^2*%g+(%g)^2*%g)/%d=%g\n", PDF.SV[normblocksize+2][m],PDF.SV[normblocksize+3][m]-1, PDF.SV[2*normblocksize+2][m],PDF.SV[2*normblocksize+3][m]-1, n,variance); } else if(PDF.SV[normblocksize+2][m]>STDEV_FLOOR) variance=PDF.SV[normblocksize+2][m]*PDF.SV[normblocksize+2][m]; else if(PDF.SV[2*normblocksize+2][m]>STDEV_FLOOR) variance=PDF.SV[2*normblocksize+2][m]*PDF.SV[2*normblocksize+2][m]; else variance=VARIANCE_FLOOR; PDF.SV[2][m]= (PDF.SV[normblocksize+1][m] - PDF.SV[2*normblocksize+1][m])/variance; /* alpha = (mu1-mu2)/var */ PDF.SV[1][m]=0.5*(PDF.SV[normblocksize+1][m] + PDF.SV[2*normblocksize+1][m]); /* beta = 0.5*(mu1+mu2) */ if(trace & T_VERBOSE && m==1) { fprintf(stderr,"Sigmoid parameters, column 1: alpha=(%g-(%g))/%g=%g, beta=0.5*(%g+(%g))=%g\n", PDF.SV[normblocksize+1][m],PDF.SV[2*normblocksize+1][m],variance,PDF.SV[2][m], PDF.SV[normblocksize+1][m],PDF.SV[2*normblocksize+1][m],PDF.SV[1][m]); } break; case 'g': /* Gaussian: no further computations necessary */ break; case 'p': /* Posterior histogram conversion: N1(bin)/(N1(bin)+N2(bin)) */ printf("Computing posterior histogram\n"); for(k=4; k<=normblocksize; k++) { /* Posterior histogram. Add mincount to every bin before normalizing. If no training points, p=0.5 */ if(PDF.SV[k][m]>0) PDF.SV[k][m] = (PDF.SV[normblocksize+k][m] + mincount)/(PDF.SV[k][m] + 2*mincount); else PDF.SV[k][m] = 0.5; } if(trace & T_VERBOSE && m==1) { fprintf(stderr,"Posterior PMF: "); for(k=4; k<=normblocksize-1; k++) fprintf(stderr,"%g (x<%g) ",PDF.SV[k][m],PDF.alpha[k]); fprintf(stderr,"%g\n",PDF.SV[normblocksize][m]); } break; case 'l': /* Likelihood histogram conversion: N1(bin)/N1(total) */ printf("Computing likelihood histogram\n"); /* Likelihood histogram. Add mincount to every bin before normalizing. 
*/ /* If no training points, use uniform distribution */ for(k=4; k<=normblocksize; k++) { if(PDF.SV[normblocksize+3][m] > 0) PDF.SV[normblocksize+k][m] = ( PDF.SV[normblocksize+k][m] + mincount ) / ( PDF.SV[normblocksize+3][m] + mincount*(normblocksize-3) ); else PDF.SV[normblocksize+k][m] = 1/((double)normblocksize-3); } for(k=4; k<=normblocksize; k++) { if(PDF.SV[2*normblocksize+3][m] > 0) PDF.SV[2*normblocksize+k][m] = ( PDF.SV[2*normblocksize+k][m] + mincount ) / ( PDF.SV[2*normblocksize+3][m] + mincount*(normblocksize-3) ); else PDF.SV[2*normblocksize+k][m] = 1/((double)normblocksize-3); } if(trace & T_VERBOSE && m==1) { fprintf(stderr,"Likelihood histogram, class 1: "); for(k=4; k<=normblocksize-1; k++) fprintf(stderr,"%g (x<%g) ",PDF.SV[normblocksize+k][m],PDF.alpha[k]); fprintf(stderr,"%g\n",PDF.SV[2*normblocksize][m]); fprintf(stderr,"Likelihood histogram, class ~1: "); for(k=4; k<=normblocksize-1; k++) fprintf(stderr,"%g (x<%g) ",PDF.SV[2*normblocksize+k][m],PDF.alpha[k]); fprintf(stderr,"%g\n",PDF.SV[3*normblocksize][m]); } break; } } } /**************************************************************************/ /* ----------- Transform each discriminant to a probability if so requested, then output */ if(D[0]!=NULL) { printf("Computing output probability for file %d: %s\n",IFile,FileList[IFile]); ResetHeap(&mStack); ZeroSVMDef(&Output); /* If no conversion specified, then Output is just D transposed */ if(probability_type==NULL) { Output.SV=CreateDMatrix(&mStack,NumDCols(D[0]),NumDRows(D[0])); TransposeDMatrix(D[0],Output.SV); } /* If a conversion is specified but no PDF available, warn the user, and output raw discriminant */ else if(PDF.SV==NULL) { /* Warn the user during processing of the first file (IFile==0) */ if(IFile==0) fprintf(stderr,"VApplySvms: No -d or -e given, so probability mapping %s can't be implemented\n", probability_type); Output.SV=CreateDMatrix(&mStack,NumDCols(D[0]),NumDRows(D[0])); TransposeDMatrix(D[0],Output.SV); } /* If no histogram available for 'p' or 'l' probability type, output the discriminant */ else if((probability_type[0] == 'p'||probability_type[0]=='l') && normblocksize <= 3) { /* Warn the user during processing of the first file (IFile==0) */ if(IFile==0) fprintf(stderr,"VApplySvms: no histogram given, so probability mapping %s can't be implemented\n", probability_type); Output.SV=CreateDMatrix(&mStack,NumDCols(D[0]),NumDRows(D[0])); TransposeDMatrix(D[0],Output.SV); } /* Otherwise, conversion depends on probability type */ else { Output.SV=CreateDMatrix(&mStack,NumDCols(D[0]),2*NumDRows(D[0])); switch(probability_type[0]) { case 's': /* Sigmoid: y=1/(1+exp(-a*(x-b))) */ for(m=1; m<=NumDRows(D[0]); m++) for(n=1; n<=NumDCols(D[0]); n++) { Output.SV[n][NumDRows(D[0])+m] = 1 - 1/(1+exp(-PDF.SV[2][m]*(D[0][m][n]-PDF.SV[1][m]))); /* p(-1|D) */ Output.SV[n][m] = 1/(1+exp(-PDF.SV[2][m]*(D[0][m][n]-PDF.SV[1][m]))); /* p(+1|D) */ } break; case 'g': /* Gaussian: y=exp(-0.5*((x-mu)/sigma)^2)/(sigma*sqrt(2pi)) */ for(m=1; m<=NumDRows(D[0]); m++) for(n=1; n<=NumDCols(D[0]); n++) { d=2*normblocksize; /* p(D|-1): class ~1 block, mu in row d+1, sd in row d+2 */ a=(D[0][m][n]-PDF.SV[d+1][m])/PDF.SV[d+2][m]; Output.SV[n][NumDRows(D[0])+m] = exp(-0.5*a*a)/(2.506628275*PDF.SV[d+2][m]); d=normblocksize; /* p(D|+1): class 1 block */ a=(D[0][m][n]-PDF.SV[d+1][m])/PDF.SV[d+2][m]; Output.SV[n][m] = exp(-0.5*a*a)/(2.506628275*PDF.SV[d+2][m]); } break; case 'p': /* Posterior histogram */ for(m=1; m<=NumDRows(D[0]); m++) for(n=1; n<=NumDCols(D[0]); n++) { for(k=4; k < normblocksize-1 && D[0][m][n] > PDF.alpha[k]; k++); 
Output.SV[n][NumDRows(D[0])+m] = 1-PDF.SV[k][m]; /* p(-1|D) */ Output.SV[n][m] = PDF.SV[k][m]; /* p(+1|D) */ } break; case 'l': /* Likelihood histogram */ for(m=1; m<=NumDRows(D[0]); m++) for(n=1; n<=NumDCols(D[0]); n++) { for(k=normblocksize+4; k < 2*normblocksize-1 && D[0][m][n] > PDF.alpha[k]; k++); Output.SV[n][m] = PDF.SV[k][m]; /* p(D|+1) */ for(k=2*normblocksize+4; k < 3*normblocksize-1 && D[0][m][n] > PDF.alpha[k]; k++); Output.SV[n][NumDRows(D[0])+m] = PDF.SV[k][m]; /* p(D|-1) */ } break; default: /* Unknown probability mapping type: output the discriminant */ /* Warn the user during processing of the first file (IFile==0) */ if(IFile==0) fprintf(stderr,"VApplySvms: Probability mapping %s is unknown, and can't be implemented\n", probability_type); Output.SV=CreateDMatrix(&mStack,NumDCols(D[0]),NumDRows(D[0])); TransposeDMatrix(D[0],Output.SV); } } /* ----------- Save the Matrix of Discriminants to Output -------------- */ outfile=FileList[IFile+1]; if(Input.kerneltype==K_HTK) { printf("Saving in HTK format to %s\n",outfile); SVMDef2HParm(&mStack, &Output, &HTKDATA, &(sampPeriods[0]), &(HTKCodes[0])); /* Change the HTK Code to be type USER, compressed */ HTKDATA.HTKCode = 9+1024; WriteHParm(&mStack, outfile, &HTKDATA, trace); } else { printf("Saving in SVM format to %s\n",outfile); Output.alpha=CreateDVector(&mStack,DVectorSize(Alphas[0])); CopyDVector(Alphas[0],Output.alpha); if((fid=fopen(outfile,"w"))==NULL) { fprintf(stderr,"VApplySvms: Unable to write to %s\n",outfile); perror("VApplySvms"); } else { printf("Trying to save Output to %s\n",outfile); SaveSVMVectors(fid,&Output); fclose(fid); } } if(trace & T_TOP) fprintf(stderr,"%s -> %s\n",infile, outfile); } /* Print statistics */ if(trace & T_MEM) PrintAllHeapStats(); IFile+=2; } Exit(0); }
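/* -------------------------------------------------------------------------------- */ 
/* Reference sketch only (not called anywhere in this tool, and compiled only if 
   VAPPLYSVMS_EXAMPLE is defined): the four kernel functions listed in the usage 
   text, evaluated for one pair of 1-based arrays x[1..n] and y[1..n], following 
   the DVector indexing convention used above.  The discriminant for a frame is 
   then the sum over support vectors of alpha[sv]*K(SV[sv],x), minus the threshold 
   b, as in ApplySVM.  The function name and the compile guard are illustrative and 
   are not part of PVTK; the K_* codes are the ones already used in ApplySVM.      */ 
#ifdef VAPPLYSVMS_EXAMPLE 
static double ExampleKernel(int kerneltype, double *x, double *y, int n, 
                            double s, double r, double d, double g) 
{ 
   int i; 
   double dot=0.0, dist=0.0; 
   for(i=1; i<=n; i++) { 
      dot  += x[i]*y[i];                   /* x'*y    */ 
      dist += (x[i]-y[i])*(x[i]-y[i]);     /* |x-y|^2 */ 
   } 
   switch(kerneltype) { 
   case K_LINEAR:  return dot;              /* K(x,y)=x'*y             */ 
   case K_POLY:    return pow(s*dot+r, d);  /* K(x,y)=(s*x'*y+r)^d     */ 
   case K_RBF:     return exp(-g*dist);     /* K(x,y)=exp(-g*|x-y|^2)  */ 
   case K_SIGMOID: return tanh(s*dot+r);    /* K(x,y)=tanh(s*x'*y+r)   */ 
   default:        return 0.0;              /* unknown kernel type     */ 
   } 
} 
#endif /* VAPPLYSVMS_EXAMPLE */ 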