/* ---------------------------------------------------------------------------- */ /* VTransform.c: Linear and Nonlinear Transform of Long-Term Parameters Copyright 2005, Trustees of the University of Illinois Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Revision history: August 2004, Mark Hasegawa-Johnson: First revision created Likely future revisions: PVTK should be cleaned up, made independent of HTK, merged with SpeechLib, and rewritten from the bottom up to be compatible with LAPACK and STL. /* ---------------------------------------------------------------------------- */ char *vtransform_version = "$Id: VTransform.c,v 1.5 2004/08/11 22:06:23 mhasegaw Exp mhasegaw $"; #include "PVTK.h" /* ---------------------- Global Variables ----------------------- */ static int trace = 0; /* Trace level */ /* The Transform structure. Just one such array, so no typedef needed */ static struct { char type; /* transform type: one of 'cdmn' */ DMatrix m; /* transform matrix */ DVector v; /* offset vector */ char *s; /* nonlinear function name */ int i; /* index of first input for this transform */ int j; /* index of second input for this transform */ } Transform[MAXTRANS]; static nTrans=0; /* number of transforms */ static MemHeap mStack; /* matrix stack */ static MemHeap xfStack; /* Transform stack -- not reset in every loop*/ /* ---------------- Process Command Line ------------------------- */ static char *USAGE="\n\ USAGE: VTransform [opts] src1 [-h/v src2 [...]] tgt\n\ \n\ Option order: standard HTK options are processed first,\n\ then file concatenation options (-h and -v),\n\ then transform options (-c, -d, -m and -n).\n\ Transform options are processed in command-line order.\n\ Output of the file concatenation options is called stage_0;\n\ each consecutive transform option defines a new stage.\n\ \n\ Example: to concatenate multiple input files, multiply inputs by the\n\ matrix in mat.txt, then apply a sigmoid transform, type\n\ \n\ VTransform -A -T 1 -m mat.txt -n sigmoid -S script.scp\n\ ... where script.scp contains lines of the form\n\ src1.htk -h src2.htk -h src3.htk -h src4.htk tgt.htk\n\ \n\ Options between pairs of source filenames \n\ \n\ -h src_n Horizontal concatenation \n\ INPUT=[INPUT, src_n]\n\ -v src_n Vertical concatenation \n\ INPUT=[INPUT; src_n]\n\ \n\ Options that precede all source filenames Default\n\ \n\ -c j [i] Next stage concatenates\n\ j consecutive frames from stage_i. 1 [previous]\n\ -d j [i] Next stage is created by discarding\n\ j-1 of every j frames from stage_i. 1 [previous]\n\ -e n Error-Exit if two horizontally Inf\n\ concatenated source files differ in \n\ frame count by more than n frames,\n\ or if two vertically concatenated \n\ source files differ in feature \n\ dimension more than n dimensions\n\ -m file [i] Next stage is linear transform of\n\ i'th stage (default: previous stage)\n\ Format of the matrix file: \n\ DIMS;OFFSET;MATRIX\n\ Example: 3x3 identity transform:\n\ 4 3\n\ 0 0 0\n\ 1 0 0\n\ 0 1 0\n\ 0 0 1\n\ -n func [i] Next stage is func(stage_i). [previous stage]\n\ Supported functions include\n\ sin, cos, tan, atan, exp, fabs,\n\ floor, ceil, tanh, sinh, cosh ,\n\ log, log10, asin, acos, square,\n\ cube, sqrt, cubert, halfwave, sign,\n\ step, inv, sigmoid,\n\ sigmoid_deriv, tanh_deriv\n\ -n func j [i] Next stage=func(stage_j,stage_i). [previous stage]\n\ Supported functions: add, \n\ subtract, multiply, divide, pow\n\ \n\ -p settings Determines how to align input files of \n\ different lengths. 'fs'\n\ 'settings' is any combination of:\n\ f = align first frames\n\ s = symmetric. Cut the same number \n\ of frames from beginning and end.\n\ z = zero the missing frames\n\ r = pad missing frames by repeating\n\ first and last frames of the shorter file.\n\ -A Print command line arguments\n\ -R Print RCS version information\n\ -S f Use script file f\n\ -T n Set trace level to n (meaningful: 1,3,7,15,31)\n\ "; void ReportUsage(char *fmt, char *s) { printf(fmt, s); printf(USAGE); printf("\n"); Exit(0); } /* Apply Nonlinear Transform: z = func(x,y) */ void FunctionToDVectors(DVector z, DVector x, DVector y, char *func) { int i, size; size=DVectorSize(x); if(size!=DVectorSize(y) || size!=DVectorSize(z)) HError(5270,"FunctionToDVectors: Sizes(x,y,z)=(%d,%d,%d) should equal\n", size,DVectorSize(y),DVectorSize(z)); for(i=1; i <= size; i++) { /* ONE-ARGUMENT FUNCTIONS */ if (!strcmp(func,"log")) z[i] = (x[i]1)?PI/2:asin(x[i])); else if (!strcmp(func,"acos")) z[i] = (x[i]<-1)?PI:((x[i]>1)?0:asin(x[i])); else if (!strcmp(func,"atan")) z[i] = atan(x[i]); else if (!strcmp(func,"fabs")) z[i] = fabs(x[i]); else if (!strcmp(func,"floor")) z[i] = floor(x[i]); else if (!strcmp(func,"ceil")) z[i] = ceil(x[i]); else if (!strcmp(func,"tanh")) z[i] = tanh(x[i]); else if (!strcmp(func,"sinh")) z[i] = sinh(x[i]); else if (!strcmp(func,"cosh")) z[i] = cosh(x[i]); else if (!strcmp(func,"tanh_deriv")) z[i] = 1 - tanh(x[i])*tanh(x[i]); else if (!strcmp(func,"inv")) z[i] = 1/x[i]; else if (!strcmp(func,"halfwave")) z[i] = (x[i]>0)?x[i]:0; else if (!strcmp(func,"step")) z[i] = (x[i]>=0)?1:0; else if (!strcmp(func,"sign")) z[i] = (x[i]>0)?1:((x[i]<0)?-1:0); else if (!strcmp(func,"sigmoid")) z[i] = 1 / (1 + exp(-x[i])); else if (!strcmp(func,"sigmoid_deriv")) z[i] = 1 / (2 + exp(x[i]) + exp(-x[i])); else if (!strcmp(func,"square")) z[i] = x[i] * x[i]; else if (!strcmp(func,"cube")) z[i] = x[i] * x[i] * x[i]; else if (!strcmp(func,"sqrt")) z[i] = (x[i]>0)?sqrt(x[i]):((x[i]<0)?-sqrt(-x[i]):0); else if (!strcmp(func,"cubert")) z[i] = (x[i]>MINLARG)?exp(0.33333*log(x[i])):((x[i]<-MINLARG)?-exp(0.33333*log(-x[i])):0); /* TWO-ARGUMENT FUNCTIONS */ else if (!strcmp(func,"add")) z[i] = x[i]+y[i]; else if (!strcmp(func,"subtract")) z[i] = x[i]-y[i]; else if (!strcmp(func,"multiply")) z[i] = x[i]*y[i]; else if (!strcmp(func,"divide")) z[i] = x[i]/y[i]; else if (!strcmp(func,"pow")) z[i] = pow(x[i],y[i]); else HError(5270,"FunctionToDVector: unknown func name %s",func); } } /* Apply Nonlinear Transform to every element in DMatrix A */ void FunctionToDMatrices(DMatrix Z, DMatrix X, DMatrix Y, char *func) { int size, i; size = NumDRows(X); if(size!=NumDRows(Z) || size!=NumDRows(Y)) HError(5270,"FunctionToDMatrices: Rows(X)=%d, Rows(Y)=%d, Rows(Z)=%d\n", size,NumDRows(Y),NumDRows(Z)); for (i=1; i<=size; i++) FunctionToDVectors(Z[i],X[i],Y[i],func); } /* ----------- Apply Linear and Nonlinear Transforms -------------- */ DMatrix ApplyTransforms(MemHeap *heap, DMatrix Input) { int nrows, ncols; int iTrans, i, j, m, n, in; static DMatrix A[MAXTRANS]; if (trace & T_VERBOSE) fprintf(stderr,"ApplyTransforms Input (%d,%d)\n", NumDRows(Input),NumDCols(Input)); /* Get input number of rows and columns */ nrows = NumDRows(Input); ncols = NumDCols(Input); A[0] = CreateDMatrix(heap, nrows, ncols); CopyDMatrix(Input, A[0]); /* Apply each of the transforms in sequence */ for (iTrans=1; iTrans <= nTrans; iTrans++) { if(trace & T_VERBOSE) { fprintf(stderr,"After %d'th concatentation, first column contains:\n"); for(m=1; m<=NumDRows(A[iTrans-1]); m++) fprintf(stderr,"%d:%g ",m,A[iTrans-1][m][1]); fprintf(stderr,"\n"); } in = Transform[iTrans].i; switch(Transform[iTrans].type) { case 'c': /* Concatenate consecutive frames from the input */ nrows=NumDRows(A[in]); ncols=NumDCols(A[in]) * Transform[iTrans].j; if (trace & T_VERBOSE) fprintf(stderr, " Stage %d: concatenating %d frames from %d:(%d,%d) into %d:(%d,%d)\n", iTrans, Transform[iTrans].j, in, NumDRows(A[in]), NumDCols(A[in]), iTrans, nrows, ncols); A[iTrans] = CreateDMatrix(heap, nrows, ncols); for(i=1; i <= nrows; i++) for(m=Transform[iTrans].j; m-- > 0;) { n = i - (int)(Transform[iTrans].j/2) + m; n = (n>nrows)?nrows:((n<1)?1:n); CopySubVectorDD(A[in][n], A[iTrans][i], 0, m*NumDCols(A[in]), 0); } break; case 'd': /* Decimate consecutive frames from the input */ nrows=ceil(((double)NumDRows(A[in]))/(double)Transform[iTrans].j); ncols=NumDCols(A[in]); if (trace & T_VERBOSE) fprintf(stderr, " Stage %d: decimating by %d from %d:(%d,%d) into %d:(%d,%d)\n", iTrans, Transform[iTrans].j, in, NumDRows(A[in]), NumDCols(A[in]), iTrans, nrows, ncols); A[iTrans] = CreateDMatrix(heap, nrows, ncols); for(i=0; i < nrows; i++) CopySubVectorDD(A[in][i*Transform[iTrans].j+1], A[iTrans][i+1], 0, 0, 0); break; case 'm': /* Matrix transform */ ncols=NumDCols(Transform[iTrans].m); nrows=NumDRows(A[in]); if (trace & T_VERBOSE) fprintf(stderr, " Stage %d: %d:(%d,%d) * (%d,%d) = %d:(%d,%d)\n", iTrans, in, NumDRows(A[in]), NumDCols(A[in]), NumDRows(Transform[iTrans].m), NumDCols(Transform[iTrans].m), iTrans, nrows, ncols); A[iTrans] = CreateDMatrix(heap, nrows, ncols); MultiplyDMatrices(A[iTrans], A[in], Transform[iTrans].m); for (j=1; j<=nrows; j++) AddDVectors(A[iTrans][j], A[iTrans][j], Transform[iTrans].v); break; case 'n': /* Nonlinear element-wise transform */ ncols=NumDCols(A[in]); nrows=NumDRows(A[in]); if (trace & T_VERBOSE) fprintf(stderr, " Stage %d: applying %s to %d:(%d,%d), creating %d:(%d,%d)\n", iTrans, Transform[iTrans].s, in, NumDRows(A[in]), NumDCols(A[in]), iTrans, nrows, ncols); A[iTrans] = CreateDMatrix(heap, nrows, ncols); FunctionToDMatrices(A[iTrans],A[in],A[Transform[iTrans].j], Transform[iTrans].s); default: fprintf(stderr,"Unknown transform specifier: %c found at stage %d\n", Transform[iTrans].type,iTrans); } } return(A[nTrans]); } /******************************************************************/ /* ----------- main -------------- */ int main(int argc, char *argv[]) { char *s, *filename, *infile, *outfile; /* command line option, input files, output file */ short nrow, ncol; /* Number of rows, number of columns in any matrix */ FILE *fid; /* File pointer for both input and output */ DMatrix A,B; /* Concatenated input files, and output file */ char *concatenation; /* Type of concatenation of each new input file */ static int *Arows, *Acols; /* Number of rows and columns after each input concatenation */ int Decimation_Factor=1; /* Cumulative decimation; used to compute tgtPeriod */ int nmat,imat,A_ir,R_ir,A_ic,R_ic,nr,nc; /* Counters for concatenating input files */ int m; char *padding_string, *c; /* Determine padding options for concatenation of inputs */ char padding_alignment='s', padding_type='r'; /* Default padding: symmetric, repeat */ int ErrorExitThreshold=LONG_MAX; /* If file NumRows or NumCols differ by more than this, exit */ HParm InData[MAXTRANS], OutData; /* Array of HTK Parameters to be read in and written out */ if(InitShell(argc,argv,vtransform_version,"")= MAXTRANS) HError(1014,"VTransform: Max number of transforms exceeded"); if (NextArg() != INTARG) HError(1019,"VTransform: -c should be followed by an integer\n"); Transform[nTrans].type = 'c'; Transform[nTrans].j=GetChkedInt(0,1000,s); /* Read in the origin stage if specified, otherwise nTrans-1 */ if (NextArg()==INTARG) Transform[nTrans].i=GetChkedInt(0,nTrans-1,s); else Transform[nTrans].i = nTrans-1; if (trace & T_VERBOSE) fprintf(stderr,"Stage %d concatenates %d frames from stage %d\n",nTrans,Transform[nTrans].j,Transform[nTrans].i); break; case 'd': /* Decimation factor */ nTrans++; if (nTrans >= MAXTRANS) HError(1014,"VTransform: Max number of transforms exceeded"); if (NextArg() != INTARG) HError(1019,"VTransform: -d should be followed by an integer\n"); Transform[nTrans].type = 'd'; /* Get the decimation factor for this stage */ Transform[nTrans].j=GetChkedInt(0,10000,s); /* Update the decimation factor of the final output file */ Decimation_Factor *= Transform[nTrans].j; /* Read in the origin stage if specified, otherwise nTrans-1 */ if (NextArg()==INTARG) Transform[nTrans].i=GetChkedInt(0,nTrans-1,s); else Transform[nTrans].i = nTrans-1; if (trace & T_VERBOSE) fprintf(stderr,"Stage %d discards %d of every %d frames from stage %d\n", nTrans,Transform[nTrans].j-1,Transform[nTrans].j,Transform[nTrans].i); break; case 'e': /* Error Exit: file difference threshold */ if (NextArg() != INTARG) HError(1019,"VTransform: -e should be followed by an integer\n"); /* Get the decimation factor for this stage */ ErrorExitThreshold=GetChkedInt(0,10000,s); if (trace & T_VERBOSE) fprintf(stderr,"ErrorExitThreshold for inter-file differences set to %d\n",ErrorExitThreshold); break; case 'p': /* Padding alignment and padding type */ if (NextArg() != STRINGARG) HError(1019,"VTransform: -p should be followed by characters specifing vector-padding-type"); padding_string = GetStrArg(); for(c=padding_string; *c != '\0'; c++) { if(*c=='s' || *c=='f') { padding_alignment=*c; } if(*c=='z' || *c=='r') { padding_type=*c; } } if (trace & T_VERBOSE) fprintf(stderr,"Padding alignment is %c, padding type is %c\n",padding_alignment, padding_type); break; case 'm': /* Define the nTrans'th linear transform */ nTrans++; if (nTrans >= MAXTRANS) HError(1014,"VTransform: Max number of transforms exceeded"); /* Read in the offset vector and transform matrix */ if (NextArg() != STRINGARG) ReportUsage("VTransform: Matrix input filename expected",""); filename = CopyString(&mStack, GetStrArg()); if((fid=fopen(filename,"r"))==NULL) HError(1019,"VTransform: Cannot read Matrix from %s",filename); if (fscanf(fid,"%hd",&nrow)<1) HError(1019,"VTransform: Unable to read nrows from file %s",filename); if (fscanf(fid,"%hd",&ncol)<1) HError(1019,"VTransform: Unable to read ncols from file %s",filename); Transform[nTrans].v = CreateDVector(&xfStack,ncol); if (!ReadDVector(fid,Transform[nTrans].v,0)) HError(1019,"VTransform: Cant read offset vector from %s", filename); Transform[nTrans].m = CreateDMatrix(&xfStack,nrow-1,ncol); if (!ReadDMatrix(fid,Transform[nTrans].m,0)) HError(1019,"VTransform: Cant read transform matrix from %s", filename); /* Read in the origin stage if specified, otherwise nTrans-1 */ if (NextArg()==INTARG) Transform[nTrans].i=GetChkedInt(0,nTrans-1,s); else Transform[nTrans].i = nTrans-1; /* If TRACE requested, print out the transform definition */ if (trace & T_TOP){ fprintf(stderr,"VTransform: A%d = A%d * %s(%d by %d) + %s (%d vec)\n", nTrans, Transform[nTrans].i, filename, NumDRows(Transform[nTrans].m), NumDCols(Transform[nTrans].m), filename, DVectorSize(Transform[nTrans].v)); ShowDVector("Offset Vector:",Transform[nTrans].v,5); ShowDMatrix("Transform Matrix:",Transform[nTrans].m,5,5); } fclose(fid); Transform[nTrans].type = 'm'; break; case 'n': /* Define the nTrans'th Nonlinear transform function */ nTrans++; if (nTrans >= MAXTRANS) HError(1014,"VTransform: Max number of transforms exceeded"); /* Read a nonlinear function name */ if (NextArg() != STRINGARG) ReportUsage("VTransform: Function name expected",""); Transform[nTrans].s = GetStrArg(); /* If origin "i" specified, read it */ if (NextArg()==INTARG) Transform[nTrans].i=GetChkedInt(0,nTrans-1,s); else Transform[nTrans].i = nTrans-1; /* If origin "j" specified, read it */ if (NextArg()==INTARG) Transform[nTrans].j=GetChkedInt(0,nTrans-1,s); else Transform[nTrans].j = nTrans-1; /* Trace if trace requested */ if (trace & T_TOP) fprintf(stderr,"VTransform: A%d = %s(A%d,A%d)\n", nTrans, Transform[nTrans].s, Transform[nTrans].i, Transform[nTrans].j); Transform[nTrans].type = 'n'; break; case 'A': /* Repeat the arguments to stderr */ for(m=0; m<=argc; m++) fprintf(stderr,"%s ",argv[m]); break; case 'R': fprintf(stderr,"%s\n",vtransform_version); PrintRCSIdentifier(stderr); Exit(0); case 'T': trace = GetChkedInt(0,255,s); break; default: ReportUsage("VTransform: Unknown switch %s",s); } } if (NumArgs() <= 1) ReportUsage("VTransform: Target file expected",""); /* ----------- Process Source [Source] Target sets ------------*/ while(NumArgs() > 1) { /* ----------- Process Sources ------------*/ /* Read input DMatrix */ if (NextArg()!=STRINGARG) HError(1019,"VTransform: Source file name expected"); nmat=0; infile=GetStrArg(); ReadHParm(&mStack, infile, &(InData[nmat]), trace); concatenation[nmat] = 'h'; Arows[0]=NumDRows(InData[nmat].X); Arows[nmat+1]=NumDRows(InData[nmat].X); Acols[0]=0; Acols[nmat+1]=NumDCols(InData[nmat].X); if(trace & T_VERBOSE) { fprintf(stderr,"Read input %d:(%d,%d) from %s. First column contains:\n", nmat,Arows[1],Acols[1],infile); for(m=1; m<=Arows[0]; m++) fprintf(stderr,"%d:$g ",m,InData[nmat].X[m][1]); fprintf(stderr,"\n"); } nmat++; /* If next arg is a SWITCHARG, assume that it is */ /* -h for horizontal concatenation, or */ /* -v for vertical concatenation */ while (NextArg() == SWITCHARG && nmat < MAXTRANS) { s = GetSwtArg(); if (strlen(s)!=1) HError(1019,"VTransform: Bad switch %s; must be single letter",s); concatenation[nmat]=s[0]; if(trace & T_VERBOSE) fprintf(stderr,"%d'th concatenation will be type %c\n", nmat,concatenation[nmat]); /* Get the next file */ if (NextArg()!=STRINGARG) HError(1019,"VTransform: After -h or -v, expected another source file"); if(trace & T_VERBOSE) fprintf(stderr,"Reading datafile %d from %s\n",nmat,infile); infile = GetStrArg(); ReadHParm(&mStack, infile, &(InData[nmat]), trace); if(trace & T_VERBOSE) fprintf(stderr,"Read input %d:(%d,%d) from %s\n", nmat,NumDRows(InData[nmat].X),NumDCols(InData[nmat].X),infile); /* In the case of vertical concatenation, add the NumDRows */ /* In the case of horizontal concatenation, add the NumDCols */ switch(concatenation[nmat]){ case 'v': Arows[nmat+1] = Arows[nmat]+NumDRows(InData[nmat].X); Acols[nmat+1] = Acols[nmat]; break; case 'h': Arows[nmat+1] = Arows[nmat]; Acols[nmat+1] = Acols[nmat]+NumDCols(InData[nmat].X); break; default: ReportUsage("VTransform: Expected -h or -v between sources, not %s\n", &(concatenation[nmat])); } if(trace & T_VERBOSE) fprintf(stderr,"Total output buffer is now (%d,%d)\n", Arows[nmat+1],Acols[nmat+1]); nmat++; } /* Create the matrix to which transformations will be applied */ /* Size = Arows by Acols */ if(trace & T_VERBOSE) fprintf(stderr,"Allocating input buffer of size (%d,%d)\n", Arows[nmat],Acols[nmat]); A = CreateDMatrix(&mStack, Arows[nmat], Acols[nmat]); /* Default content value is 0.0 */ ZeroDMatrix(A); /* Concatenate all of the matrices into the new array */ for(imat=0; imat ErrorExitThreshold || nr-Arows[imat] > ErrorExitThreshold) HError(-100,"VTransform: %d'th row count differs by too much: %d rows\n",imat,nr-Arows[imat]); /* If padding_type == 'r' and alignment=='s', repeat the initial row of InData[imat].X to pad the output */ if(Arows[imat] > nr && padding_alignment=='s' && padding_type=='r') for (A_ir=1; A_ir <= floor((Arows[imat]-nr)/2); A_ir++) CopySubVectorDD(InData[imat].X[1],A[A_ir],0,Acols[imat],nc); /* Otherwise, if alignment=='s', leave initial rows as zeros */ else A_ir = (Arows[imat]>nr && padding_alignment=='s') ? floor((Arows[imat]-nr)/2)+1 : 1; /* Truncate some initial rows from the input, if necessary */ R_ir = (Arows[imat] ErrorExitThreshold || nc-Acols[imat] > ErrorExitThreshold) HError(-100,"VTransform: %d'th column count differs by too much: %d columns\n",imat,nr-Arows[imat]); /* If padding_type == 'r' and alignment=='s', repeat the initial row of InData[imat].X to pad the output */ if(Acols[imat] > nc && padding_alignment=='s' && padding_type=='r') for (A_ic=1; A_ic <= floor((Acols[imat]-nc)/2); A_ic++) CopySubColumnDD(InData[imat].X,0,1,A,Arows[imat],A_ic,nr); /* Otherwise, if alignment=='s', leave initial rows as zeros */ else A_ic = (Acols[imat]>nc && padding_alignment=='s') ? floor((Acols[imat]-nc)/2)+1 : 1; /* Truncate some initial columns from the input, if necessary */ R_ic = (Acols[imat] %s\n",infile, outfile); if(trace & T_MEM) PrintAllHeapStats(); /* Before reading next infile&outfile: reset I/O stacks, but NOT xfStack */ ResetHeap(&mStack); } if (NumArgs() != 0) HError(-1019,"VTransform: Unused args ignored"); Exit(0); }