#!/usr/local/bin/perl -w
#
# Pronounce demo: a CGI front end for letterToPhone.pl.
#
# The page shows a form (word list, model type, alignment and report
# options).  When the form is submitted, the word list is written to
# queryDir/query.txt, letterToPhone.pl is run through bash after sourcing
# the environment script, and the contents of queryDir/results.txt are
# shown on the page.
#
# NOTE(review): the copy of this script that was reviewed had its HTML tags
# stripped.  The markup emitted below (headings, <p>, <blockquote>, <pre>,
# <hr>, <address>) is a reconstruction; the visible text is unchanged.
# Confirm the markup against the deployed page.

use strict;
use warnings;

use CGI;
use CGI::Carp qw/fatalsToBrowser/;

#configuration and environment variables

#path to the environment script
my $envScript = "/workspace/mickey0/akantor/pronounce/ifpWeb.env.bash";

# Model types accepted for the -t option of letterToPhone.pl.  Request
# values are checked against this list before they reach the shell.
my @MODEL_TYPES = ('monophone', 'triphone');

# Report types in the order they are shown on the form.
my @REPORT_TYPES = ('best', 'allInfo', 'errorSum', 'errorReport');

# Human readable label for each report type.
my %REPORT_LABEL = (
    'best'        => 'Best pronounciation',
    'allInfo'     => 'All available information (can be read into PERL)',
    'errorSum'    => 'Accuracy Summary',
    'errorReport' => 'Accuracy Summary and report mistakes',
);

# Value passed with the -v option of letterToPhone.pl for each report type.
my %REPORT_CODE = (
    'best'        => '0',
    'allInfo'     => '1',
    'errorSum'    => '2',
    'errorReport' => '3',
);

#########################

my $query = CGI->new;

print $query->header;
print $query->start_html("Pronounce");
print $query->h1("Pronounce Demo"), "\n";
print '(American English trained on CMU dictionary)', "\n";

print_prompt($query);
do_work($query);
print_tail();

print $query->end_html;

# print_prompt($query)
#
# Prints the query form: the word list text area, the "treat as dictionary"
# checkbox, the model type and report type radio groups, the "no alignment"
# checkbox and the submit button, each followed by its explanatory text.
sub print_prompt {
    my ($query) = @_;

    print $query->start_form;

    print $query->h3("Word list");
    print $query->textarea(
        -name    => 'wordlist',
        -default => "supercalifragilisticexpialodocious\nbit\nbite\nbi[t]\nbi[te]\nB_B_N\nu_s\n[B_B]_N\n401K\n90210",
        -rows    => 15,
        -columns => 40,
    );
    print $query->blockquote(
        "List the query words, one per line. Captialization does not matter. ".
        "The alphabet should consist of letters, numbers underscore_ or the apostrophe -- all other ".
        "characters are ignored. It is also possible to specify partial words ".
        "in context, by putting the unpronounced part of the word in [] brakets. ".
        "Try out the example words to see how it works.".
        "You can also specify acronyms in form of I_B_M or U_S_A - they will be " .
        "pronounced one letter at a time. "
    );

    print "<p>\n";
    print $query->checkbox(
        -name  => 'listIsDict',
        -label => 'Treat word list as a pronounciation dictionary.',
        #-checked=>'on',
    );
    print <<'END';
<blockquote>
The word list can also be a pronounciation dictionary for the purposes of forced alignment or model accuracy checking. Specify the pronounciation dictionary by following each word with a sequence of phones seperated by spaces and check the above check box. The phones should be from the CMU dictionary phonebet. If the checkbox is not checked, the pronounciation definitions will be ignored, and the model dictionary will be used instead.
<p>
If a pronounciation definition is found in a dictionary, forced alignment is performed between the string of letters and the sequence of phones.
</blockquote>
END

    print $query->h3("Model Type");
    print $query->radio_group(
        -name    => 'modelType',
        -values  => \@MODEL_TYPES,
        -labels  => { 'monophone', 'Monophone', 'triphone', 'Triphone' },
        -default => 'triphone',
    );
    print $query->blockquote(
        "Both monophone and triphone models have been trained. The triphone ".
        "model almost always gives better results."
    );

    print $query->h3("Forced alignment");
    print $query->checkbox(
        -name  => 'noalignment',
        -label => 'Do not perform forced alignment even if it\'s possible.',
    );
    print <<'END';
<blockquote>
If the word (or partial word plus its context) is found in the pronounciation dictionary, it is possible to force alignment between letters and phones. This is more accurate than asking the model to come up with the sequence of phones on its own. By default the model attempts forced alignment, but it can turned off with the above checkbox.
</blockquote>
END

    print $query->h3("Report Type");
    # An explicit value list keeps the buttons in a stable order; handing
    # radio_group a hash reference would leave the order up to the hash.
    print $query->radio_group(
        -name      => 'outputType',
        -linebreak => 'true',
        -values    => \@REPORT_TYPES,
        -labels    => \%REPORT_LABEL,
        -default   => 'best',
    );
    print <<'END';
<blockquote>
Accuracy summary prints out the word accuracy rate (a word is considered correct if its phone sequence matches the one in the dictionary) as well as the minimum edit distance between the correct phone sequence and predicted phone sequence summed over all the words. Some other statistics are also printed.
<p>
Accuracy summary and report mistakes will also show individual words which were 'mispronounced' as well as their edit distance.
<p>
All available information prints out a perl structure with dictionary, model and forced alignment definition or as much information as is available. The perl structure can then be read into PERL via EVAL function.
<p>
Best pronounciation simply returns the best possible pronounciation, one word per line.
<p>
The query will take a couple of minutes even for a small number of words. Please be patient.
</blockquote>
END

    # do_work() only runs a query when the 'action' parameter is set, so
    # the submit button must keep this name.
    print $query->submit('action', 'submit'), " ";
    print $query->endform;
    return;
}

# do_work($query)
#
# Does nothing unless the form was submitted (the 'action' parameter is
# set).  Otherwise writes the submitted word list to queryDir/query.txt,
# runs letterToPhone.pl on it, prints the contents of
# queryDir/results.txt (HTML-escaped) and reports whether the run
# succeeded.  Croaks if the word list file cannot be written.
#
# NOTE(review): the query and result file names are fixed, so two requests
# handled at the same time would overwrite each other's files.
sub do_work {
    my ($query) = @_;

    return unless $query->param('action');

    print $query->h2("Query Results"), "\n";

    # Both values end up inside a shell command line, so only the values
    # offered by the form are accepted.  A missing parameter falls back to
    # the form's default selection.
    my $model_type = $query->param('modelType');
    $model_type = 'triphone' unless defined $model_type;
    unless (grep { $_ eq $model_type } @MODEL_TYPES) {
        print $query->h3('query failed: unknown model type.');
        return;
    }

    my $report_type = $query->param('outputType');
    $report_type = 'best' unless defined $report_type;
    unless (exists $REPORT_CODE{$report_type}) {
        print $query->h3('query failed: unknown report type.');
        return;
    }

    my $wordlist_fname = "queryDir/query.txt";
    my $results_fname  = "queryDir/results.txt";

    # Browsers submit text areas with CRLF line ends; the tool gets LF only.
    my $word_list = $query->param('wordlist');
    $word_list = '' unless defined $word_list;
    $word_list =~ s/\r//g;

    open(my $wordlist_fh, '>', $wordlist_fname)
        or croak "cannot open >$wordlist_fname: $!";
    print {$wordlist_fh} $word_list;
    close($wordlist_fh)
        or croak "cannot close $wordlist_fname: $!";

    my $cmd = "cat $wordlist_fname | perl -w -S letterToPhone.pl "
        . " -m modelDir"
        . " -q queryDir/w "
        . " -f $wordlist_fname "
        . " -o $results_fname "
        . " -t " . $model_type
        . " -v " . $REPORT_CODE{$report_type}
        . ($query->param('noalignment') ? " --noalignment " : "")
        . ($query->param('listIsDict') ? " -d $wordlist_fname " : "");
    #print "commandline: $cmd";

    # bash is needed because the environment script has to be sourced
    # before the tool runs.  Every variable part of the command line is
    # either a constant from this file or was validated above.
    my $wait_status = system(
        "bash -c 'source $envScript > /dev/null && $cmd &> /dev/null'");
    my $launch_error = $!;    # only meaningful when $wait_status is -1

    print "<pre>\n";
    if (open(my $results_fh, '<', $results_fname)) {
        local $/;             # slurp the whole file
        my $results = <$results_fh>;
        close($results_fh);
        # The results are derived from user input; escape them for HTML.
        print $query->escapeHTML($results) if defined $results;
        unlink $results_fname;
    }
    print "</pre>\n";

    if ($wait_status == 0) {
        print $query->h3('query succeeded');
        return;
    }

    # system() returns the raw wait status; decode it into what happened.
    my $error;
    if ($wait_status == -1) {
        $error = $query->escapeHTML("could not run bash: $launch_error");
    }
    elsif ($wait_status & 127) {
        $error = "signal " . ($wait_status & 127);
    }
    else {
        $error = $wait_status >> 8;
    }
    print $query->h3("query failed with error $error.");
    return;
}

# print_tail()
#
# Prints the page footer.
#
# TODO: the footer text originally contained hyperlinks whose targets were
# not recoverable from the reviewed copy; restore the <a href> elements.
sub print_tail {
    print <<'END';
<hr>
<address>Arthur Kantor</address>
Back to
speech at UIUC.
END
    return;
}