Bibliography

From SpeechWiki

To be added to references in Arthur's thesis

<bibentry> @inproceedings{Livescu2000Lexical,
  title     = {Lexical Modeling Of Non-Native Speech For Automatic Speech Recognition},
  author    = {Livescu, Karen and Glass, James},
  booktitle = {ICASSP},
  year      = 2000
} </bibentry>

<bibentry> @article{Hazeon2005Pronunciation,
  author    = {Timothy J. Hazen and I. Lee Hetherington and Han Shu and Karen Livescu},
  title     = {Pronunciation modeling using a finite-state transducer representation},
  journal   = {Speech Communication},
  volume    = {46},
  number    = {2},
  pages     = {189--203},
  year      = {2005},
  doi       = {10.1016/j.specom.2005.03.004},
  bibsource = {DBLP, http://dblp.uni-trier.de}
} </bibentry>

For the plot:

livescu 2007 10 word vocab results
Fosler-Lussier thesis table 5.4, and possibly many others

<bibentry> @inproceedings{Lamel1996On,
  author    = {Lamel, L. and Adda, G.},
  title     = {On designing pronunciation lexicons for large vocabulary continuous speech recognition},
  booktitle = {Proceedings of the Fourth International Conference on Spoken Language Processing ({ICSLP} 96)},
  volume    = {1},
  pages     = {6--9},
  month     = oct,
  year      = {1996}
} </bibentry>

<bibentry> @article{Riley1999Stochastic,
  author    = {Riley, Michael and Byrne, William and Finke, Michael and Khudanpur, Sanjeev and Ljolje, Andrej and McDonough, John and Nock, Harriet and Saraclar, Murat and Wooters, Charles and Zavaliagkos, George},
  title     = {Stochastic pronunciation modelling from hand-labelled phonetic corpora},
  journal   = {Speech Communication},
  volume    = {29},
  number    = {2--4},
  pages     = {209--224},
  year      = {1999},
  issn      = {0167-6393},
  doi       = {10.1016/S0167-6393(99)00037-0},
  publisher = {Elsevier Science Publishers B. V.},
  address   = {Amsterdam, The Netherlands}
} </bibentry>

Language Model stuff

<bibentry> @techreport{Goodman2001abit,
  author      = {Joshua T. Goodman},
  title       = {A Bit of Progress in Language Modeling, Extended Version},
  institution = {Microsoft Research},
  number      = {MSR-TR-2001-72},
  year        = {2001},
  url         = {http://research.microsoft.com/~joshuago/longcombine.pdf}
} </bibentry>

<bibentry> @article{Siivola2007On,
  author   = {Siivola, V. and Hirsimaki, T. and Virpioja, S.},
  title    = {On Growing and Pruning {Kneser--Ney} Smoothed {$N$}-Gram Models},
  journal  = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume   = {15},
  number   = {5},
  pages    = {1617--1624},
  month    = jul,
  year     = {2007},
  keywords = {computational linguistics, natural language processing, smoothing methods, speech recognition, statistical distributions, English text corpora, Finnish text corpora, Kneser-Ney smoothed n-gram models pruning, baseline entropy, good-turing smoothed models, gram probability distributions, language models, vocabulary continuous speech recognition},
  doi      = {10.1109/TASL.2007.896666},
  issn     = {1558-7916},
  url      = {http://www.cis.hut.fi/vsiivola/papers/TASLP2007.pdf}
}

</bibentry>

<bibentry> @inproceedings{Chen1998empirical,
  title     = {An Empirical Study of Smoothing Techniques for Language Modeling},
  author    = {Chen, Stanley F. and Goodman, Joshua},
  booktitle = {Proc. of {ACL}},
  pages     = {310--318},
  year      = 1996,
  url       = {http://research.microsoft.com/~joshuago/tr-10-98.pdf}
}

</bibentry>

<bibentry> @inproceedings{Stolcke1998entropy-based,
  author    = {Andreas Stolcke},
  title     = {Entropy-based pruning of backoff language models},
  booktitle = {Proc. {DARPA} Broadcast News Transcription and Understanding Workshop},
  pages     = {270--274},
  year      = {1998},
  url       = {ftp://ftp.speech.sri.com/pub/papers/darpa98-lm-pruning.ps}
}

</bibentry>

Front End (roughly)

<bibentry> @article{NelderMead65Simplex,
  author  = {J. A. Nelder and R. Mead},
  title   = {A Simplex Method for Function Minimization},
  journal = {The Computer Journal},
  volume  = {7},
  number  = {4},
  pages   = {308--313},
  year    = {1965}
}

</bibentry>

<bibentry> @inproceedings{Iwano2006Weight,
  author    = {K. Iwano and K. Kojima and S. Furui},
  title     = {A Weight Estimation Method Using {LDA} for Multi-Band Speech Recognition},
  booktitle = {ICSLP},
  volume    = {7},
  pages     = {2534--2537},
  address   = {Pittsburgh, PA},
  year      = {2006}
}

</bibentry>

<bibentry> @inproceedings{Joachims2006Training,
  author    = {T. Joachims},
  title     = {Training linear {SVMs} in linear time},
  booktitle = {KDD},
  pages     = {217--226},
  year      = {2006},
  isbn      = {1-59593-339-5},
  location  = {Philadelphia, PA, USA},
  doi       = {10.1145/1150402.1150429},
  publisher = {ACM Press},
  address   = {New York, NY, USA}
}

</bibentry>

everything else

<bibentry> @inproceedings{bilmes2002graphical,
  author    = {J. Bilmes and G. Zweig},
  title     = {The graphical models toolkit: An open source software system for speech and time-series processing},
  booktitle = {ICASSP},
  year      = {2002},
  text      = {J. Bilmes and G. Zweig. The graphical models toolkit: An open source software system for speech and time-series processing. Proc. IEEE Intl. Conf. on Acoustics, Speech, and Signal Processing, 2002.},
  url       = {http://citeseer.ist.psu.edu/bilmes02graphical.html}
}

</bibentry>

<bibentry> @inproceedings{cetin2007articulatory,
  author    = "{\"O}. {\c{C}}etin and A. Kantor and S. King and C. Bartels and M. Magimai-Doss and J. Frankel and K. Livescu",
  title     = {An articulatory feature-based tandem approach and factored tandem observation modeling},
  booktitle = {ICASSP},
  address   = {Honolulu, HI},
  month     = apr,
  year      = {2007}
}

</bibentry>

<bibentry> @inproceedings{king05svitchboard,
  author    = {S. King and C. Bartels and J. Bilmes},
  title     = {{S}vitchboard 1: Small vocabulary tasks from {S}witchboard 1},
  booktitle = {Interspeech},
  year      = {2005},
  keywords  = {Switchboard-1, speech, vocabulary, hidden Markov model, Markov},
  url       = {http://www.scientificcommons.org/8450171}
}

</bibentry>

<bibentry> @inproceedings{Frankel2007,
  author    = {J. Frankel and M. Magimai-Doss and S. King and K. Livescu and {\"O}. {\c{C}}etin},
  title     = {Articulatory feature classifiers trained on 2000 hours of telephone speech},
  booktitle = {Interspeech},
  year      = {2007}
}

address = {Honolulu, HI},
month = {April},

</bibentry>

<bibentry> @inproceedings{Gravier2002Maximum,
  author    = {G. Gravier and S. Axelrod and G. Potamianos and C. Neti},
  title     = {Maximum entropy and {MCE} based {HMM} stream weight estimation for audio-visual {ASR}},
  booktitle = {ICASSP},
  address   = {Orlando, FL},
  year      = {2002}
}

</bibentry>

<bibentry> @article{Katagiri1998Pattern,
  author  = {S. Katagiri and B. Juang and C. H. Lee},
  title   = {Pattern recognition using a family of design algorithms based upon the generalized probabilistic descent method},
  journal = {Proceedings of the IEEE},
  volume  = {86},
  number  = {11},
  pages   = {2345--2373},
  year    = {1998}
}

</bibentry>

<bibentry> @inproceedings{livescu2007articulatory,
  author    = {K. Livescu and {\"O}. {\c{C}}etin and M. Hasegawa-Johnson and S. King and C. Bartels and N. Borges and A. Kantor and P. Lal and L. Yung and A. Bezman and S. Dawson-Haggerty and B. Woods and J. Frankel and M. Magimai-Doss and K. Saenko},
  title     = {Articulatory Feature-Based Methods for Acoustic and Audio-Visual Speech Recognition: Summary from the 2006 {JHU} Summer Workshop},
  booktitle = {ICASSP},
  address   = {Honolulu, HI},
  month     = apr,
  year      = {2007}
}

</bibentry>

<bibentry> @inproceedings{bilmes2000directed,
  title     = {Directed graphical models of classifier combination: Application to phone recognition},
  author    = {Bilmes, J. and Kirchhoff, K.},
  booktitle = {ICSLP},
  year      = 2000
}

</bibentry>

<bibentry> @book{htkbook,
  author               = {Odell, J. and Ollason, D. and Woodland, P. and Young, S. and Jansen, J.},
  title                = {The {HTK} Book for {HTK} V2.0},
  publisher            = {Cambridge University Press},
  address              = {Cambridge, UK},
  year                 = {1995},
  keywords             = {bibtex-import},
  priority             = {2},
  citeulike-article-id = {351838}
}

</bibentry>

<bibentry> @inproceedings{frankel2007articulatory,
  author    = {Joe Frankel and Mathew Magimai-Doss and Simon King and Karen Livescu and {\"O}zg{\"u}r {\c{C}}etin},
  title     = {Articulatory feature classifiers trained on 2000 hours of telephone speech},
  booktitle = {Interspeech},
  year      = {2007}
}

</bibentry>

<bibentry> @inproceedings{pallett2003look,
  author    = {D. S. Pallett},
  title     = {A look at {NIST}'s benchmark {ASR} tests: past, present, and future},
  booktitle = {ASRU},
  year      = {2003}
}

</bibentry>

<bibentry> @inproceedings{pfitzinger1998local,
  author    = {H. Pfitzinger},
  title     = {Local speech rate as a combination of syllable and phone rate},
  booktitle = {ICSLP},
  year      = {1998}
}

</bibentry>

<bibentry> @inproceedings{greenberg1998speaking,
  author    = {S. Greenberg},
  title     = {Speaking in shorthand -- a syllable-centric perspective for understanding pronunciation variation},
  booktitle = {ESCA Workshop on Modeling Pronunciation Variation for Automatic Speech Recognition},
  year      = {1998},
  text      = {Steven Greenberg. Speaking in shorthand -- a syllable-centric perspective for understanding pronunciation variation. In Proceedings of the ESCA Workshop on Modeling Pronunciation Variation for Automatic Speech Recognition, Kerkrade, Netherlands, May 1998. ESCA.}
}

</bibentry>

<bibentry> @article{jelinek1976continuous,
  author  = {F. Jelinek},
  title   = {Continuous speech recognition by statistical methods},
  journal = {Proceedings of the IEEE},
  volume  = {64},
  number  = {4},
  pages   = {532--556},
  year    = {1976}
}

</bibentry>

<bibentry> @article{rabiner1989tutorial,
  author  = {L. R. Rabiner},
  title   = {A tutorial on hidden {M}arkov models and selected applications in speech recognition},
  journal = {Proceedings of the IEEE},
  volume  = {77},
  number  = {2},
  pages   = {257--286},
  year    = {1989}
}

</bibentry>

<bibentry> @book{rabiner1978digital,
  author    = {L. R. Rabiner and R. W. Schafer},
  title     = {Digital Processing of Speech Signals},
  publisher = {Prentice-Hall},
  year      = {1978}
}

</bibentry>

<bibentry> @article{hermansky1990PLP,
  author  = {H. Hermansky},
  title   = {Perceptual linear predictive ({PLP}) analysis of speech},
  journal = {Journal of the Acoustical Society of America},
  volume  = {87},
  number  = {4},
  pages   = {1738--1752},
  year    = {1990}
}

</bibentry>

<bibentry> @techreport{livescu2006JHU,
  author      = {K. Livescu and {\"O}. {\c{C}}etin and M. Hasegawa-Johnson and S. King and C. Bartels and N. Borges and A. Kantor and P. Lal and L. Yung and A. Bezman and S. Dawson-Haggerty and B. Woods and J. Frankel and M. Magimai-Doss and K. Saenko},
  title       = {Articulatory Feature-based Methods for Acoustic and Audio-Visual Speech Recognition: 2006 {JHU} Summer Workshop Final Report},
  institution = {Johns Hopkins University},
  year        = {2007},
  url         = {http://www.clsp.jhu.edu/ws2006/groups/afsr/documents/WS06AFSR_final_report.pdf}
}

</bibentry>

<bibentry> @article{browman1992articulatory,
  author  = {C. P. Browman and L. Goldstein},
  title   = {Articulatory phonology: An overview},
  journal = {Phonetica},
  volume  = {49},
  year    = {1992}
}

</bibentry>

<bibentry> @inproceedings{jurafsky2001what,
  author    = {Jurafsky, D. and Ward, W. and Zhang, Jianping and Herold, K. and Yu, Xiuyang and Zhang, Sen},
  title     = {What kind of pronunciation variation is hard for triphones to model?},
  booktitle = {ICASSP},
  year      = {2001},
  text      = {D. Jurafsky, et al, What kind of pronunciation variation is hard for triphones to model?, Proc. ICASSP, Salt Lake City, UT, May 2001.},
  url       = {http://citeseer.ist.psu.edu/jurafsky01what.html}
}

</bibentry>

<bibentry> @inproceedings{mcallaster1998fabricating,
  title     = {Fabricating conversational speech data with acoustic models: A program to examine model-data mismatch},
  author    = {McAllaster, Don and Gillick, Larry and Scattone, Francesco and Newman, Mike},
  booktitle = {ICSLP},
  volume    = {5},
  year      = {1998}
}

</bibentry>

<bibentry> @inproceedings{fosler1996automatic,
  title     = {Automatic learning of word pronunciation from data},
  author    = {Fosler, Eric and Weintraub, Mitch and Wegmann, Steven and Kao, Yu-Hung and Khudanpur, Sanjeev and Galles, Charles and Saraclar, Murat},
  booktitle = {ICSLP},
  year      = 1996
}

</bibentry>

<bibentry> @inproceedings{sharma2000feature,
  title     = {Feature extraction using non-linear transformation for robust speech recognition on the {A}urora database},
  author    = {Sharma, S. and Ellis, D. and Kajarekar, S. and Jain, P. and Hermansky, H.},
  booktitle = {ICASSP},
  year      = 2000
}

</bibentry>

<bibentry> @inproceedings{chang2001elitist,
  author    = {Shuangyu Chang and Steven Greenberg and Mirjam Wester},
  title     = {An Elitist Approach to Articulatory-Acoustic Feature Classification},
  booktitle = {Eurospeech},
  year      = {2001}
}

</bibentry>

<bibentry> @inproceedings{chang2000automatic,
  author    = {Shuangyu Chang and Lokendra Shastri and Steven Greenberg},
  title     = {Automatic Phonetic Transcription of Spontaneous Speech ({American} {English})},
  booktitle = {Interspeech},
  year      = {2000}
}

</bibentry>

<bibentry> @incollection{jordan2002graphical,
  author    = {M. I. Jordan and Y. Weiss},
  title     = {Graphical models: probabilistic inference},
  booktitle = {The Handbook of Brain Theory and Neural Networks},
  publisher = {MIT Press},
  year      = {2002}
}

</bibentry>

<bibentry> @techreport{bilmes1998gentle,
  title       = {A Gentle Tutorial on the {EM} Algorithm and its Application to Parameter Estimation for {G}aussian Mixture and Hidden {M}arkov Models},
  author      = {Bilmes, J.},
  institution = {ICSI},
  year        = 1998
}

</bibentry>

<bibentry> @inproceedings{chen2002low-resource,
  title     = {Low-Resource Noise-Robust Feature Post-Processing On {A}urora 2.0},
  author    = {Chen, Chia-Ping and Bilmes, Jeff and Kirchhoff, Katrin},
  booktitle = {Interspeech},
  year      = 2002
}

</bibentry>

<bibentry> @inproceedings{sethy2003improvements,
  title     = {Improvements In {E}nglish {ASR} For The {MALACH} Project Using Syllable-Centric Models},
  author    = {Sethy, Abhinav and Ramabhadran, Bhuvana and Narayanan, Shrikanth},
  booktitle = {ASRU},
  year      = 2003
}

</bibentry>

<bibentry> @inproceedings{hershey2007approximating,
  author    = {Hershey, J. R. and Olsen, P. A.},
  title     = {Approximating the {K}ullback {L}eibler Divergence Between Gaussian Mixture Models},
  booktitle = {ICASSP},
  volume    = {4},
  pages     = {IV-317--IV-320},
  year      = {2007},
  keywords  = {gmm, kl-divergence, mixture-model}
}

- - REF***

</bibentry>

<bibentry> @inproceedings{cieri2004fisher,
  author    = {C. Cieri and D. Miller and K. Walker},
  title     = {The {F}isher Corpus: a Resource for the Next Generations of Speech-to-Text},
  booktitle = {International Conference On Language Resources And Evaluation},
  year      = {2004}
}

</bibentry>

<bibentry> @misc{nuance2007web,
  author = "{Nuance Communications}",
  title  = {http://www.nuance.com/},
  url    = {http://www.nuance.com/}
}

</bibentry>

<bibentry> @inproceedings{zheng2000rate,
  author    = {Zheng, Jing and Franco, Horacio and Stolcke, Andreas},
  title     = {Rate-Of-Speech Modeling For Large Vocabulary Conversational Speech Recognition},
  booktitle = {Speech Transcription Workshop},
  year      = 2000
}

</bibentry>

<bibentry> @phdthesis{fosler-lussier1999thesis,
  author = {Fosler-Lussier, J. E.},
  title  = {Dynamic Pronunciation Models for Automatic Speech Recognition},
  school = {University of California, Berkeley},
  year   = 1999
}

</bibentry>

<bibentry> @article{chen2006Prosody,
  author  = {Chen, K. and Hasegawa-Johnson, M. and Cohen, A. and Borys, S. and Kim, S.-S. and Cole, J. and Choi, J.-Y.},
  title   = {Prosody dependent speech recognition on radio news},
  journal = {IEEE Transactions On Audio, Speech, And Language Processing},
  year    = 2006
}

</bibentry>

<bibentry> @inproceedings{zheng2005Accent,
  author    = {Y. Zheng and R. Sproat and L. Gu and I. Shafran and H. Zhou and Y. Su and D. Jurafsky and R. Starr and S.-Y. Yoon},
  title     = {Accent Detection and Speech Recognition for {S}hanghai-Accented {M}andarin},
  booktitle = {Interspeech},
  year      = {2005}
}

</bibentry>

<bibentry> @article{athanaselis2005asr,
  author    = {T. Athanaselis and S. Bakamidis and I. Dologlou and R. Cowie and E. Douglas-Cowie and C. Cox},
  title     = {{ASR} for emotional speech: clarifying the issues and enhancing performance},
  journal   = {Neural Networks},
  volume    = {18},
  number    = {4},
  pages     = {437--444},
  year      = {2005},
  issn      = {0893-6080},
  doi       = {10.1016/j.neunet.2005.03.008},
  publisher = {Elsevier Science Ltd.},
  address   = {Oxford, UK}
}

</bibentry>

<bibentry> @inproceedings{beckman1989timing,
  author    = {Mary E. Beckman},
  title     = {Timing models for prosody and cross-word coarticulation in connected speech},
  booktitle = {HLT '89: Proceedings of the workshop on Speech and Natural Language},
  pages     = {12--21},
  year      = {1989},
  isbn      = {1-55860-112-0},
  location  = {Cape Cod, Massachusetts},
  doi       = {10.3115/1075434.1075438},
  publisher = {Association for Computational Linguistics},
  address   = {Morristown, NJ, USA}
}

</bibentry>

<bibentry> @inproceedings{greenberg1996insights,
  title     = {Insights into Spoken Language Gleaned from Phonetic Transcription of the {S}witchboard Corpus},
  author    = {Greenberg, S. and Hollenback, J. and Ellis, D.},
  booktitle = {ICSLP},
  year      = 1996
}

</bibentry>

<bibentry> @article{bacchiani1999joint,
  author    = {M. Bacchiani and M. Ostendorf},
  title     = {Joint lexicon, acoustic unit inventory and model design},
  journal   = {Speech Communication},
  volume    = {29},
  number    = {2--4},
  pages     = {99--114},
  year      = {1999},
  issn      = {0167-6393},
  doi       = {10.1016/S0167-6393(99)00033-3},
  publisher = {Elsevier Science Publishers B. V.},
  address   = {Amsterdam, The Netherlands}
}

</bibentry>

<bibentry> @phdthesis{saraclar2000thesis,
  title   = {Pronunciation Modeling for Conversational Speech Recognition},
  author  = {Saraclar, Murat},
  school  = {Johns Hopkins University},
  address = {Baltimore, MD, USA},
  year    = 2000
}

- - READ***

</bibentry>

<bibentry> @inproceedings{polzin1998pronunciation,
  author    = {T. S. Polzin and A. H. Waibel},
  title     = {Pronunciation variations in emotional speech},
  booktitle = {Proc. of the ESCA Workshop},
  year      = {1998}
}

- - READ***

</bibentry>

<bibentry> @inproceedings{junqua1999lombard,
  author    = {J.-C. Junqua},
  title     = {The {L}ombard effect: A reflex to better communicate with others in noise},
  booktitle = {ICASSP},
  year      = {1999}
}

</bibentry>

<bibentry> @inproceedings{strik2001pronunciation,
  author    = {Strik, H.},
  title     = {Pronunciation adaptation at the lexical level},
  booktitle = {ITRW on Adaptation Methods for Speech Recognition},
  year      = 2001
}

</bibentry>

<bibentry> @inproceedings{livescu2004Feature,
  author    = {Livescu, Karen and Glass, James},
  title     = {Feature-based Pronunciation Modeling with Trainable Asynchrony Probabilities},
  booktitle = {ICSLP},
  year      = {2004}
}

</bibentry>

<bibentry> @inproceedings{ristad1998surficial,
  author    = {E. Ristad and P. Yianilos},
  title     = {A surficial pronunciation model},
  booktitle = {Proc. of the ESCA Workshop on Modeling Pronunciation Variation for Automatic Speech Recognition},
  year      = {1998},
  url       = {http://citeseer.ist.psu.edu/ristad98surficial.html}
}

</bibentry>

<bibentry> @inproceedings{byrne97pronunciation,
  author    = {Byrne, B. and Finke, M. and Khudanpur, S. and McDonough, J. and Nock, H. and Riley, M. and Saraclar, M. and Wooters, C. and Zavaliagkos, G.},
  title     = {Pronunciation Modelling for Conversational Speech Recognition: A Status Report from {WS}97},
  booktitle = {ASRU},
  year      = 1997
}

</bibentry>

<bibentry> @inproceedings{faria2008when,
  author    = {A. Faria and N. Morgan},
  title     = {When a Mismatch Can Be Good: Large vocabulary speech recognition trained with idealized Tandem features},
  booktitle = {Proc. ACM Symposium on Applied Computing (SAC)},
  year      = {2008}
}

- - READ***
  - REF***

</bibentry>

<bibentry> @inproceedings{holter1998maximum,
  title     = {Maximum likelihood modelling of pronunciation variation},
  author    = {Holter, T. and Svendsen, T.},
  booktitle = {Proc. of the ESCA Workshop on Modeling Pronunciation Variation for Automatic Speech Recognition},
  year      = 1998
}

</bibentry>

<bibentry> @inproceedings{bates2002modeling,
  title     = {Modeling Pronunciation Variation in Conversational Speech Using Prosody},
  author    = {Bates, R. and Ostendorf, M.},
  booktitle = {Proc. of the ISCA Workshop on Pronunciation Modeling and Lexicon Adaptation},
  year      = 2002
}

- - REF***

</bibentry>

<bibentry> @article{ganapathiraju2001syllable,
  author  = {Ganapathiraju, A. and Hamaker, J. and Picone, J. and Ordowski, M. and Doddington, G. R.},
  title   = {Syllable-based large vocabulary continuous speech recognition},
  journal = {IEEE Transactions On Speech and Audio Processing},
  year    = 2001
}

</bibentry>

<bibentry> @inproceedings{ganapathiraju1997syllable,
  author    = {Ganapathiraju, A. and Goel, V. and Picone, J. and Corrada, A. and Doddington, G. and Kirchhoff, K. and Ordowski, M. and Wheatley, B.},
  title     = {Syllable-a promising recognition unit for {LVCSR}},
  booktitle = {ASRU},
  year      = {1997}
}

</bibentry>

<bibentry> @inproceedings{sethy2003split-lexicon,
  author    = {Sethy, Abhinav and Narayanan, Shrikanth},
  title     = {Split-Lexicon based hierarchical recognition of speech using syllable and word level acoustic units},
  booktitle = {ICASSP},
  year      = 2003
}

</bibentry>

<bibentry> @unpublished{hasegawa2007dictionary,
  author = {M. Hasegawa-Johnson},
  title  = {The Illinois Speech and Language Engineering Dictionary},
  month  = jun,
  year   = {2007},
  note   = {http://www.isle.uiuc.edu/dict/index.html}
}

</bibentry>

<bibentry> @inproceedings{wu1998incorporating,
  author    = {S.-L. Wu and B. E. D. Kingsbury and N. Morgan and S. Greenberg},
  title     = {Incorporating Information From Syllable-length Time Scales into Automatic Speech Recognition},
  booktitle = {ICASSP},
  year      = {1998}
}

- - REF***

</bibentry>

<bibentry> @unpublished{noel1997alphadigits,
  author = {Noel, M.},
  title  = {Alphadigits corpus},
  note   = {http://cslu.cse.ogi.edu/corpora/alphadigit/},
  year   = 1997
}

- - REF***

</bibentry>

<bibentry> @inproceedings{hamalainen2007modelling,
  author    = {A. H{\"a}m{\"a}l{\"a}inen and L. {ten Bosch} and L. Boves},
  title     = {Modelling Pronunciation Variation Using Multi-Path {HMM}s for Syllables},
  booktitle = {ICASSP},
  year      = {2007}
}

</bibentry>

<bibentry> @article{hamalainen2007on,
  author  = {A. H{\"a}m{\"a}l{\"a}inen and L. Boves and J. {de Veth} and L. {ten Bosch}},
  title   = {On the Utility of Syllable-Based Acoustic Models for Pronunciation Variation Modelling},
  journal = {EURASIP Journal on Audio, Speech, and Music Processing},
  year    = {2007}
}

</bibentry>

<bibentry> @techreport{elffers2005ADAPT,
  author      = {Elffers, B. and Van Bael, C. and Strik, H.},
  title       = {{ADAPT}: Algorithm for Dynamic Alignment of Phonetic Transcriptions},
  institution = {Department of Language and Speech, Radboud University Nijmegen, the Netherlands},
  year        = {2005},
  note        = {http://lands.let.ru.nl/literature/elffers.2005.1.pdf}
}

</bibentry>

<bibentry> @article{fujimura1975syllable,
  author  = {Fujimura, O.},
  title   = {Syllable as a Unit of Speech Recognition},
  journal = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  year    = {1975}
}

</bibentry>

<bibentry> @article{hain2005Implicit,
  author  = {T. Hain},
  title   = {Implicit modelling of pronunciation variation in automatic speech recognition},
  journal = {Speech Communication},
  volume  = {46},
  number  = {2},
  pages   = {171--188},
  year    = {2005}
}

- - READ***

</bibentry>

<bibentry> @incollection{riley1995automatic,
  author    = {M. Riley and A. Ljolje},
  title     = {Automatic generation of detailed pronunciation lexicons},
  booktitle = {Automatic Speech and Speaker Recognition: Advanced Topics},
  publisher = {Kluwer},
  year      = {1995},
  text      = {M. Riley and A. Ljolje, Automatic generation of detailed pronunciation lexicons.  Automatic Speech and Speaker Recognition: Advanced Topics. Kluwer. 1995.},
  url       = {http://citeseer.ist.psu.edu/riley95automatic.html}
}

</bibentry>

<bibentry> @inproceedings{bourlard1996Copernicus,
  author    = {Bourlard, H. and Hermansky, H. and Morgan, N.},
  title     = {Copernicus and the {ASR} challenge -- Waiting for {Kepler}},
  booktitle = {Proc. ARPA Speech Recognition Workshop},
  year      = {1996}
}

- - REF***

</bibentry>

<bibentry> @inproceedings{kantor2008stream,
  author    = {Kantor, A. and Hasegawa-Johnson, M.},
  title     = {Stream Weight Tuning In Dynamic {B}ayesian Networks},
  booktitle = {ICASSP},
  year      = 2008
}

</bibentry>

<bibentry> @inproceedings{levinson1986continuously,
  author    = {Levinson, S. E.},
  title     = {Continuously variable duration hidden {M}arkov models for speech analysis},
  booktitle = {ICASSP},
  year      = 1986
}

</bibentry>

<bibentry> @inproceedings{kirchhoff2000combination,
  author    = {Kirchhoff, K. and Bilmes, J.},
  title     = {Combination and Joint Training of Acoustic Classifiers for Speech Recognition},
  booktitle = {Proceedings of {ASR}},
  year      = 2000
}

</bibentry>

<bibentry> @inproceedings{young1994treebased,
  author    = {S. Young and J. Odell and P. Woodland},
  title     = {Tree-based state tying for high accuracy acoustic modelling},
  booktitle = {Proceedings of ARPA Workshop on Human Language Technology},
  year      = {1994},
  url       = {http://citeseer.ist.psu.edu/young94treebased.html}
}

</bibentry>

<bibentry> @article{hain2005automatic,
  author   = {Hain, T. and Woodland, P. C. and Evermann, G. and Gales, M. J. F. and Liu, Xunying and Moore, G. L. and Povey, D. and Wang, Lan},
  title    = {Automatic transcription of conversational telephone speech},
  journal  = {IEEE Transactions on Speech and Audio Processing},
  volume   = {13},
  number   = {6},
  pages    = {1173--1185},
  month    = nov,
  year     = {2005},
  keywords = {decoding, error statistics, hidden Markov models, interpolation, natural languages, speech coding, speech recognition, Cambridge University HTK, acoustic modeling, automatic transcription, cepstral normalization, conversational telephone speech, decoding, front-end processing, heteroscedastic linear discriminant analysis, hidden Markov model, interpolation, language, lattice-based model adaptation, minimum phone error training, pronunciation modeling, score estimation, speaker adaptive training, speech recognition, word error rate},
  doi      = {10.1109/TSA.2005.852999},
  issn     = {1063-6676}
}

- - READ more***

</bibentry>

<bibentry> @inproceedings{oostdijk2002experiences,
  author    = {N. Oostdijk and W. Goedertier and F. Van Eynde and L. Boves and J. P. Martens and M. Moortgat and H. Baayen},
  title     = {Experiences from the Spoken {D}utch Corpus Project},
  booktitle = {Proceedings of LREC},
  year      = {2002}
}

</bibentry>

<bibentry> @misc{molau-efficient,
  author = {Sirko Molau and Stephan Kanthak and Hermann Ney},
  title  = {Efficient Vocal Tract Normalization in Automatic Speech Recognition},
  url    = {http://citeseer.ist.psu.edu/381929.html}
}

- - REF*** if you feel like it

</bibentry>

<bibentry> @inproceedings{toth2005explicit,
  author    = {Toth, L. and Kocsor, A.},
  title     = {Explicit duration modelling in {HMM}/{ANN} Hybrids},
  booktitle = {Proceedings of Text, Speech and Dialogue},
  year      = {2005}
}

</bibentry>

<bibentry> @inproceedings{ljolje2006pronunciation,
  author    = {Ljolje, A.},
  title     = {Pronunciation dependent language models},
  booktitle = {Interspeech},
  year      = {2006}
}

</bibentry>

<bibentry> @article{chen2007factored,
  author  = {Ken Chen and M. Hasegawa-Johnson and J. Cole},
  title   = {A Factored Language Model for Prosody-Dependent Speech Recognition},
  journal = {Advanced Robotic Systems},
  year    = {2007}
}

</bibentry>

<bibentry> @inproceedings{borys2004Modeling,
  author    = {Borys, S. and Hasegawa-Johnson, M. and Chen, K. and Cohen, Aaron},
  title     = {Modeling and Recognition of Phonetic and Prosodic Factors for Improvements to Acoustic Speech Recognition Models},
  booktitle = {Interspeech},
  year      = {2004}
}

</bibentry>

<bibentry> @inproceedings{greenberg1997origins,
  title     = {The Origins Of Speech Intelligibility In The Real World},
  author    = {Greenberg, S.},
  booktitle = {ESCA Workshop on Robust Speech Recognition for Unknown Channels},
  year      = 1997
}

Confidence measures for large vocabulary continuous speech recognition Wessel, F.; Schluter, R.; Macherey, K.; Ney, H. Page(s): 288-298 Digital Object Identifier 10.1109/89.906002

</bibentry>

<bibentry> @article{ostendorf1996from,
  author  = {M. Ostendorf and V. Digalakis and O. Kimball},
  title   = {From {HMM}s to segment models: a unified view of stochastic modeling for speech recognition},
  journal = {IEEE Transactions on Speech and Audio Processing},
  volume  = {4},
  number  = {5},
  pages   = {360--378},
  year    = {1996}
}

</bibentry>

<bibentry> @article{baum1967inequality,
  author  = {L. E. Baum and J. A. Eagon},
  title   = {An inequality with applications to statistical estimation for probabilistic functions of {M}arkov processes and to a model for ecology},
  journal = {Bull. Amer. Math. Soc.},
  volume  = {73},
  pages   = {360--363},
  year    = {1967}
}

- - READ***

</bibentry>

<bibentry> @article{juang1986maximum,
  author    = {B. H. Juang and S. E. Levinson and M. M. Sondhi},
  title     = {Maximum likelihood estimation for multivariate mixture observations of {M}arkov chains},
  journal   = {IEEE Transactions on Information Theory},
  volume    = {32},
  number    = {2},
  pages     = {307--309},
  year      = {1986},
  issn      = {0018-9448},
  doi       = {10.1109/TIT.1986.1057145},
  publisher = {IEEE Press},
  address   = {Piscataway, NJ, USA}
}

- - READ***

</bibentry>

<bibentry> @inproceedings{halberstadt1998heterogeneous,
  author    = {Halberstadt, A. K. and Glass, J. R.},
  title     = {Heterogeneous measurements and multiple classifiers for speech recognition},
  booktitle = {ICSLP},
  year      = 1998
}

- - READ***

</bibentry>

<bibentry> @inproceedings{halberstadt1997heterogeneous,
  author    = {A. K. Halberstadt and J. R. Glass},
  title     = {Heterogeneous Acoustic Measurements for Phonetic Classification},
  booktitle = {Eurospeech},
  pages     = {401--404},
  year      = {1997},
  url       = {http://citeseer.ist.psu.edu/article/halberstadt97heterogeneous.html}
}

- - READ***

</bibentry>

<bibentry> @article{glass2003probabilistic,
  author  = {J. R. Glass},
  title   = {A probabilistic framework for segment-based speech recognition},
  journal = {Computer Speech \& Language},
  volume  = {17},
  number  = {2--3},
  pages   = {137--152},
  year    = {2003}
}

- - READ***

</bibentry>

<bibentry> @inproceedings{zue1989Acoustic,
  author    = {Zue, V. and Glass, J. and Phillips, M. and Seneff, S.},
  title     = {Acoustic segmentation and phonetic classification in the {SUMMIT} system},
  booktitle = {ICASSP},
  pages     = {389--392},
  year      = {1989},
  keywords  = {speech recognition, SUMMIT system, acoustic segmentation, phonetic classification, speech knowledge, speech understanding system, spoken-language-understanding system},
  doi       = {10.1109/ICASSP.1989.266447}
}

- - READ***

</bibentry>

<bibentry> @inproceedings{hasegawa2007audiovisual,
  author    = {M. Hasegawa-Johnson and K. Livescu and P. Lal and K. Saenko},
  title     = {Audiovisual Speech Recognition with Articulator Positions as Hidden Variables},
  booktitle = {Proc. International Congress of Phonetic Sciences (ICPhS)},
  year      = {2007}
}

</bibentry>

<bibentry> @inproceedings{nam2003competitive,
  author    = {Nam, H. and Saltzman, E.},
  title     = {A competitive, coupled oscillator model of syllable structure},
  booktitle = {Proceedings of the XVth International Congress of Phonetic Sciences},
  year      = {2003}
}

</bibentry>

Bibliography

From SpeechWiki

Contents

To be added to references in Arthur's thesis

Language Model stuff

Front End (roughly)

everything else

Views

Personal tools

Navigation

Toolbox

Search