% Conference paper from Speech Prosody 2008 (SP 2008). Cleaned-up repository
% export: the citation key is kept unchanged so existing citations still resolve.
@inproceedings{03348b57a94340669682b039a4cd1ccb,
  author    = {Liao, {Yuan Fu} and Chang, {Wen Chieh} and Xie, {Zong You} and Zeng, {Ding Yun} and Juang, {Yau Tarng}},
  title     = {Joint prosodic and spectral modeling for robust speaker verification},
  booktitle = {Proceedings of the 4th International Conference on Speech Prosody, SP 2008},
  publisher = {International Speech Communication Association},
  year      = {2008},
  pages     = {143--146},
  isbn      = {9780616220030},
  language  = {English},
  note      = {4th International Conference on Speech Prosody 2008, SP 2008 ; Conference date: 06-05-2008 Through 09-05-2008},
  abstract  = {In this paper, a joint prosodic and spectral modeling framework is proposed instead of traditional score-domain fusion approaches to alleviate the problem of mismatch channel/handset/ambient noise. The basic idea is to embed the concept of hierarchical structure of speech prosody into an ergodic HMM (EHMM), and model the prosodic status transitions and prosodic/spectral features by EHMM's states, state transition probabilities and state-dependent observation distributions, respectively. Experimental results evaluated on the standard single-speaker detection task of NIST 2001 speaker recognition evaluation (NIST-SRE 2001) showed that the proposed approach not only outperformed the spectral feature-based baseline (8.04\% vs. 8.64\% in equal error rate, EER) but also worked a little bit better than score-domain fusion (8.44\%) approach.},
}