@inproceedings{197e1853ddb44a7cbf810a2f7a44093a,
title = "Code-switched Text Data Augmentation for Chinese-English Mixed Speech Recognition",
abstract = "Code-switching is a common mode of language expression, which means that two or more languages are used interchangeably in a conversation. At present, the development of such code-switching technology in the field of speech recognition research is still limited by insufficient training corpus of text, which affects the system performance. This paper will use a neural network to train a generator to generate code-switching text to expand the corpus to achieve the purpose of improving the mixed recognition rate of Chinese and English. Our method is to use the Chinese and English texts in the SEAME corpus to train the BERT-BiLSTM-CRF model and use the model to know the code-switching position, generating sentences that conform to the characteristics of this corpus. The experimental results show that the method in this paper has better performance than other methods.",
keywords = "BERT, Code-Switching, NLP, Speech Recognition",
author = "Lee, {Chung Ting} and Wang, {Teng Hui} and Liang, {Kai Wen} and Le, {Phuong Thi} and Li, {Yung Hui} and Wang, {Jia Ching}",
note = "Publisher Copyright: {\textcopyright} 2022 IEEE.; 11th IEEE Global Conference on Consumer Electronics, GCCE 2022 ; Conference date: 18-10-2022 Through 21-10-2022",
year = "2022",
doi = "10.1109/GCCE56475.2022.10014140",
language = "???core.languages.en_GB???",
series = "GCCE 2022 - 2022 IEEE 11th Global Conference on Consumer Electronics",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "922--923",
booktitle = "GCCE 2022 - 2022 IEEE 11th Global Conference on Consumer Electronics",
}