% Conference paper (TAAI 2020): punctuation restoration for Traditional Chinese text.
% The citation key is the exporter-generated identifier; it is kept unchanged so existing citations still resolve.
@inproceedings{1ff8083e5d8043ecbb205fad42c0d909,
  author    = {Chao, {Yu Chieh} and Chang, {Chia Hui}},
  title     = {Automatic Punctuation Restoration for corpus in {Traditional} {Chinese} Language using Deep Learning},
  booktitle = {Proceedings - 25th International Conference on Technologies and Applications of Artificial Intelligence, {TAAI} 2020},
  pages     = {91--96},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2020},
  month     = dec,
  doi       = {10.1109/TAAI51410.2020.00025},
  language  = {english},
  keywords  = {Automatic punctuation restoration, Deep Learning},
  note      = {Publisher Copyright: {\textcopyright} 2020 {IEEE}; 25th International Conference on Technologies and Applications of Artificial Intelligence, {TAAI} 2020; Conference date: 2020-12-03 through 2020-12-05},
  abstract  = {The Automatic Speech Recognition (ASR) technique has already been applied to several chat apps, allowing people to orally input messages instead of typing words by hand. Meanwhile, ASR techniques have also been used in the transcription of meeting minutes from audio records. However, there exist two main reasons such that ASR systems are not suitable for some formal situations: wrong words caused by erroneous recognition and lacking punctuation marks, which degrade the readability and might express wrong meaning. In our work, we expect to set up a model to automatically restore punctuation marks for the corpus generated by ASR systems; however, since lacking such labeled data for our ASR corpus, we train and test our model totally on the corresponding transcript data. This research focuses on automatic punctuation restoration for traditional Chinese language corpus using neural network model. Our results show that the bidirectional Gated Recurrent Unit (GRU) with attention mechanism outperforms other models on our punctuation restoration task when the amount of the training data is limited.},
}