% Conference paper in the ROCLING 2017 proceedings (pp. 183--196) on extracting
% restaurant types from the PTT FOOD board; the title is in Chinese, the abstract in English.
% The citation key is an exporter-generated hash and is kept unchanged so existing citations still resolve.
% Acronyms in title/booktitle are brace-protected so sentence-casing styles do not lowercase them.
% NOTE(review): the "note" field holds exporter boilerplate (copyright line, conference dates) that
% most styles will print verbatim -- confirm whether that is wanted.
% NOTE(review): day = "1" does not match the 27--28 Nov conference dates given in "note"; it is
% presumably an exporter default -- verify before relying on it.
@inproceedings{73b168da25544b4eb2ec06d39b997ab4,
title = "{PTT} 網站餐廳美食類別擷取之研究",
abstract = "In this study, we hope to develop a system to automatically extract restaurant type from the FOOD board of PTT, the largest BBS web site in Taiwan. This paper is divided into three parts. The first part is pre-processing, where we crawl articles from the PTT FOOD board and extract title, restaurant name, telephone, address and URL information via regular expressions. The second part is restaurant type labeling from title data. We used WIDM NER TOOL to train a model for restaurant type extraction. The last part of the article is experiment. We randomly selected 10,000 titles for manual labeling and testing. We used the labeled data for supervised learning and included unlabeled data for Semi-Supervised learning. Finally we got a good result using this method in restaurant type extraction.",
keywords = "Distant Learning, Machine Learning, Named Entity Recognition, Tri-Training",
author = "Chung, {Chih Yu} and Chou, {Chien Lung} and Chang, {Chia Hui}",
note = "Publisher Copyright: {\textcopyright} The Association for Computational Linguistics and Chinese Language Processing; 29th Conference on Computational Linguistics and Speech Processing, ROCLING 2017 ; Conference date: 27-11-2017 Through 28-11-2017",
year = "2017",
month = nov,
day = "1",
language = "繁體中文",
publisher = "The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)",
pages = "183--196",
editor = "Lun-Wei Ku and Yu Tsao and Chi-Chun Lee and Cheng-Zen Yang and Hung-Yi Lee and Tsai, {Richard T.-H.} and Wen-Hsiang Lu and Shih-Hung Wu",
booktitle = "Proceedings of the 29th Conference on Computational Linguistics and Speech Processing, {ROCLING} 2017",
}