@comment{
  Conference paper from the IALP 2017 proceedings (research-portal export, cleaned up).
  The title casing was restored from the abstract, which writes Web NER in capitals.
  NOTE(review): month/day below (2 Jul) disagree with the conference dates
  recorded in the note field (5-7 Dec 2017); confirm against the publisher record.
}
@inproceedings{25a9156f751841b0bcb65983e24c175c,
  author    = {Chou, {Chien Lung} and Chang, {Chia Hui}},
  title     = {Mining Features for {Web} {NER} Model Construction Based on Distant Learning},
  booktitle = {Proceedings of the 2017 International Conference on Asian Language Processing, {IALP} 2017},
  editor    = {Tong, Rong and Zhang, Yue and Lu, Yanfeng and Dong, Minghui},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {322--325},
  year      = {2017},
  month     = jul,
  day       = {2},
  doi       = {10.1109/IALP.2017.8300608},
  language  = {English},
  keywords  = {Distant learning, Features mining, Scalable automatic labeling, Semi-supervised Learning, Sequence labeling},
  abstract  = {In this paper, we study the problem of developing a WIDM NER tool to prepare training corpus from the Web for custom named entity recognition (NER) models via distant learning. We consider two major issues including efficient automatic labelling and effective feature mining for training accurate NER models via sequence labelling technique. While the idea of collecting training sentences from search snippets via known entities (seeds) is not new, efficient automatic labelling becomes an issue when we have a large number of seeds (e.g. 500K) and sentences (e.g. 2M). The second issue regards the mining of interesting terms or k-grams as features for supervised learning. We conduct experiments on four types of entity recognition including Chinese person name, food name, location name, and point of interest (POI) to demonstrate the improvement in efficiency and effectiveness with the proposed Web NER model construction tool.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 21st International Conference on Asian Language Processing, IALP 2017 ; Conference date: 05-12-2017 Through 07-12-2017},
}