% Conference paper in the PACLIC 25 proceedings (2011), pp. 100--109.
% The citation key is the exporter-generated identifier and is kept as-is
% so existing \cite commands keep resolving.
% NOTE(review): given names are brace-grouped exactly as exported; they may
% have been hyphenated in the published paper -- confirm against the source.
@inproceedings{7646402f556e4df2af984a6bfc787c37,
  author    = {Jiang, {Mike Tian Jian} and Shih, {Cheng Wei} and Kuo, {Chan Hung} and Tsai, {Richard Tzong Han} and Hsu, {Wen Lian}},
  title     = {Evaluation via Negativa of {Chinese} Word Segmentation for Information Retrieval},
  booktitle = {{PACLIC} 25 - Proceedings of the 25th Pacific Asia Conference on Language, Information and Computation},
  pages     = {100--109},
  year      = {2011},
  isbn      = {9784905166023},
  language  = {english},
  keywords  = {Information retrieval, True negative rate, Word segmentation},
  abstract  = {Numerous studies have analyzed the influences of word segmentation (WS) performance on information retrieval (IR) for Mandarin Chinese and have demonstrated a non-monotonic relationship between WS accuracy and IR effectiveness. The usefulness of the compound words that have been a focus of the IR literature is not reflected by common WS evaluation metrics of word-based precision (P) and recall (R). This investigation proposes alternative measurements of WS accuracy, which are based on negative segments that are annotated against four standards of referenced corpora, called true negative rate (TNR) and negative predictive value (NPV), and compares with P and R through search engine simulation. Accuracy-controlled WS systems segment queries for the simulation including NTCIR collections and Sogou logs. Mean average precision (MAP) estimates the similarity of search results between the original and segmented queries. The statistics demonstrate that TNR and NPV are generally more closely correlated with MAP than are P and R.},
  note      = {25th Pacific Asia Conference on Language, Information and Computation, PACLIC 25; conference date: 16--18 December 2011},
}