% Conference paper in the CIKM 2013 proceedings (Lee, Juan, Chen, Tseng).
@inproceedings{a27cf9816e4f42d788dfae95a9f9b469,
title = "Objectionable content filtering by click-through data",
abstract = "This paper explores users' browsing intents to predict the category of a user's next access during web surfing, and applies the results to objectionable content filtering. A user's access trail represented as a sequence of URLs reveals the contextual information of web browsing behaviors. We extract behavioral features of each clicked URL, i.e., hostname, bag-of-words, gTLD, IP, and port, to develop a linear chain CRF model for context-aware category prediction. Large-scale experiments show that our method achieves a promising accuracy of 0.9396 for objectionable access identification without requesting their corresponding page content. Error analysis indicates that our proposed model results in a low false positive rate of 0.0571. In real-life filtering simulations, our proposed model accomplishes macro-averaging blocking rate 0.9271, while maintaining a favorably low macro-averaging over-blocking rate 0.0575 for collaboratively filtering objectionable content with time change on the dynamic web.",
keywords = "Click-through mining, Collaborative filtering, Internet censorship",
author = "Lee, {Lung Hao} and Juan, {Yen Cheng} and Chen, {Hsin Hsi} and Tseng, {Yuen Hsien}",
year = "2013",
doi = "10.1145/2505515.2507849",
language = "English",
isbn = "9781450322638",
series = "International Conference on Information and Knowledge Management, Proceedings",
pages = "1581--1584",
booktitle = "{CIKM} 2013 - Proceedings of the 22nd {ACM} International Conference on Information and Knowledge Management",
note = "22nd ACM International Conference on Information and Knowledge Management, CIKM 2013 ; Conference date: 27-10-2013 Through 01-11-2013",
}