% Conference paper in the EC-Web 2014 proceedings (Lecture Notes in Business Information Processing).
@inproceedings{3763ddf2145040cfbedb83dd37dc165e,
  author    = {Chuang, {Hsiu Min} and Chang, {Chia Hui} and Kao, {Ting Yao}},
  title     = {Effective web crawling for {Chinese} addresses and associated information},
  editor    = {Hepp, Martin and Hoffner, Yigal},
  booktitle = {E-Commerce and Web Technologies - 15th International Conference, EC-Web 2014, Proceedings},
  series    = {Lecture Notes in Business Information Processing},
  publisher = {Springer Verlag},
  pages     = {13--25},
  year      = {2014},
  doi       = {10.1007/978-3-319-10491-1_2},
  language  = {English},
  keywords  = {Associated information extraction, Chinese postal address extraction, Crawling strategies, Geographic information retrieval},
  abstract  = {With the advance of wireless networks, location-based services have become very important as people often need to query for addresses of unfamiliar locations through Web and then locate the position on the map. Existing geographic information systems based on crowd-sourcing are insufficient and have a slow update progress. However, it can actually be complemented by automatically extracting addresses of location entities and associated information from general pages. Thus, effectively crawling webpages with addresses is a practical challenge for enriching the location entity database. This research is devoted to automatic address and associated information extraction to provide information retrieval on maps, i.e. integrating the process of location entity query on Web and positioning on maps. We build a geographic information system of location entities by crawling the Web via three strategies for Chinese addresses. One point two seven (1.27) million distinct Chinese addresses are crawled using 1.08 million HTTP requests, leading to a return-of-investment of 1.169.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2014.; 15th International Conference on E-Commerce and Web Technologies, EC-Web 2014 ; Conference date: 01-09-2014 Through 04-09-2014},
}