% Conference paper in the PAKDD 2002 proceedings; key kept as exported so existing citations still resolve.
@inproceedings{c3b021448fd34fc38aac611256f86cfe,
  author    = {Chang, Chia Hui and Kuo, Shih Chien and Hwang, Kuo Yu and Ho, Tsung Hsin and Lin, Chih Lung},
  title     = {Automatic information extraction for multiple singular web pages},
  booktitle = {Advances in Knowledge Discovery and Data Mining - 6th Pacific-Asia Conference, {PAKDD} 2002, Proceedings},
  editor    = {Chen, Ming-Syan and Yu, Philip S. and Liu, Bing},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer-Verlag},
  year      = {2002},
  pages     = {297--303},
  isbn      = {9783540437048},
  doi       = {10.1007/3-540-47887-6_29},
  language  = {english},
  note      = {Publisher Copyright: {\textcopyright} Springer-Verlag Berlin Heidelberg 2002. 6th Pacific-Asia Conference on Knowledge Discovery and Data Mining, PAKDD 2002. Conference date: 06-05-2002 through 08-05-2002},
  abstract  = {The World Wide Web is now undeniably the richest and most dense source of information, yet its structure makes it difficult to make use of that information in a systematic way. This paper extends a pattern discovery approach called IEPAD to the rapid generation of information extractors that can extract structured data from semi-structured Web documents. IEPAD is proposed to automate wrapper generation from a multiple-record Web page without user-labeled examples. In this paper, we consider another case when multiple Web pages are available but each input Web page contains only one record (called singular Web pages). To solve this case, a hierarchical multiple string alignment is proposed to allow wrapper induction for multiple singular Web pages.},
}