% Conference paper (JCDL 2014) describing a scholarly big data platform.
% Names are stored as "Last, First" with unbraced given names so styles can
% derive every initial; acronyms in venue fields are brace-protected.
@inproceedings{e731b987df45405cb226bfbfb20f22fc,
  author    = "Wu, Zhaohui and Wu, Jian and Khabsa, Madian and Williams, Kyle and Chen, Hung Hsuan and Huang, Wenyi and Tuarob, Suppawong and Choudhury, Sagnik Ray and Ororbia, Alexander and Mitra, Prasenjit and Giles, C. Lee",
  title     = "Towards building a scholarly big data platform: Challenges, lessons and opportunities",
  booktitle = "2014 {IEEE/ACM} Joint Conference on Digital Libraries, {JCDL} 2014",
  series    = "Proceedings of the {ACM/IEEE} Joint Conference on Digital Libraries",
  publisher = "Institute of Electrical and Electronics Engineers Inc.",
  pages     = "117--126",
  year      = "2014",
  month     = dec,
  day       = "1",
  doi       = "10.1109/JCDL.2014.6970157",
  language  = "english",
  keywords  = "Big Data, Information Extraction, Scholarly Big Data",
  abstract  = "We introduce a big data platform that provides various services for harvesting scholarly information and enabling efficient scholarly applications. The core architecture of the platform is built on a secured private cloud, crawls data using a scholarly focused crawler that leverages a dynamic scheduler, processes by utilizing a map reduce based crawl-extraction-ingestion (CEI) workflow, and is stored in distributed repositories and databases. Services such as scholarly data harvesting, information extraction, and user information and log data analytics are integrated into the platform and provided by an OAI and RESTful API. We also introduce a set of scholarly applications built on top of this platform including citation recommendation and collaborator discovery.",
  note      = "Publisher Copyright: {\textcopyright} 2014 IEEE. 14th {IEEE/ACM} Joint Conference on Digital Libraries, {JCDL} 2014; conference dates: 8--12 September 2014",
}