% Conference paper (WWW 2014 Companion) introducing the HistoryTracker tool.
% `series` is intentionally absent: the export repeated the booktitle there,
% which is not a series name and causes the proceedings title to print twice.
@inproceedings{ea23657cea9c4de08bab35ef8b6f9381,
  author    = {Weber, Matthew S.},
  title     = {Observing the web by understanding the past: Archival internet research},
  booktitle = {{WWW} 2014 Companion - Proceedings of the 23rd International Conference on World Wide Web},
  publisher = {Association for Computing Machinery, Inc},
  pages     = {1031--1036},
  year      = {2014},
  month     = apr,
  day       = {7},
  doi       = {10.1145/2567948.2579213},
  language  = {English (US)},
  keywords  = {Archived data, Data extraction, Network analysis, Occupy wall street, Social sciences, Web observatory},
  abstract  = {This paper discusses the challenges and opportunities for using archival Internet data in order to observe a host of social science phenomena. Specifically, this paper introduces HistoryTracker, a new tool for accessing and extracting archived data from the Internet Archive, the largest repository of archived Web data in existence. The HistoryTracker tool serves to create a Web observatory that allows scholars to study the history of the Web. HistoryTracker takes advantages of Hadoop processing capacity, and allows researchers to extract large swaths of archived data into a link list format that can be easily transferred to a number of other analytical tools. A brief illustration of the use of HistoryTracker is presented demonstrating the use of the tool. Finally, a number of continuing research challenges are discussed, and future research opportunities are outlined.},
  note      = {23rd International Conference on World Wide Web, WWW 2014 ; Conference date: 07-04-2014 Through 11-04-2014},
}