{ "items" : [ { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/FinalMPhil.pdf", "pub-type" : "misc", "uri" : "urn:f0ea7d848dca64a7975440527dc05d14", "date" : "2003", "author" : "Abadi, Daniel J.", "note" : "M.Phil. Thesis", "type" : "Publication", "year" : "2003", "howpublished" : "Cambridge University MPhil Dissertation", "abstract" : "Three different pronominal anaphora resolution techniques are examined. The first two techniques compare traditional salience-based approaches when different amounts of syntactic information are available. The improvement in pronoun resolution precision is quantified when a large scale grammar is used to extract detailed syntactic information rather than inferring this information robustly using pattern matching. The third technique uses domain knowledge instead of syntactic information to resolve pronouns. The domain knowledge required for this algorithm can be automatically acquired from a database backend schema representation of the domain. Each of these three techniques is evaluated separately, and then the domain-specific and non-domain-specific algorithms are combined and evaluated.", "label" : "Comparing Domain-Specific and Non-Domain-Specific Anaphora Resolution Techniques", "publicationtype" : "Thesis", "pdfkb" : "164", "key" : "abadi-anaphora" }, { "label" : "Marcus, Adam", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Marcus%2C+Adam", "original-name" : "Adam Marcus", "last-name" : "Marcus" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/vcoko.pdf", "pub-type" : "misc", "venue" : "SIGMOD", "uri" : "urn:a6c62c8d7f0b23841ff631b2012decbb", "pages" : "617--617", "date" : "2002", "author" : [ "Abadi, Daniel J.", "Cherniack, Mitch" ], "type" : "Publication", "year" : "2002", "howpublished" : "Demonstration. SIGMOD", "abstract" : "Query optimization generates plans to retrieve data requested by queries. 
Query rewriting, which is the first step of this process, rewrites a query expression into an equivalent form to prepare it for plan generation. COKO-KOLA introduced a new approach to query rewriting that enables query rewrites to be formally verified using an automated theorem prover. KOLA is a language for expressing term rewriting rules that can be 'fired' on query expressions. COKO is a language for expressing query rewriting transformations that are too complex to express with simple KOLA rules. COKO is a programming language designed for query optimizer development. Programming languages require debuggers, and in this demonstration, we illustrate our COKO debugger: Visual COKO. Visual COKO enables a query optimization developer to visually trace the execution of a COKO transformation. At every step of the transformation, the developer can view a tree-display that illustrates how the original query expression has evolved.", "label" : "Visual COKO: A Debugger for Query Optimizer Development", "publicationtype" : "Demonstration", "address" : "Madison, Wisconsin", "pdfkb" : "111", "key" : "vcoko" }, { "label" : "Maskey, Anurag S.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Maskey%2C+Anurag+S.", "original-name" : "Anurag S. 
Maskey", "last-name" : "Maskey" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/hadoopdb.pdf", "booktitle" : "VLDB", "pub-type" : "inproceedings", "venue" : "VLDB", "uri" : "urn:867d7624963359bf110dbb439a2870ea", "date" : "2009", "author" : [ "Abouzeid, Azza", "Bajda-Pawlikowski, Kamil", "Abadi, Daniel J.", "Silberschatz, Avi", "Rasin, Alexander" ], "type" : "Publication", "year" : "2009", "label" : "HadoopDB: An Architectural Hybrid of MapReduce and DBMS Technologies for Analytical Workloads", "publicationtype" : "Conference Paper", "address" : "Lyon, France", "pdfkb" : "400", "key" : "hadoopdb" }, { "label" : "Abouzeid, Azza", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Abouzeid%2C+Azza", "original-name" : "Azza Abouzeid", "last-name" : "Abouzeid" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/vldb07hstore.pdf", "booktitle" : "VLDB", "pub-type" : "inproceedings", "venue" : "VLDB", "uri" : "urn:0f7bc08487dc57bb84c0270b783b4d41", "date" : "2007", "author" : [ "Stonebraker, Michael", "Madden, Samuel R.", "Abadi, Daniel J.", "Harizopoulos, Stavros", "Hachem, Nabil", "Helland, Pat" ], "type" : "Publication", "year" : "2007", "abstract" : "In previous papers, some of us predicted the end of 'one size fits all' as a commercial relational DBMS paradigm. These papers presented reasons and experimental evidence that showed that the major relational RDBMS vendors can be outperformed by 1-2 orders of magnitude by specialized engines in the data warehouse, stream processing, text, and scientific database markets. Assuming that specialized engines dominate these markets over time, the current relational DBMS code lines will be left with the business data processing (OLTP) market and hybrid markets where more than one capability is required. In this paper we show that current RDBMSs can be beaten by nearly two orders of magnitude in the OLTP market as well. 
The experimental evidence comes from comparing a new OLTP prototype, H-Store, which we have built at M.I.T., to a popular RDBMS on the standard transactional benchmark, TPC-C. We conclude that current RDBMS code lines, while attempting to be a 'one size fits all' solution, in fact, excel at nothing. Hence, they are 25 year old legacy code lines that should be retired in favor of a collection of 'from scratch' specialized engines. The DBMS vendors (and research community) should start with a clean sheet of paper and design systems for tomorrow's requirements, not continue to push code lines and architectures designed for yesterday's needs.", "label" : "The End of an Architectural Era (It's Time for a Complete Rewrite)", "publicationtype" : "Conference Paper", "address" : "Vienna, Austria", "pdfkb" : "444", "key" : "hstore" }, { "label" : "Carney, Don", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Carney%2C+Don", "original-name" : "Don Carney", "last-name" : "Carney" }, { "label" : "Ryvkina, Esther", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Ryvkina%2C+Esther", "original-name" : "Esther Ryvkina", "last-name" : "Ryvkina" }, { "label" : "Rasin, Alex", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Rasin%2C+Alex", "original-name" : "Alex Rasin", "last-name" : "Rasin" }, { "label" : "Paulson, Erik", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Paulson%2C+Erik", "original-name" : "Erik Paulson", "last-name" : "Paulson" }, { "label" : "Kallman, Robert", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Kallman%2C+Robert", "original-name" : "Robert Kallman", "last-name" : "Kallman" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadi-cloud-ieee09.pdf", "pub-type" : "misc", "venue" : "IEEE Data Engineering Bulletin", "uri" : "urn:8aad33b96e300b34d188179bca01b2d6", "pages" 
: "3-12", "date" : "2009-03", "author" : "Abadi, Daniel J.", "month" : "March", "type" : "Publication", "year" : "2009", "howpublished" : "IEEE Data Engineering Bulletin, 32(1)", "label" : "Data Management in the Cloud: Limitations and Opportunities", "publicationtype" : "Journal Article", "pdfkb" : "62", "key" : "abadi-ieee-cloud" }, { "label" : "Batkin, Adam", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Batkin%2C+Adam", "original-name" : "Adam Batkin", "last-name" : "Batkin" }, { "label" : "Hatoun, Matt", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Hatoun%2C+Matt", "original-name" : "Matt Hatoun", "last-name" : "Hatoun" }, { "label" : "Hwang, Jeong-Hyon", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Hwang%2C+Jeong-Hyon", "original-name" : "Jeong-Hyon Hwang", "last-name" : "Hwang" }, { "label" : "Singer, A.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Singer%2C+A.", "original-name" : "A. 
Singer", "last-name" : "Singer" }, { "label" : "Cetintemel, Ugur", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Cetintemel%2C+Ugur", "original-name" : "Ugur Cetintemel", "last-name" : "Cetintemel" }, { "label" : "Hollenbach, Kate", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Hollenbach%2C+Kate", "original-name" : "Kate Hollenbach", "last-name" : "Hollenbach" }, { "label" : "Kimura, Hideaki", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Kimura%2C+Hideaki", "original-name" : "Hideaki Kimura", "last-name" : "Kimura" }, { "label" : "Balazinska, Magdalena", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Balazinska%2C+Magdalena", "original-name" : "Magdalena Balazinska", "last-name" : "Balazinska" }, { "label" : "Zdonik, Stan B.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Zdonik%2C+Stan+B.", "original-name" : "Stan B. Zdonik", "last-name" : "Zdonik" }, { "label" : "O'Neil, Patrick E.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#O%27Neil%2C+Patrick+E.", "original-name" : "Patrick E. O'Neil", "last-name" : "O'Neil" }, { "label" : "Liang, Velen", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Liang%2C+Velen", "original-name" : "Velen Liang", "last-name" : "Liang" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadisigmod06.pdf", "booktitle" : "SIGMOD", "pub-type" : "inproceedings", "venue" : "SIGMOD", "uri" : "urn:217de9092ca3f267826272889a979472", "pages" : "671--682", "date" : "2006", "author" : [ "Abadi, Daniel J.", "Madden, Samuel R.", "Ferreira, Miguel" ], "type" : "Publication", "year" : "2006", "abstract" : "Column-oriented database system architectures invite a re-evaluation of how and when data in databases is compressed. 
Storing data in a column-oriented fashion greatly increases the similarity of adjacent records on disk and thus opportunities for compression. The ability to compress many adjacent tuples at once lowers the per-tuple cost of compression, both in terms of CPU and space overheads. In this paper, we discuss how we extended C-Store (a column-oriented DBMS) with a compression sub-system. We show how compression schemes not traditionally used in row-oriented DBMSs can be applied to column-oriented systems. We then evaluate a set of compression schemes and show that the best scheme depends not only on the properties of the data but also on the nature of the query workload.", "label" : "Integrating Compression and Execution in Column-Oriented Database Systems", "publicationtype" : "Conference Paper", "address" : "Chicago, IL, USA", "pdfkb" : "265", "key" : "cstore-comp" }, { "label" : "Pavlo, Andrew", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Pavlo%2C+Andrew", "original-name" : "Andrew Pavlo", "last-name" : "Pavlo" }, { "label" : "Lindner, Wolfgang", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Lindner%2C+Wolfgang", "original-name" : "Wolfgang Lindner", "last-name" : "Lindner" }, { "label" : "Abadi, Daniel J.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Abadi%2C+Daniel+J.", "original-name" : "Daniel J. Abadi", "last-name" : "Abadi" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/AuroraDemo.pdf", "pub-type" : "misc", "venue" : "SIGMOD", "uri" : "urn:61c2b6408880f9d1986177444651ec74", "pages" : "666-666", "date" : "2003", "author" : [ "Abadi, Daniel J.", "Carney, Don", "Cetintemel, Ugur", "Cherniack, Mitch", "Convey, Christian", "Erwin, Christina", "Galvez, Eddie", "Hatoun, Matt", "Hwang, Jeong-Hyon", "Maskey, Anurag S.", "Rasin, Alexander", "Singer, A.", "Stonebraker, Michael", "Tatbul, Nesime", "Xing, Ying", "Yan, R.", "Zdonik, Stan B." 
], "type" : "Publication", "year" : "2003", "howpublished" : "Demonstration. SIGMOD", "abstract" : "Streams are continuous data feeds generated by such sources as sensors, satellites, and stock feeds. Monitoring applications track data from numerous streams, filtering them for signs of abnormal activity, and processing them for purposes of filtering, aggregation, reduction, and correlation. Aurora is a general-purpose data stream manager that is being designed and implemented (at Brandeis University, Brown University, and M.I.T.) to efficiently support a variety of real-time monitoring applications.", "label" : "Aurora: A Data Stream Management System", "publicationtype" : "Demonstration", "address" : "San Diego, CA, USA", "pdfkb" : "151", "key" : "aurora-demo" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadiphd.pdf", "pub-type" : "misc", "uri" : "urn:ba8a70237ac26acd488f480ab3c3fc60", "date" : "2008", "author" : "Abadi, Daniel J.", "award" : "2008 SIGMOD Jim Gray Doctoral Dissertation Award", "year" : "2008", "note" : "PhD Thesis", "type" : "Publication", "howpublished" : "MIT PhD Dissertation", "abstract" : "There are two obvious ways to map a two-dimensional relational database table onto a one-dimensional storage interface: store the table row-by-row, or store the table column-by-column. Historically, database system implementations and research have focused on the row-by-row data layout, since it performs best on the most common application for database systems: business transactional data processing. However, there are a set of emerging applications for database systems for which the row-by-row layout performs poorly. These applications are more analytical in nature, whose goal is to read through the data to gain new insight and use it to drive decision making and planning. 
In this dissertation, we study the problem of poor performance of row-by-row data layout for these emerging applications, and evaluate the column-by-column data layout opportunity as a solution to this problem. There have been a variety of proposals in the literature for how to build a database system on top of column-by-column layout. These proposals have different levels of implementation effort, and have different performance characteristics. If one wanted to build a new database system that utilizes the column-by-column data layout, it is unclear which proposal to follow. This dissertation provides (to the best of our knowledge) the only detailed study of multiple implementation approaches of such systems, categorizing the different approaches into three broad categories, and evaluating the tradeoffs between approaches. We conclude that building a query executer specifically designed for the column-by-column query layout is essential to achieve good performance. Consequently, we describe the implementation of C-Store, a new database system with a storage layer and query executer built for column-by-column data layout. We introduce three new query execution techniques that significantly improve performance. First, we look at the problem of integrating compression and execution so that the query executer is capable of directly operating on compressed data. This improves performance by improving I/O (less data needs to be read off disk), and CPU (the data need not be decompressed). We describe our solution to the problem of executer extensibility -- how can new compression techniques be added to the system without having to rewrite the operator code? Second, we analyze the problem of tuple construction (stitching together attributes from multiple columns into a row-oriented \"tuple\"). 
Tuple construction is required when operators need to access multiple attributes from the same tuple; however, if done at the wrong point in a query plan, a significant performance penalty is paid. We introduce an analytical model and some heuristics to use that help decide when in a query plan tuple construction should occur. Third, we introduce a new join technique, the \"invisible join\" that improves performance of a specific type of join that is common in the applications for which column-by-column data layout is a good idea. Finally, we benchmark performance of the complete C-Store database system against other column-oriented database system implementation approaches, and against row-oriented databases. We benchmark two applications. The first application is a typical analytical application for which column-by-column data layout is known to outperform row-by-row data layout. The second application is another emerging application, the Semantic Web, for which column-oriented database systems are not currently used. We find that on the first application, the complete C-Store system performed 10 to 18 times faster than alternative column-store implementation approaches, and 6 to 12 times faster than a commercial database system that uses a row-by-row data layout. On the Semantic Web application, we find that C-Store outperforms other state-of-the-art data management techniques by an order of magnitude, and outperforms other common data management techniques by almost two orders of magnitude. 
Benchmark queries, which used to take multiple minutes to execute, can now be answered in several seconds.", "label" : "Query Execution in Column-Oriented Database Systems", "publicationtype" : "Thesis", "pdfkb" : "1301", "key" : "abadi-phd" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/VLDB06.pdf", "booktitle" : "VLDB", "pub-type" : "inproceedings", "venue" : "VLDB", "uri" : "urn:521fb5b268807590adab6e9cea3ebf11", "pages" : "487--498", "date" : "2006", "author" : [ "Harizopoulos, Stavros", "Liang, Velen", "Abadi, Daniel J.", "Madden, Samuel R." ], "type" : "Publication", "year" : "2006", "abstract" : "Database systems have traditionally optimized performance for write-intensive workloads. Recently, there has been renewed interest in architectures that optimize read performance by using column-oriented data representation and light-weight compression. This previous work has shown that under certain broad classes of workloads, column-based systems can outperform row-based systems. Previous work, however, has not characterized the precise conditions under which a particular query workload can be expected to perform better on a column-oriented database. In this paper we first identify the distinctive components of a read-optimized DBMS and describe our implementation of a high-performance query engine that can operate on both row and column-oriented data. We then use our prototype to perform an in-depth analysis of the tradeoffs between column and row-oriented architectures. We explore these tradeoffs in terms of disk bandwidth, CPU cache latency, and CPU cycles. We show that for most database workloads, a carefully designed column system can outperform a carefully designed row system, sometimes by an order of magnitude. 
We also present an analytical model to predict whether a given workload on a particular hardware configuration is likely to perform better on a row or column-based system.", "label" : "Performance Tradeoffs in Read-Optimized Databases", "publicationtype" : "Conference Paper", "address" : "Seoul, Korea", "pdfkb" : "354", "key" : "cstore-perf" }, { "label" : "Jones, Evan", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Jones%2C+Evan", "original-name" : "Evan Jones", "last-name" : "Jones" }, { "label" : "Cherniack, Mitch", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Cherniack%2C+Mitch", "original-name" : "Mitch Cherniack", "last-name" : "Cherniack" }, { "label" : "Rasin, Alexander", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Rasin%2C+Alexander", "original-name" : "Alexander Rasin", "last-name" : "Rasin" }, { "label" : "Harizopoulos, Stavros", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Harizopoulos%2C+Stavros", "original-name" : "Stavros Harizopoulos", "last-name" : "Harizopoulos" }, { "label" : "Schuler, Jorg", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Schuler%2C+Jorg", "original-name" : "Jorg Schuler", "last-name" : "Schuler" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/benchmarks-sigmod09.pdf", "booktitle" : "SIGMOD", "pub-type" : "inproceedings", "venue" : "SIGMOD", "uri" : "urn:f314e37c058de97009e7bc8c3d44be31", "date" : "2009", "author" : [ "Pavlo, Andrew", "Paulson, Erik", "Rasin, Alexander", "Abadi, Daniel J.", "DeWitt, David J.", "Madden, Samuel R.", "Stonebraker, Michael" ], "type" : "Publication", "year" : "2009", "label" : "A Comparison of Approaches to Large Scale Data Analysis", "publicationtype" : "Conference Paper", "address" : "Providence, Rhode Island, USA", "pdfkb" : "251", "key" : "benchmarks-sigmod09" }, { "label" : "Ferreira, Miguel", 
"type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Ferreira%2C+Miguel", "original-name" : "Miguel Ferreira", "last-name" : "Ferreira" }, { "label" : "Zhang, Yang", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Zhang%2C+Yang", "original-name" : "Yang Zhang", "last-name" : "Zhang" }, { "label" : "Lau, Edmond", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Lau%2C+Edmond", "original-name" : "Edmond Lau", "last-name" : "Lau" }, { "label" : "DeWitt, David J.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#DeWitt%2C+David+J.", "original-name" : "David J. DeWitt", "last-name" : "DeWitt" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadi-rdf-vldbj.pdf", "pub-type" : "misc", "venue" : "VLDB Journal", "uri" : "urn:3aac45e0645490a7c35c66ce664e650e", "pages" : "385--406", "date" : "2009-04", "author" : [ "Abadi, Daniel J.", "Marcus, Adam", "Madden, Samuel R.", "Hollenbach, Kate" ], "month" : "April", "type" : "Publication", "year" : "2009", "howpublished" : "VLDB Journal, 18(2)", "label" : "SW-Store: A Vertically Partitioned DBMS for Semantic Web Data Management", "publicationtype" : "Journal Article", "pdfkb" : "1124", "key" : "abadi-swstore" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/vldb04.pdf", "pub-type" : "misc", "venue" : "VLDB", "uri" : "urn:451206a42d2321d5a49fec668a9a6abd", "pages" : "1361--1364", "date" : "2004", "author" : [ "Abadi, Daniel J.", "Lindner, Wolfgang", "Madden, Samuel R.", "Schuler, Jorg" ], "type" : "Publication", "year" : "2004", "howpublished" : "Demonstration. VLDB", "abstract" : "This demonstration shows an integrated query processing environment where users can seamlessly query both a data stream management system and a sensor network with one query expression. 
By integrating the two query processing systems, the optimization goals of the sensor network (primarily power) and server network (primarily latency and quality) can be unified into one quality of service metric.", "label" : "An Integration Framework for Sensor Networks and Data Stream Management Systems", "publicationtype" : "Demonstration", "address" : "Toronto, Canada", "pdfkb" : "116", "key" : "sensor-stream-integration" }, { "label" : "Xing, Ying", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Xing%2C+Ying", "original-name" : "Ying Xing", "last-name" : "Xing" }, { "label" : "Convey, Christian", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Convey%2C+Christian", "original-name" : "Christian Convey", "last-name" : "Convey" }, { "label" : "Tatbul, Nesime", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Tatbul%2C+Nesime", "original-name" : "Nesime Tatbul", "last-name" : "Tatbul" }, { "label" : "Hugg, John", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Hugg%2C+John", "original-name" : "John Hugg", "last-name" : "Hugg" }, { "id" : "9ea9632e1b3f95c1163dc975cf4ae827", "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/VisualCOKOThesis.pdf", "pub-type" : "misc", "uri" : "urn:9ea9632e1b3f95c1163dc975cf4ae827", "date" : "2002", "author" : "Abadi, Daniel J.", "type" : "Publication", "note" : "Undergraduate Thesis", "year" : "2002", "howpublished" : "Brandeis University Senior Honors Thesis", "abstract" : "Query optimization generates plans to retrieve data requested by queries, and query rewriting (rewriting a query expression into an equivalent form to prepare it for plan generation) is typically the first step. COKO-KOLA introduced a new approach to query rewriting that enables query rewrites to be formally verified using an automated theorem prover. 
KOLA is a language for expressing rewriting rules that can be fired on query expressions. COKO is a language for expressing query rewriting transformations that are too complex to express with simple KOLA rules. COKO is a programming language designed for query optimizer development. Programming languages require debuggers, and this paper describes a COKO debugger: Visual COKO. Visual COKO enables a query optimization developer to visually trace the execution of a COKO transformation. At every step of the transformation, the developer can view a tree-display that illustrates how the original query expression has evolved. Rule-based query rewriting and the COKO-KOLA project are described for background. Then the COKO syntax is summarized from the point of view of the COKO programmer using an example query transformation that converts query predicates to conjunctive normal form. Visual COKO is described and instructions for its use are presented. Finally, a description of its implementation is given.", "label" : "Visual COKO: A Debugger for Query Optimizer Development", "publicationtype" : "Thesis", "pdfkb" : "132", "key" : "abadi-ugrad-thesis" }, { "label" : "Lee, Sangdon", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Lee%2C+Sangdon", "original-name" : "Sangdon Lee", "last-name" : "Lee" }, { "label" : "Madden, Samuel R.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Madden%2C+Samuel+R.", "original-name" : "Samuel R. Madden", "last-name" : "Madden" }, { "label" : "Bajda-Pawlikowski, Kamil", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Bajda-Pawlikowski%2C+Kamil", "original-name" : "Kamil Bajda-Pawlikowski", "last-name" : "Bajda-Pawlikowski" }, { "label" : "O'Neil, Elizabeth J.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#O%27Neil%2C+Elizabeth+J.", "original-name" : "Elizabeth J. 
O'Neil", "last-name" : "O'Neil" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadi-sigmod08.pdf", "booktitle" : "SIGMOD", "pub-type" : "inproceedings", "venue" : "SIGMOD", "uri" : "urn:570d036dc91a8e8a88297d4e7d5ea32a", "date" : "2008", "author" : [ "Abadi, Daniel J.", "Madden, Samuel R.", "Hachem, Nabil" ], "type" : "Publication", "year" : "2008", "label" : "Column-Stores vs. Row-Stores: How Different Are They Really?", "publicationtype" : "Conference Paper", "address" : "Vancouver, Canada", "pdfkb" : "424", "key" : "abadi-sigmod08" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/vldb095.pdf", "pub-type" : "misc", "venue" : "VLDB Journal", "uri" : "urn:8a8fd8faad1b206254345064d52e9beb", "pages" : "120--139", "date" : "2003-09", "author" : [ "Abadi, Daniel J.", "Carney, Don", "Cetintemel, Ugur", "Cherniack, Mitch", "Convey, Christian", "Lee, Sangdon", "Stonebraker, Michael", "Tatbul, Nesime", "Zdonik, Stan B." ], "month" : "September", "type" : "Publication", "year" : "2003", "howpublished" : "VLDB Journal, 12(2)", "abstract" : "This paper describes the basic processing model and architecture of Aurora, a new system to manage data streams for monitoring applications. Monitoring applications differ substantially from conventional business data processing. The fact that a software system must process and react to continual inputs from many sources (e.g., sensors) rather than from human operators requires one to rethink the fundamental architecture of a DBMS for this application area. In this paper, we present Aurora, a new DBMS currently under construction at Brandeis University, Brown University, and M.I.T. 
We first provide an overview of the basic Aurora model and architecture and then describe in detail a stream-oriented set of operators.", "label" : "Aurora: A New Model and Architecture for Data Stream Management", "publicationtype" : "Journal Article", "pdfkb" : "984", "key" : "aurora" }, { "label" : "Natkins, Jonathan", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Natkins%2C+Jonathan", "original-name" : "Jonathan Natkins", "last-name" : "Natkins" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/hstore-demo.pdf", "pub-type" : "misc", "venue" : "VLDB", "uri" : "urn:9eb56eef3356d136bd37c95eebbd42e0", "date" : "2008", "author" : [ "Kallman, Robert", "Kimura, Hideaki", "Natkins, Jonathan", "Pavlo, Andrew", "Rasin, Alex", "Zdonik, Stan", "Jones, Evan", "Zhang, Yang", "Madden, Samuel", "Stonebraker, Michael", "Hugg, John", "Abadi, Daniel J." ], "year" : "2008", "type" : "Publication", "howpublished" : "Demonstration. VLDB", "label" : "H-Store: A High-Performance, Distributed Main Memory Transaction Processing System", "publicationtype" : "Demonstration", "address" : "Auckland, New Zealand", "pdfkb" : "440", "key" : "hstore-demo" }, { "label" : "Chen, Xuedong", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Chen%2C+Xuedong", "original-name" : "Xuedong Chen", "last-name" : "Chen" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadivldb05.pdf", "booktitle" : "VLDB", "pub-type" : "inproceedings", "venue" : "VLDB", "uri" : "urn:ffe2d285721c608bf0a378e8472a836e", "pages" : "769--780", "date" : "2005", "author" : [ "Abadi, Daniel J.", "Madden, Samuel R.", "Lindner, Wolfgang" ], "type" : "Publication", "year" : "2005", "abstract" : "This paper presents a set of algorithms for efficiently evaluating join queries over static data tables in sensor networks. We describe and evaluate three algorithms that take advantage of distributed join techniques. 
Our algorithms are capable of running in limited amounts of RAM, can distribute the storage burden over groups of nodes, and are tolerant to dropped packets and node failures. REED is thus suitable for a wide range of event-detection applications that traditional sensor network database and data collection systems cannot be used to implement.", "label" : "REED: Robust, Efficient Filtering and Event Detection in Sensor Networks", "publicationtype" : "Conference Paper", "address" : "Trondheim, Norway", "pdfkb" : "292", "key" : "reed" }, { "label" : "Lin, Amerson", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Lin%2C+Amerson", "original-name" : "Amerson Lin", "last-name" : "Lin" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/oltpperf-sigmod08.pdf", "booktitle" : "SIGMOD", "pub-type" : "inproceedings", "venue" : "SIGMOD", "uri" : "urn:affc0d0f33a7e1776b1d4baf31c42ada", "date" : "2008", "author" : [ "Harizopoulos, Stavros", "Abadi, Daniel J.", "Madden, Samuel R.", "Stonebraker, Michael" ], "type" : "Publication", "year" : "2008", "label" : "OLTP Through the Looking Glass, And What We Found There", "publicationtype" : "Conference Paper", "address" : "Vancouver, Canada", "pdfkb" : "287", "key" : "oltp-perf" }, { "label" : "Ahmad, Yanif", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Ahmad%2C+Yanif", "original-name" : "Yanif Ahmad", "last-name" : "Ahmad" }, { "label" : "Yan, R.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Yan%2C+R.", "original-name" : "R. 
Yan", "last-name" : "Yan" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/bench.pdf", "pub-type" : "techreport", "institution" : "MIT", "uri" : "urn:cd590ace475d4603a5ceaaeeb233cda6", "date" : "2007", "number" : "MIT-CSAIL-TR-2007-036", "author" : [ "Abadi, Daniel J.", "Marcus, Adam", "Madden, Samuel R.", "Hollenbach, Kate" ], "year" : "2007", "type" : "Publication", "abstract" : "This report describes the Barton Libraries RDF dataset and Longwell query benchmark that we use for our recent VLDB paper on Scalable Semantic Web Data Management Using Vertical Partitioning", "label" : "Using The Barton Libraries Dataset As An RDF Benchmark", "publicationtype" : "Technical Report", "pdfkb" : "357", "key" : "barton-benchmark" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadirdf.pdf", "booktitle" : "VLDB", "pub-type" : "inproceedings", "venue" : "VLDB", "uri" : "urn:cfa5460acf189cd7ff0a8ae31264a541", "date" : "2007", "author" : [ "Abadi, Daniel J.", "Marcus, Adam", "Madden, Samuel R.", "Hollenbach, Kate" ], "award" : "Best Paper Award", "type" : "Publication", "year" : "2007", "abstract" : "Efficient management of RDF data is an important factor in realizing the Semantic Web vision. Performance and scalability issues are becoming increasingly pressing as Semantic Web technology is applied to real-world applications. In this paper, we examine the reasons why current data management solutions for RDF data scale poorly, and explore the fundamental scalability limitations of these approaches. We review the state of the art for improving performance for RDF databases and consider a recent suggestion, 'property tables'. We then discuss practically and empirically why this solution has undesirable features. As an improvement, we propose an alternative solution: vertically partitioning the RDF data. 
We compare the performance of vertical partitioning with prior art on queries generated by a Web-based RDF browser over a large-scale (more than 50 million triples) catalog of library data. Our results show that a vertical partitioned schema achieves similar performance to the property table technique while being much simpler to design. Further, if a column-oriented DBMS (a database architected specially for the vertically partitioned case) is used instead of a row-oriented DBMS, another order of magnitude performance improvement is observed, with query times dropping from minutes to several seconds.", "label" : "Scalable Semantic Web Data Management Using Vertical Partitioning", "publicationtype" : "Conference Paper", "address" : "Vienna, Austria", "pdfkb" : "246", "key" : "abadi-rdf" }, { "label" : "Silberschatz, Avi", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Silberschatz%2C+Avi", "original-name" : "Avi Silberschatz", "last-name" : "Silberschatz" }, { "label" : "Erwin, Christina", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Erwin%2C+Christina", "original-name" : "Christina Erwin", "last-name" : "Erwin" }, { "label" : "Stonebraker, Michael", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Stonebraker%2C+Michael", "original-name" : "Michael Stonebraker", "last-name" : "Stonebraker" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/vldb.pdf", "booktitle" : "VLDB", "pub-type" : "inproceedings", "venue" : "VLDB", "uri" : "urn:109c5b6b52727ceee286929f8c2d8cfb", "pages" : "553--564", "date" : "2005", "author" : [ "Stonebraker, Michael", "Abadi, Daniel J.", "Batkin, Adam", "Chen, Xuedong", "Cherniack, Mitch", "Ferreira, Miguel", "Lau, Edmond", "Lin, Amerson", "Madden, Samuel R.", "O'Neil, Elizabeth J.", "O'Neil, Patrick E.", "Rasin, Alexander", "Tran, Nga", "Zdonik, Stan B." 
], "type" : "Publication", "year" : "2005", "abstract" : "This paper presents the design of a read-optimized relational DBMS that contrasts sharply with most current systems, which are write-optimized. Among the many differences in its design are: storage of data by column rather than by row, careful coding and packing of objects into storage including main memory during query processing, storing an overlapping collection of column-oriented projections, rather than the current fare of tables and indexes, a non-traditional implementation of transactions which includes high availability and snapshot isolation for read-only transactions, and the extensive use of bitmap indexes to complement B-tree structures. We present preliminary performance data on a subset of TPC-H and show that the system we are building, C-Store, is substantially faster than popular commercial products. Hence, the architecture looks very encouraging.", "label" : "C-Store: A Column-Oriented DBMS", "publicationtype" : "Conference Paper", "address" : "Trondheim, Norway", "pdfkb" : "170", "key" : "cstore" }, { "label" : "Helland, Pat", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Helland%2C+Pat", "original-name" : "Pat Helland", "last-name" : "Helland" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadiicde2007.pdf", "booktitle" : "ICDE", "pub-type" : "inproceedings", "venue" : "ICDE", "uri" : "urn:f8a5852e1a905a66304a954b67aa74dc", "pages" : "466--475", "date" : "2007", "author" : [ "Abadi, Daniel J.", "Myers, Daniel S.", "DeWitt, David J.", "Madden, Samuel R." ], "type" : "Publication", "year" : "2007", "abstract" : "There has been renewed interest in column-oriented database architectures in recent years. For read-mostly query workloads such as those found in data warehouse and decision support applications, column-stores have been shown to perform particularly well relative to row-stores. 
In order for column-stores to be readily adopted as a replacement for row-stores, however, they must present the same interface to client applications as do row stores, which implies that they must output row-store-style tuples. Thus, the input columns stored on disk must be converted to rows at some point in the query plan, but the optimal point at which to do the conversion is not obvious. This problem can be considered as the opposite of the projection problem in row-store systems: while row-stores need to determine where in query plans to place projection operators to make tuples narrower, column-stores need to determine when to combine single-column projections into wider tuples. This paper describes a variety of strategies for tuple construction and intermediate result representations and provides a systematic evaluation of these strategies.", "label" : "Materialization Strategies in a Column-Oriented DBMS", "publicationtype" : "Conference Paper", "address" : "Istanbul, Turkey", "pdfkb" : "327", "key" : "cstore-mat" }, { "label" : "Hachem, Nabil", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Hachem%2C+Nabil", "original-name" : "Nabil Hachem", "last-name" : "Hachem" }, { "label" : "Tran, Nga", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Tran%2C+Nga", "original-name" : "Nga Tran", "last-name" : "Tran" }, { "label" : "Galvez, Eddie", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Galvez%2C+Eddie", "original-name" : "Eddie Galvez", "last-name" : "Galvez" }, { "label" : "Madden, Samuel", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Madden%2C+Samuel", "original-name" : "Samuel Madden", "last-name" : "Madden" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/abadicidr07.pdf", "booktitle" : "CIDR", "pub-type" : "inproceedings", "venue" : "CIDR", "uri" : "urn:017beec44686f6b1a98acf968cdad9d3", "date" : "2007", 
"author" : "Abadi, Daniel J.", "type" : "Publication", "year" : "2007", "abstract" : "While it is generally accepted that data warehouses and OLAP workloads are excellent applications for column-stores, this paper speculates that column-stores may well be suited for additional applications. In particular we observe that column-stores do not see a performance degradation when storing extremely wide tables, and column-stores handle sparse data very well. These two properties lead us to conjecture that column-stores may be good storage layers for Semantic Web data, XML data, and data with GEM-style schemas.", "label" : "Column Stores for Wide and Sparse Data", "publicationtype" : "Conference Paper", "address" : "Asilomar, CA, USA", "pdfkb" : "156", "key" : "abadi-cidr" }, { "pdfurl" : "http://cs-www.cs.yale.edu/homes/dna/papers/cidr05.pdf", "booktitle" : "CIDR", "pub-type" : "inproceedings", "venue" : "CIDR", "uri" : "urn:9cdb4fe774b9a78952f479abcdbd5070", "date" : "2005", "author" : [ "Abadi, Daniel J.", "Ahmad, Yanif", "Balazinska, Magdalena", "Cetintemel, Ugur", "Cherniack, Mitch", "Hwang, Jeong-Hyon", "Lindner, Wolfgang", "Maskey, Anurag S.", "Rasin, Alexander", "Ryvkina, Esther", "Tatbul, Nesime", "Xing, Ying", "Zdonik, Stan B." ], "type" : "Publication", "year" : "2005", "abstract" : "Borealis is a second-generation distributed stream processing engine that is being developed at Brandeis University, Brown University, and MIT. Borealis inherits core stream processing functionality from Aurora and distribution functionality from Medusa. Borealis modifies and extends both systems in non-trivial and critical ways to provide advanced capabilities that are commonly required by newly-emerging stream processing applications. In this paper, we outline the basic design and functionality of Borealis. Through sample real-world applications, we motivate the need for dynamically revising query results and modifying query specifications. 
We then describe how Borealis addresses these challenges through an innovative set of features, including revision records, time travel, and control lines. Finally, we present a highly flexible and scalable QoS-based optimization model that operates across server and sensor networks and a new fault-tolerance model with flexible consistency-availability trade-offs.", "label" : "The Design of the Borealis Stream Processing Engine", "publicationtype" : "Conference Paper", "address" : "Asilomar, CA, USA", "pdfkb" : "143", "key" : "borealis" }, { "label" : "Zdonik, Stan", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Zdonik%2C+Stan", "original-name" : "Stan Zdonik", "last-name" : "Zdonik" }, { "label" : "Myers, Daniel S.", "type" : "Author", "uri" : "http://cs-www.cs.yale.edu/homes/dna/pubs/abadirefs.bib#Myers%2C+Daniel+S.", "original-name" : "Daniel S. Myers", "last-name" : "Myers" } ], "types" : { "Author" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#Author" }, "Publication" : { "label" : "Publication", "uri" : "http://simile.mit.edu/2006/11/bibtex#Publication", "pluralLabel" : "Publications" } }, "properties" : { "number" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#number" }, "note" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#note" }, "author" : { "reverseLabel" : "authors of", "label" : "author", "groupingLabel" : "their authors", "uri" : "http://simile.mit.edu/2006/11/bibtex#author", "reverseGroupingLabel" : "what they author", "reversePluralLabel" : "authors of", "valueType" : "item", "pluralLabel" : "authors" }, "address" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#address" }, "howpublished" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#howpublished" }, "publicationtype" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#publicationtype" }, "venue" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#venue" }, "institution" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#institution" }, "pages" : { 
"uri" : "http://simile.mit.edu/2006/11/bibtex#pages" }, "year" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#year" }, "booktitle" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#booktitle" }, "abstract" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#abstract" }, "pdfkb" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#pdfkb" }, "original-name" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#original-name" }, "pdfurl" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#pdfurl" }, "date" : { "uri" : "http://purl.org/dc/elements/1.1/date" }, "award" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#award" }, "month" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#month" }, "last-name" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#last-name" }, "key" : { "uri" : "http://simile.mit.edu/2006/11/bibtex#key" } } }