CSV Reader
#include <csv2/reader.hpp>
// Minimal csv2::Reader example: memory-map foo.csv and visit every cell.
int main() {
  // This snippet has no using-directive, so every policy type passed as a
  // template argument is qualified with csv2:: explicitly.
  csv2::Reader<csv2::delimiter<','>,
               csv2::quote_character<'"'>,
               csv2::first_row_is_header<true>,
               csv2::trim_policy::trim_whitespace> csv;

  // Only iterate when mmap() reports success.
  if (csv.mmap("foo.csv")) {
    // First row of the file (see Reader::header()).
    const auto header = csv.header();
    for (const auto row: csv) {
      for (const auto cell: row) {
        // Do something with cell value
        // std::string value;
        // cell.read_value(value);
      }
    }
  }
}
Performance Benchmark
This benchmark measures the average execution time (of 5 runs after 3 warmup runs) for csv2
to memory-map the input CSV file and iterate over every cell in the CSV. See benchmark/main.cpp
for more details.
cd benchmark
g++ -I../include -O3 -std=c++11 -o main main.cpp
./main <csv_file>
Hardware
MacBook Pro (15-inch, 2019)
Processor: 2.4 GHz 8-Core Intel Core i9
Memory: 32 GB 2400 MHz DDR4
Operating System: macOS Catalina version 10.15.3
Results (as of 23 APR 2020)
Dataset | File Size | Rows | Cols | Time |
---|---|---|---|---|
Denver Crime Data | 111 MB | 479,100 | 19 | 0.174s |
AirBnb Paris Listings | 196 MB | 141,730 | 96 | 0.289s |
2015 Flight Delays and Cancellations | 574 MB | 5,819,079 | 31 | 1.047s |
StackLite: Stack Overflow questions | 870 MB | 17,203,824 | 7 | 1.505s |
Used Cars Dataset | 1.4 GB | 539,768 | 25 | 1.979s |
Title-Based Semantic Subject Indexing | 3.7 GB | 12,834,026 | 4 | 5.929s |
Bitcoin tweets - 16M tweets | 4 GB | 47,478,748 | 9 | 7.040s |
DDoS Balanced Dataset | 6.3 GB | 12,794,627 | 85 | 12.648s |
Seattle Checkouts by Title | 7.1 GB | 34,892,623 | 11 | 12.883s |
SHA-1 password hash dump | 11 GB | 262,974,241 | 2 | 19.505s |
DOHUI NOH scaled_data | 16 GB | 496,782 | 3213 | 32.780s |
Reader API
Here is the public API available to you:
// Public interface of the CSV reader (declarations only; bodies are not shown).
// Each template parameter is a policy type. The defaults select a comma
// delimiter, a double-quote quote character, "first row is header", and the
// trim_whitespace trim policy.
template <class delimiter = delimiter<','>,
class quote_character = quote_character<'"'>,
class first_row_is_header = first_row_is_header<true>,
class trim_policy = trim_policy::trim_whitespace>
class Reader {
public:
// Use this if you'd like to mmap and read from file
// Returns a bool; the usage example only iterates when the result is true.
// NOTE(review): string_type is not declared in this listing - presumably a
// string-like type holding the file name; confirm against the header.
bool mmap(string_type filename);
// Use this if you have the CSV contents in std::string already
// Returns a bool, like mmap().
bool parse(string_type contents);
// Shape
// NOTE(review): whether rows() counts the header row is not stated here - confirm.
size_t rows() const;
size_t cols() const;
// Row iterator
// If first_row_is_header, row iteration will start
// from the second row
// begin()/end() are what make `for (const auto row: csv)` work.
RowIterator begin() const;
RowIterator end() const;
// Access the first row of the CSV
Row header() const;
};
Here's the `Row` class:
// Row class
// Row class
// One row of the CSV, as produced by iterating a Reader or by Reader::header().
class Row {
public:
// Get raw contents of the row
// The result is delivered through the `value` out-parameter (the function
// itself returns void).
// NOTE(review): Container is not declared in this listing - presumably a
// std::string-like type; confirm against the header.
void read_raw_value(Container& value) const;
// Cell iterator
// begin()/end() are what make `for (const auto cell: row)` work.
CellIterator begin() const;
CellIterator end() const;
};
and here's the `Cell` class:
// Cell class
// Cell class
// One cell of a Row, as produced by iterating the row.
class Cell {
public:
// Get raw contents of the cell
// The result is delivered through the `value` out-parameter (returns void).
void read_raw_value(Container& value) const;
// Get converted contents of the cell
// Handles escaped content, e.g.,
// """foo""" => ""foo""
// As with read_raw_value(), the result is delivered through `value`; the
// usage example passes a std::string.
void read_value(Container& value) const;
};
CSV Writer
This library also provides a basic `csv2::Writer` class that can be used to write CSV rows to a file. Here's a basic usage example:
#include <csv2/writer.hpp>
#include <fstream>
#include <string>
#include <vector>
using namespace csv2;
// Minimal csv2::Writer example: write three rows of three cells to foo.csv.
int main() {
  std::ofstream stream("foo.csv");
  // Stop early if the file could not be opened; otherwise every write below
  // would fail silently and the example would appear to succeed.
  if (!stream.is_open()) {
    return 1;
  }

  Writer<delimiter<','>> writer(stream);

  std::vector<std::vector<std::string>> rows =
    {
      {"a", "b", "c"},
      {"1", "2", "3"},
      {"4", "5", "6"}
    };

  writer.write_rows(rows);
  stream.close();
}
Writer API
Here is the public API available to you:
template <class delimiter = delimiter<','>>
class Writer {
public:
// Construct using an std::ofstream
Writer(output_file_stream stream);
// Use this to write a single row to file
void write_row(container_of_strings row);
// Use this to write a list of rows to file
void write_rows(container_of_rows rows);