Merge pull request #9 from vincentlaucsb/cxx17

Upgrade to C++17
vincentlaucsb · Jul 1, 2018 · 8d4a98e · 8d4a98e
2 parents 9ff7e2e + ffdf601
commit 8d4a98e
Show file tree

Hide file tree

Showing 175 changed files with 7,730 additions and 4,684 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -4,19 +4,19 @@ matrix:
       addons:
         apt:
           sources: ['ubuntu-toolchain-r-test']
-          packages: ['g++-6', 'valgrind']
+          packages: ['g++-8', 'valgrind']
 dist: trusty
 sudo: required
 language:
   - cpp
 script:
-  - export CC=gcc-6
-  - export CXX=g++-6
+  - export CC=gcc-8
+  - export CXX=g++-8
   - make test_all
   # - valgrind --leak-check=full ./test_csv_parser
   # Disable until Travis updates their version of Valgrind
 after_success:
-  - if [ "$CXX" == "g++-6" ]; then
+  - if [ "$CXX" == "g++-8" ]; then
         cd test_results;
         bash <(curl -s https://codecov.io/bash);
     fi;
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,39 +1,48 @@
 cmake_minimum_required(VERSION 3.9)
 project(csv_parser)
 
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
 
 if (MSVC)
 else()
 	set(CMAKE_CXX_FLAGS "-pthread")
+	set(CMAKE_CXX_FLAGS_RELEASE "-O3")
 	set(CMAKE_CXX_FLAGS_DEBUG "-Og -g -lgcov --coverage")
 endif(MSVC)
 
-set(SOURCES src/csv_reader.cpp src/csv_stat.cpp)
+set(SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR}/src)
+set(TEST_DIR ${CMAKE_CURRENT_LIST_DIR}/tests)
+set(SOURCES
+    ${SOURCE_DIR}/csv_reader.cpp
+	${SOURCE_DIR}/csv_row.cpp
+    ${SOURCE_DIR}/csv_stat.cpp
+	${SOURCE_DIR}/data_type.cpp
+)
 set(TEST_SOURCES
-	tests/catch.hpp
-	tests/main.cpp
-	tests/test_data_type.cpp
-	tests/test_csv_stat.cpp
-	tests/test_read_csv.cpp
-	tests/test_write_csv.cpp
+	${TEST_DIR}/catch.hpp
+	${TEST_DIR}/main.cpp
+	${TEST_DIR}/test_csv_row.cpp
+	${TEST_DIR}/test_data_type.cpp
+	${TEST_DIR}/test_csv_stat.cpp
+	${TEST_DIR}/test_read_csv.cpp
+	${TEST_DIR}/test_write_csv.cpp
 )
 
-include_directories(${CMAKE_SOURCE_DIR}/src/)
-include_directories(${CMAKE_SOURCE_DIR}/tests/)
+include_directories(${SOURCE_DIR})
+include_directories(${TEST_DIR})
 
 ## Main Library
-add_library(csv ${SOURCES})
+add_library(csv STATIC ${SOURCES})
 set_target_properties(csv PROPERTIES LINKER_LANGUAGE CXX)
 
 ## Executables
-add_executable(csv_info programs/csv_info.cpp)
+add_executable(csv_info ${CMAKE_CURRENT_LIST_DIR}/programs/csv_info.cpp)
 target_link_libraries(csv_info csv)
 
-add_executable(csv_bench programs/csv_bench.cpp)
+add_executable(csv_bench ${CMAKE_CURRENT_LIST_DIR}/programs/csv_bench.cpp)
 target_link_libraries(csv_bench csv)
 
-add_executable(csv_stats programs/csv_stats.cpp)
+add_executable(csv_stats ${CMAKE_CURRENT_LIST_DIR}/programs/csv_stats.cpp)
 target_link_libraries(csv_stats csv)
 
 ## Tests

diff --git a/Doxyfile b/Doxyfile
@@ -228,7 +228,7 @@ TAB_SIZE               = 4
 # "Side Effects:". You can put \n's in the value part of an alias to insert
 # newlines.
 
-ALIASES                =
+ALIASES                = "complexity=@par Complexity:\n" 
 
 # This tag can be used to specify a number of word-keyword mappings (TCL only).
 # A mapping has the form "name=value". For example adding "class=itcl::class"
@@ -811,7 +811,7 @@ RECURSIVE              = NO
 # Note that relative paths are relative to the directory from which doxygen is
 # run.
 
-EXCLUDE                =
+EXCLUDE                = tests/catch.hpp
 
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
@@ -827,7 +827,7 @@ EXCLUDE_SYMLINKS       = NO
 # Note that the wildcards are matched against the file with absolute path, so to
 # exclude all test directories for example use the pattern */test/*
 
-EXCLUDE_PATTERNS       = *sqlite*
+EXCLUDE_PATTERNS       =
 
 # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
 # (namespaces, classes, functions, etc.) that should be excluded from the
@@ -844,7 +844,7 @@ EXCLUDE_SYMBOLS        =
 # that contain example code fragments that are included (see the \include
 # command).
 
-EXAMPLE_PATH           =
+EXAMPLE_PATH           = programs/ tests/
 
 # If the value of the EXAMPLE_PATH tag contains directories, you can use the
 # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
@@ -933,7 +933,7 @@ USE_MDFILE_AS_MAINPAGE = README.md
 # also VERBATIM_HEADERS is set to NO.
 # The default value is: NO.
 
-SOURCE_BROWSER         = NO
+SOURCE_BROWSER         = YES
 
 # Setting the INLINE_SOURCES tag to YES will include the body of functions,
 # classes and enums directly into the documentation.
@@ -1416,7 +1416,7 @@ DISABLE_INDEX          = NO
 # The default value is: NO.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-GENERATE_TREEVIEW      = NO
+GENERATE_TREEVIEW      = YES
 
 # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
 # doxygen will group on one line in the generated HTML documentation.

diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
 BUILD_DIR = build
 TEST_DIR = tests
 IDIR = src/
-CFLAGS = -pthread -std=c++11
+CFLAGS = -pthread -std=c++17
 TFLAGS = -I$(IDIR) -Itests/ $(CFLAGS) -Og -g --coverage
 
 # Main Library
@@ -36,7 +36,7 @@ run_test_csv_parser: test_csv_parser
 code_cov: test_csv_parser
 	mkdir -p test_results
 	mv *.gcno *.gcda $(PWD)/test_results
-	gcov $(SOURCES) -o test_results --relative-only
+	gcov-8 $(SOURCES) -o test_results --relative-only
 	mv *.gcov test_results
 
 code_cov_report:

diff --git a/README.md b/README.md
@@ -11,8 +11,7 @@ This CSV parser uses multiple threads to simultaneously pull data from disk and
 ### RFC 4180 Compliance
 This CSV parser is much more than a fancy string splitter, and follows every guideline from [RFC 4180](https://www.rfc-editor.org/rfc/rfc4180.txt). On the other hand, it is also robust and capable of handling deviations from the standard. An optional strict parsing mode can be enabled to sniff out errors in files.
 
-### Easy to Use and Well-Documented
-https://vincentlaucsb.github.io/csv-parser
+### Easy to Use and [Well-Documented](https://vincentlaucsb.github.io/csv-parser)
 
 In addition to being easy on your computer's hardware, this library is also easy on you--the developer. Some helpful features include:
  * Decent ability to guess the dialect of a file (CSV, tab-delimited, etc.)
@@ -22,43 +21,36 @@ In addition to being easy on your computer's hardware, this library is also easy
 ### Well Tested
 
 ## Building
-All of this library's essentials are located under `src/`, with no dependencies aside from the STL. This is a C++11 library developed using Microsoft Visual Studio and compatible with g++ and clang. The CMakeList and Makefile contain instructions for building the main library, some sample programs, and the test suite.
+All of this library's essentials are located under `src/`, with no dependencies aside from the STL. This is a C++17 library developed using Microsoft Visual Studio and compatible with g++ and clang. The CMakeList and Makefile contain instructions for building the main library, some sample programs, and the test suite.
 
-**GCC/Clang Compiler Flags**: `-pthread-O3 -std=c++11`
+**GCC/Clang Compiler Flags**: `-pthread -O3 -std=c++17`
 
-## Features & Examples
-### Reading a Large File
-With this library, you can easily stream over a large file without reading its entirety into memory.
-
-```cpp
-# include "csv_parser.h"
-
-using namespace csv;
+### CMake Instructions
+If you're including this in another CMake project, you can simply clone this repo into your project directory, 
+and add the following to your CMakeLists.txt:
 
-...
+```
+include(${CMAKE_SOURCE_DIR}/.../csv-parser/CMakeLists.txt)
 
-CSVReader reader("very_big_file.csv");
-std::vector<std::string> row;
+# ...
 
-while (reader.read_row(row)) {
-    // Do stuff with row here
-}
+add_executable(<your program> ...)
+target_link_libraries(<your program> csv)
 
 ```
 
-### Reordering/Subsetting Data
-You can also reorder a CSV or only keep a subset of the data simply by passing
-in a vector of column indices.
+## Features & Examples
+### Reading a Large File
+With this library, you can easily stream over a large file without reading its entirety into memory.
 
 ```cpp
-# include "csv_parser.h"
+# include "csv_parser.hpp"
 
 using namespace csv;
 
 ...
 
-std::vector<size_t> new_order = { 0, 2, 3, 5 };
-CSVReader reader("very_big_file.csv", new_order);
+CSVReader reader("very_big_file.csv");
 std::vector<std::string> row;
 
 while (reader.read_row(row)) {
@@ -67,11 +59,13 @@ while (reader.read_row(row)) {
 
 ```
 
-### Automatic Type Conversions
+### Indexing Rows by Column Name and Type Conversions
 If your CSV has lots of numeric values, you can also have this parser automatically convert them to the proper data type.
+For efficiency, numeric values are lazily converted, and the indexing feature is implemented by having all rows
+share a pointer to the original set of column names.
 
 ```cpp
-# include "csv_parser.h"
+# include "csv_parser.hpp"
 
 using namespace csv;
 
@@ -80,14 +74,14 @@ using namespace csv;
 CSVReader reader("very_big_file.csv");
 std::vector<CSVField> row;
 
-size_t date = reader.index_of("timestamp");
-
 while (reader.read_row(row)) {
-    if (row[date].is_int())
-        row[date].get<int>();
+    if (row["timestamp"].is_int())
+        row["timestamp"].get<int>();
     
-    // get<std::string>() can be called on any values
-    std::cout << row[date].get<std::string>() << std::endl;
+    // get<>() returns a std::string_view of the original field
+    for (size_t i = 0; i < row.size(); i++) {
+        std::cout << row[i].get<>() << ...
+    }
 }
 
 ```
@@ -96,7 +90,7 @@ while (reader.read_row(row)) {
 Although the CSV parser has a decent guessing mechanism, in some cases it is preferable to specify the exact parameters of a file.
 
 ```cpp
-# include "csv_parser.h"
+# include "csv_parser.hpp"
 # include ...
 
 using namespace csv;
@@ -120,31 +114,33 @@ while (reader.read_row(row)) {
 ### Parsing an In-Memory String
 
 ```cpp
-# include "csv_parser.h"
-# include ...
+# include "csv_parser.hpp"
 
 using namespace csv;
 
-int main() { 
-    std::string csv_string = "Actor,Character"
-        "Will Ferrell,Ricky Bobby\r\n"
-        "John C. Reilly,Cal Naughton Jr.\r\n"
-        "Sacha Baron Cohen,Jean Giard\r\n"
+...
 
-    // Method 1
-    std::deque<CSVRow> rows = parse(csv_string);
-    for (auto& r: rows) {
-        // Do stuff with row here
-    }
-
-    // Method 2
-    std::deque< std::vector<std::string> > rows = parse(csv_string);
-    for (auto& r: rows) {
-        // Do stuff with row here
-    }
+// Method 1: Using parse()
+std::string csv_string = "Actor,Character"
+    "Will Ferrell,Ricky Bobby\r\n"
+    "John C. Reilly,Cal Naughton Jr.\r\n"
+    "Sacha Baron Cohen,Jean Giard\r\n"
+
+auto rows = parse(csv_string);
+for (auto& r: rows) {
+    // Do stuff with row here
+}
     
-    // ..
+// Method 2: Using _csv operator
+auto rows = "Actor,Character"
+    "Will Ferrell,Ricky Bobby\r\n"
+    "John C. Reilly,Cal Naughton Jr.\r\n"
+    "Sacha Baron Cohen,Jean Giard\r\n"_csv;
+
+for (auto& r: rows) {
+    // Do stuff with row here
 }
+
 ```
 
 ### Writing CSV Files
@@ -169,7 +165,6 @@ writer << vector<string>({ "A", "B", "C" })
 
 ```
 
-
 ### Utility Functions
  * **Return column names:** get_col_names()
  * **Return the position of a column:** get_col_pos();