Skip to content

Commit

Permalink
Merge pull request #138 from jameslamb/bugfix/parsing
Browse files Browse the repository at this point in the history
changed strategy for dropping duplicates (fixes #137)
  • Loading branch information
jameslamb committed Jan 30, 2019
2 parents 98172a9 + 9101883 commit ced8bb4
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 44 deletions.
10 changes: 10 additions & 0 deletions .ci/before_install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

# Abort the script as soon as any command exits with a non-zero status,
# so a failed setup step fails the CI build instead of being ignored.
set -e

# For R package builds (TASK=rpkg), install the packages needed to
# build, document and test the package. Other build types need nothing here.
# NOTE: this script uses [[ ]], so it must be run with bash.
if [[ "$TASK" == "rpkg" ]]; then
    # install.packages() only emits a warning when a package fails to
    # install, which leaves Rscript's exit status at 0. Check explicitly
    # that every requested package is present and stop() otherwise, so
    # that `set -e` above actually catches a failed installation.
    Rscript -e "pkgs <- c('devtools', 'knitr', 'testthat', 'rmarkdown'); install.packages(pkgs, repos = 'https://cran.rstudio.com'); missing_pkgs <- setdiff(pkgs, rownames(installed.packages())); if (length(missing_pkgs) > 0) stop('Failed to install: ', paste(missing_pkgs, collapse = ', '))"
fi
129 changes: 87 additions & 42 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ addons:
packages:
- oracle-java8-set-default

before_install:
- .ci/before_install.sh

# Manually specifying each build configuration.
# Would be better to figure out how to build a matrix with two
# languages but share the ES_VERSION matrix across all builds.
Expand All @@ -30,9 +33,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=1.0.0
env:
- ES_VERSION=1.0.0
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -42,9 +46,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=1.4.4
env:
- ES_VERSION=1.4.4
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -54,9 +59,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=1.7.2
env:
- ES_VERSION=1.7.2
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -66,9 +72,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=2.0.2
env:
- ES_VERSION=2.0.2
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -78,9 +85,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=2.1.2
env:
- ES_VERSION=2.1.2
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -90,9 +98,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=2.2.2
env:
- ES_VERSION=2.2.2
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -102,9 +111,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=2.3.5
env:
- ES_VERSION=2.3.5
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -114,9 +124,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=5.0.2
env:
- ES_VERSION=5.0.2
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -126,9 +137,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=5.3.3
env:
- ES_VERSION=5.3.3
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -138,9 +150,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=5.4.3
env:
- ES_VERSION=5.4.3
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -150,9 +163,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=5.6.9
env:
- ES_VERSION=5.6.9
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -162,9 +176,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=6.0.1
env:
- ES_VERSION=6.0.1
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -174,9 +189,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=6.1.4
env:
- ES_VERSION=6.1.4
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -186,9 +202,10 @@ matrix:
- language: r
warnings_are_errors: true
cache: packages
env: ES_VERSION=6.2.4
env:
- ES_VERSION=6.2.4
- TASK=rpkg
install:
- Rscript -e "install.packages('devtools', repos = 'http://cran.rstudio.com')"
- Rscript -e "devtools::install('r-pkg')"
script:
- R CMD build r-pkg/
Expand All @@ -204,98 +221,126 @@ matrix:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=1.0.0
env:
- ES_VERSION=1.0.0
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=1.4.4
env:
- ES_VERSION=1.4.4
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=1.7.2
env:
- ES_VERSION=1.7.2
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=2.0.2
env:
- ES_VERSION=2.0.2
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=2.1.2
env:
- ES_VERSION=2.1.2
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=2.2.2
env:
- ES_VERSION=2.2.2
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=2.3.5
env:
- ES_VERSION=2.3.5
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=5.0.2
env:
- ES_VERSION=5.0.2
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=5.3.3
env:
- ES_VERSION=5.3.3
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=5.4.3
env:
- ES_VERSION=5.4.3
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=5.6.9
env:
- ES_VERSION=5.6.9
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=6.0.1
env:
- ES_VERSION=6.0.1
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=6.1.4
env:
- ES_VERSION=6.1.4
- TASK=pypkg
- language: python
python: 3.5
install:
pip install py-pkg/
script:
- pytest --verbose py-pkg/
env: ES_VERSION=6.2.4
env:
- ES_VERSION=6.2.4
- TASK=pypkg

before_script:
- case "$ES_VERSION" in
Expand Down
4 changes: 2 additions & 2 deletions r-pkg/R/es_search.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ es_search <- function(es_host
)
log_fatal(msg)
}

# assign 1 core by default, if the number of cores is NA
if (is.na(n_cores) || !assertthat::is.count(n_cores)){
msg <- "detectCores() returned NA. Assigning number of cores to be 1."
Expand Down Expand Up @@ -397,7 +397,7 @@ es_search <- function(es_host

# It's POSSIBLE that the parallel process gave us duplicates. Correct for that
data.table::setkeyv(outDT, NULL)
outDT <- unique(outDT)
outDT <- unique(outDT, by = "_id")

# Check we got the number of unique records we expected
if (nrow(outDT) < hits_to_pull && break_on_duplicates){
Expand Down

0 comments on commit ced8bb4

Please sign in to comment.