From 2f1e55b3f55921977f3b26feab06cf23761accf5 Mon Sep 17 00:00:00 2001 From: Wush Wu Date: Mon, 21 Sep 2015 00:11:24 +0800 Subject: [PATCH] resolve #91 --- R/RcppExports.R | 8 ++++---- R/hashed.model.matrix.R | 2 +- man/hashed.model.matrix.Rd | 2 +- src/RcppExports.cpp | 36 ++++++++++++++++++------------------ tests/test-hashing.R | 10 +++++----- tests/test-memcheck.R | 2 +- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 18b0d18..a7629a2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -17,6 +17,10 @@ tomatrix <- function(m) { .Call('FeatureHashing_tomatrix', PACKAGE = 'FeatureHashing', m) } +.hashed.model.matrix.dataframe <- function(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress) { + .Call('FeatureHashing_hashed_model_matrix_dataframe', PACKAGE = 'FeatureHashing', tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress) +} + #'@export hash.sign hash.sign <- function(src) { .Call('FeatureHashing_xi', PACKAGE = 'FeatureHashing', src) @@ -32,10 +36,6 @@ hashed.interaction.value <- function(src) { .Call('FeatureHashing_h2', PACKAGE = 'FeatureHashing', src) } -.hashed.model.matrix.dataframe <- function(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress) { - .Call('FeatureHashing_hashed_model_matrix_dataframe', PACKAGE = 'FeatureHashing', tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress) -} - #'@title Convert the integer to raw vector with endian correction #'@param src integer value. #'@return raw vector with length 4 diff --git a/R/hashed.model.matrix.R b/R/hashed.model.matrix.R index 24c950c..2952661 100644 --- a/R/hashed.model.matrix.R +++ b/R/hashed.model.matrix.R @@ -196,7 +196,7 @@ #'@importClassesFrom Matrix dgCMatrix #'@aliases hashed.value hash.sign hashed.interaction.value hashed.model.matrix <- function(formula, data, hash.size = 2^18, transpose = FALSE, - create.mapping = FALSE, is.dgCMatrix = TRUE, signed.hash = TRUE, + create.mapping = FALSE, is.dgCMatrix = TRUE, signed.hash = FALSE, progress = FALSE) { stopifnot(hash.size >= 0) stopifnot(is.data.frame(data)) diff --git a/man/hashed.model.matrix.Rd b/man/hashed.model.matrix.Rd index 5a4f7f9..38bbe5d 100644 --- a/man/hashed.model.matrix.Rd +++ b/man/hashed.model.matrix.Rd @@ -8,7 +8,7 @@ \title{Create a model matrix with feature hashing} \usage{ hashed.model.matrix(formula, data, hash.size = 2^18, transpose = FALSE, - create.mapping = FALSE, is.dgCMatrix = TRUE, signed.hash = TRUE, + create.mapping = FALSE, is.dgCMatrix = TRUE, signed.hash = FALSE, progress = FALSE) } \arguments{ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 5bf9d1e..4d7691d 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -50,6 +50,24 @@ BEGIN_RCPP return __result; END_RCPP } +// hashed_model_matrix_dataframe +SEXP hashed_model_matrix_dataframe(RObject tf, DataFrame data, unsigned long hash_size, bool transpose, S4 retval, bool keep_hashing_mapping, bool is_xi, bool progress); +RcppExport SEXP FeatureHashing_hashed_model_matrix_dataframe(SEXP tfSEXP, SEXP dataSEXP, SEXP hash_sizeSEXP, SEXP transposeSEXP, SEXP retvalSEXP, SEXP keep_hashing_mappingSEXP, SEXP is_xiSEXP, SEXP progressSEXP) { +BEGIN_RCPP + Rcpp::RObject __result; + Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< RObject >::type tf(tfSEXP); + Rcpp::traits::input_parameter< DataFrame >::type data(dataSEXP); + Rcpp::traits::input_parameter< unsigned long >::type hash_size(hash_sizeSEXP); + Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP); + Rcpp::traits::input_parameter< S4 >::type retval(retvalSEXP); + Rcpp::traits::input_parameter< bool >::type keep_hashing_mapping(keep_hashing_mappingSEXP); + Rcpp::traits::input_parameter< bool >::type is_xi(is_xiSEXP); + Rcpp::traits::input_parameter< bool >::type progress(progressSEXP); + __result = Rcpp::wrap(hashed_model_matrix_dataframe(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress)); + return __result; +END_RCPP +} // xi IntegerVector xi(CharacterVector src); RcppExport SEXP FeatureHashing_xi(SEXP srcSEXP) { @@ -83,24 +101,6 @@ BEGIN_RCPP return __result; END_RCPP } -// hashed_model_matrix_dataframe -SEXP hashed_model_matrix_dataframe(RObject tf, DataFrame data, unsigned long hash_size, bool transpose, S4 retval, bool keep_hashing_mapping, bool is_xi, bool progress); -RcppExport SEXP FeatureHashing_hashed_model_matrix_dataframe(SEXP tfSEXP, SEXP dataSEXP, SEXP hash_sizeSEXP, SEXP transposeSEXP, SEXP retvalSEXP, SEXP keep_hashing_mappingSEXP, SEXP is_xiSEXP, SEXP progressSEXP) { -BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; - Rcpp::traits::input_parameter< RObject >::type tf(tfSEXP); - Rcpp::traits::input_parameter< DataFrame >::type data(dataSEXP); - Rcpp::traits::input_parameter< unsigned long >::type hash_size(hash_sizeSEXP); - Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP); - Rcpp::traits::input_parameter< S4 >::type retval(retvalSEXP); - Rcpp::traits::input_parameter< bool >::type keep_hashing_mapping(keep_hashing_mappingSEXP); - Rcpp::traits::input_parameter< bool >::type is_xi(is_xiSEXP); - Rcpp::traits::input_parameter< bool >::type progress(progressSEXP); - __result = Rcpp::wrap(hashed_model_matrix_dataframe(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress)); - return __result; -END_RCPP -} // intToRaw SEXP intToRaw(int src); RcppExport SEXP FeatureHashing_intToRaw(SEXP srcSEXP) { diff --git a/tests/test-hashing.R b/tests/test-hashing.R index 9fd38ae..5eaa537 100644 --- a/tests/test-hashing.R +++ b/tests/test-hashing.R @@ -12,7 +12,7 @@ if (require(RUnit)) { checkTrue(all(hashed.value(names(mapping_value)) %% 2^32 == mapping_value), "Unexpected hashing result by hashed.value") - m <- hashed.model.matrix(~ ., CO2, hash.size = 2^10, create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = FALSE) + m <- hashed.model.matrix(~ ., CO2, hash.size = 2^10, create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE) mapping <- as.list(attr(m, "mapping")) checkTrue(all(!duplicated(unlist(mapping) %% 2^10 + 1)), "Unexpected collision of hashing example") @@ -41,7 +41,7 @@ if (require(RUnit)) { } m <- hashed.model.matrix(~ .^2, CO2, hash.size = 2^10, create.mapping = TRUE, - transpose = TRUE, is.dgCMatrix = FALSE) + transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE) mapping_value <- hash.mapping(m) mapping_value.expected <- structure(list(PlantQc1 = 2636986885, PlantQn1 = 3789462177, @@ -112,7 +112,7 @@ if (require(RUnit)) { "Unexpected hashing result of interaction term") m2 <- hashed.model.matrix(~ . ^ 2, data = CO2, hash.size = 32, create.mapping = TRUE, - transpose = TRUE, is.dgCMatrix = FALSE) + transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE) checkTrue(!all(m2@i == 0), "All hashed indices created by hashed.model.matrix are zero") checkTrue(sum(m2 %*% rep(1, ncol(m2)) != 0) > 1, @@ -136,13 +136,13 @@ if (require(RUnit)) { } # check handling of NA - tryCatch(m <- hashed.model.matrix(~ PlAnT, CO2, 8, + tryCatch(m <- hashed.model.matrix(~ PlAnT, CO2, 8, signed.hash = TRUE, transpose = TRUE, is.dgCMatrix = FALSE), error = function(e) { if (class(e)[1] != "std::invalid_argument") stop(e) if (conditionMessage(e) != "Failed to find the column:PlAnT") stop(e) }) - m <- hashed.model.matrix(~ Plant:Type:Treatment, CO2, create.mapping = TRUE) + m <- hashed.model.matrix(~ Plant:Type:Treatment, CO2, create.mapping = TRUE, signed.hash = TRUE) map <- hash.mapping(m) map <- map[grepl("\\w+:\\w+:\\w+", names(map))] checkTrue(all(hashed.interaction.value(names(map)) %% (2^18) + 1== map), diff --git a/tests/test-memcheck.R b/tests/test-memcheck.R index 5bddd90..dccde44 100644 --- a/tests/test-memcheck.R +++ b/tests/test-memcheck.R @@ -29,7 +29,7 @@ if (require(RUnit)) { 1.58683345454085, 0.558486425565304, -1.27659220845804, -0.573265414236886, -1.22461261489836, -0.473400636439312)) m <- hashed.model.matrix(~ split(a, delim = ",", type = "existence"):b, df, 2^6, - create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = TRUE) + create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = TRUE, signed.hash = TRUE) cat(paste(m@i, collapse=","));cat("\n") checkEquals(digest::digest(m@x), "fd23fdd30634b57ebb8f044ff74fa762") checkEquals(digest::digest(m@p), "fea11342d41629d0be3627468a288229")