Skip to content

Commit

Permalink
Merge pull request #100 from wush978/dev/91
Browse files Browse the repository at this point in the history
resolve #91
  • Loading branch information
wush978 committed Sep 21, 2015
2 parents 73f570a + 2f1e55b commit 7f4beb3
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 30 deletions.
8 changes: 4 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ tomatrix <- function(m) {
.Call('FeatureHashing_tomatrix', PACKAGE = 'FeatureHashing', m)
}

# Thin R-level wrapper around the compiled routine
# 'FeatureHashing_hashed_model_matrix_dataframe' registered under
# PACKAGE = 'FeatureHashing'.
#
# All eight arguments (tf, data, hash_size, transpose, retval,
# keep_hashing_mapping, is_xi, progress) are forwarded to .Call() positionally
# and unchanged; no validation or coercion happens at this level. The value
# returned by the native routine is returned as-is.
#
# NOTE(review): this looks like output of Rcpp::compileAttributes(); if so,
# manual edits here will be overwritten on regeneration -- confirm.
.hashed.model.matrix.dataframe <- function(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress) {
.Call('FeatureHashing_hashed_model_matrix_dataframe', PACKAGE = 'FeatureHashing', tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress)
}

#'@export hash.sign
hash.sign <- function(src) {
.Call('FeatureHashing_xi', PACKAGE = 'FeatureHashing', src)
Expand All @@ -32,10 +36,6 @@ hashed.interaction.value <- function(src) {
.Call('FeatureHashing_h2', PACKAGE = 'FeatureHashing', src)
}

# Thin R-level wrapper around the compiled routine
# 'FeatureHashing_hashed_model_matrix_dataframe' registered under
# PACKAGE = 'FeatureHashing'.
#
# All eight arguments (tf, data, hash_size, transpose, retval,
# keep_hashing_mapping, is_xi, progress) are forwarded to .Call() positionally
# and unchanged; no validation or coercion happens at this level. The value
# returned by the native routine is returned as-is.
#
# NOTE(review): this looks like output of Rcpp::compileAttributes(); if so,
# manual edits here will be overwritten on regeneration -- confirm.
.hashed.model.matrix.dataframe <- function(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress) {
.Call('FeatureHashing_hashed_model_matrix_dataframe', PACKAGE = 'FeatureHashing', tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress)
}

#'@title Convert the integer to raw vector with endian correction
#'@param src integer value.
#'@return raw vector with length 4
Expand Down
2 changes: 1 addition & 1 deletion R/hashed.model.matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
#'@importClassesFrom Matrix dgCMatrix
#'@aliases hashed.value hash.sign hashed.interaction.value
hashed.model.matrix <- function(formula, data, hash.size = 2^18, transpose = FALSE,
create.mapping = FALSE, is.dgCMatrix = TRUE, signed.hash = TRUE,
create.mapping = FALSE, is.dgCMatrix = TRUE, signed.hash = FALSE,
progress = FALSE) {
stopifnot(hash.size >= 0)
stopifnot(is.data.frame(data))
Expand Down
2 changes: 1 addition & 1 deletion man/hashed.model.matrix.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
\title{Create a model matrix with feature hashing}
\usage{
hashed.model.matrix(formula, data, hash.size = 2^18, transpose = FALSE,
create.mapping = FALSE, is.dgCMatrix = TRUE, signed.hash = TRUE,
create.mapping = FALSE, is.dgCMatrix = TRUE, signed.hash = FALSE,
progress = FALSE)
}
\arguments{
Expand Down
36 changes: 18 additions & 18 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,24 @@ BEGIN_RCPP
return __result;
END_RCPP
}
// hashed_model_matrix_dataframe
// Forward declaration of the C++ implementation; its body is not part of this
// translation unit's visible text.
SEXP hashed_model_matrix_dataframe(RObject tf, DataFrame data, unsigned long hash_size, bool transpose, S4 retval, bool keep_hashing_mapping, bool is_xi, bool progress);
// Exported entry point that the R side reaches through
// .Call('FeatureHashing_hashed_model_matrix_dataframe', ...).
// It receives one SEXP per argument, converts each to the C++ parameter type
// declared above, calls hashed_model_matrix_dataframe(), and returns the
// result after passing it through Rcpp::wrap.
RcppExport SEXP FeatureHashing_hashed_model_matrix_dataframe(SEXP tfSEXP, SEXP dataSEXP, SEXP hash_sizeSEXP, SEXP transposeSEXP, SEXP retvalSEXP, SEXP keep_hashing_mappingSEXP, SEXP is_xiSEXP, SEXP progressSEXP) {
// NOTE(review): BEGIN_RCPP / END_RCPP presumably bracket the body so that C++
// exceptions are turned into R errors -- confirm against the Rcpp headers.
BEGIN_RCPP
// Holder for the value handed back to R.
Rcpp::RObject __result;
// NOTE(review): RNGScope presumably syncs R's RNG state for the duration of
// the call -- confirm against the Rcpp documentation.
Rcpp::RNGScope __rngScope;
// Argument conversion: one input_parameter<T> per incoming SEXP, in the same
// order as the implementation's parameter list.
Rcpp::traits::input_parameter< RObject >::type tf(tfSEXP);
Rcpp::traits::input_parameter< DataFrame >::type data(dataSEXP);
Rcpp::traits::input_parameter< unsigned long >::type hash_size(hash_sizeSEXP);
Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP);
Rcpp::traits::input_parameter< S4 >::type retval(retvalSEXP);
Rcpp::traits::input_parameter< bool >::type keep_hashing_mapping(keep_hashing_mappingSEXP);
Rcpp::traits::input_parameter< bool >::type is_xi(is_xiSEXP);
Rcpp::traits::input_parameter< bool >::type progress(progressSEXP);
// Delegate to the implementation and wrap its SEXP result.
__result = Rcpp::wrap(hashed_model_matrix_dataframe(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress));
return __result;
END_RCPP
}
// xi
IntegerVector xi(CharacterVector src);
RcppExport SEXP FeatureHashing_xi(SEXP srcSEXP) {
Expand Down Expand Up @@ -83,24 +101,6 @@ BEGIN_RCPP
return __result;
END_RCPP
}
// hashed_model_matrix_dataframe
// Forward declaration of the C++ implementation; its body is not part of this
// translation unit's visible text.
SEXP hashed_model_matrix_dataframe(RObject tf, DataFrame data, unsigned long hash_size, bool transpose, S4 retval, bool keep_hashing_mapping, bool is_xi, bool progress);
// Exported entry point that the R side reaches through
// .Call('FeatureHashing_hashed_model_matrix_dataframe', ...).
// It receives one SEXP per argument, converts each to the C++ parameter type
// declared above, calls hashed_model_matrix_dataframe(), and returns the
// result after passing it through Rcpp::wrap.
RcppExport SEXP FeatureHashing_hashed_model_matrix_dataframe(SEXP tfSEXP, SEXP dataSEXP, SEXP hash_sizeSEXP, SEXP transposeSEXP, SEXP retvalSEXP, SEXP keep_hashing_mappingSEXP, SEXP is_xiSEXP, SEXP progressSEXP) {
// NOTE(review): BEGIN_RCPP / END_RCPP presumably bracket the body so that C++
// exceptions are turned into R errors -- confirm against the Rcpp headers.
BEGIN_RCPP
// Holder for the value handed back to R.
Rcpp::RObject __result;
// NOTE(review): RNGScope presumably syncs R's RNG state for the duration of
// the call -- confirm against the Rcpp documentation.
Rcpp::RNGScope __rngScope;
// Argument conversion: one input_parameter<T> per incoming SEXP, in the same
// order as the implementation's parameter list.
Rcpp::traits::input_parameter< RObject >::type tf(tfSEXP);
Rcpp::traits::input_parameter< DataFrame >::type data(dataSEXP);
Rcpp::traits::input_parameter< unsigned long >::type hash_size(hash_sizeSEXP);
Rcpp::traits::input_parameter< bool >::type transpose(transposeSEXP);
Rcpp::traits::input_parameter< S4 >::type retval(retvalSEXP);
Rcpp::traits::input_parameter< bool >::type keep_hashing_mapping(keep_hashing_mappingSEXP);
Rcpp::traits::input_parameter< bool >::type is_xi(is_xiSEXP);
Rcpp::traits::input_parameter< bool >::type progress(progressSEXP);
// Delegate to the implementation and wrap its SEXP result.
__result = Rcpp::wrap(hashed_model_matrix_dataframe(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi, progress));
return __result;
END_RCPP
}
// intToRaw
SEXP intToRaw(int src);
RcppExport SEXP FeatureHashing_intToRaw(SEXP srcSEXP) {
Expand Down
10 changes: 5 additions & 5 deletions tests/test-hashing.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ if (require(RUnit)) {
checkTrue(all(hashed.value(names(mapping_value)) %% 2^32 == mapping_value),
"Unexpected hashing result by hashed.value")

m <- hashed.model.matrix(~ ., CO2, hash.size = 2^10, create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = FALSE)
m <- hashed.model.matrix(~ ., CO2, hash.size = 2^10, create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE)
mapping <- as.list(attr(m, "mapping"))
checkTrue(all(!duplicated(unlist(mapping) %% 2^10 + 1)),
"Unexpected collision of hashing example")
Expand Down Expand Up @@ -41,7 +41,7 @@ if (require(RUnit)) {
}

m <- hashed.model.matrix(~ .^2, CO2, hash.size = 2^10, create.mapping = TRUE,
transpose = TRUE, is.dgCMatrix = FALSE)
transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE)
mapping_value <- hash.mapping(m)

mapping_value.expected <- structure(list(PlantQc1 = 2636986885, PlantQn1 = 3789462177,
Expand Down Expand Up @@ -112,7 +112,7 @@ if (require(RUnit)) {
"Unexpected hashing result of interaction term")

m2 <- hashed.model.matrix(~ . ^ 2, data = CO2, hash.size = 32, create.mapping = TRUE,
transpose = TRUE, is.dgCMatrix = FALSE)
transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE)
checkTrue(!all(m2@i == 0),
"All hashed indices created by hashed.model.matrix are zero")
checkTrue(sum(m2 %*% rep(1, ncol(m2)) != 0) > 1,
Expand All @@ -136,13 +136,13 @@ if (require(RUnit)) {
}

# check handling of NA
tryCatch(m <- hashed.model.matrix(~ PlAnT, CO2, 8,
tryCatch(m <- hashed.model.matrix(~ PlAnT, CO2, 8, signed.hash = TRUE,
transpose = TRUE, is.dgCMatrix = FALSE), error = function(e) {
if (class(e)[1] != "std::invalid_argument") stop(e)
if (conditionMessage(e) != "Failed to find the column:PlAnT") stop(e)
})

m <- hashed.model.matrix(~ Plant:Type:Treatment, CO2, create.mapping = TRUE)
m <- hashed.model.matrix(~ Plant:Type:Treatment, CO2, create.mapping = TRUE, signed.hash = TRUE)
map <- hash.mapping(m)
map <- map[grepl("\\w+:\\w+:\\w+", names(map))]
checkTrue(all(hashed.interaction.value(names(map)) %% (2^18) + 1== map),
Expand Down
2 changes: 1 addition & 1 deletion tests/test-memcheck.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ if (require(RUnit)) {
1.58683345454085, 0.558486425565304, -1.27659220845804, -0.573265414236886,
-1.22461261489836, -0.473400636439312))
m <- hashed.model.matrix(~ split(a, delim = ",", type = "existence"):b, df, 2^6,
create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = TRUE)
create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = TRUE, signed.hash = TRUE)
cat(paste(m@i, collapse=","));cat("\n")
checkEquals(digest::digest(m@x), "fd23fdd30634b57ebb8f044ff74fa762")
checkEquals(digest::digest(m@p), "fea11342d41629d0be3627468a288229")
Expand Down

0 comments on commit 7f4beb3

Please sign in to comment.