Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/master' into dev/0.9
Conflicts: R/RcppExports.R R/hashed.model.matrix.R man/hashed.model.matrix.Rd src/RcppExports.cpp
- Loading branch information
Showing
38 changed files
with
2,091 additions
and
116 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
* installing *source* package ‘FeatureHashing’ ... | ||
** libs | ||
make: Nothing to be done for `all'. | ||
installing to /Users/wush/Projects/FeatureHashing/..Rcheck/FeatureHashing/libs | ||
** R | ||
** data | ||
** inst | ||
** preparing package for lazy loading | ||
** help | ||
*** installing help indices | ||
** building package indices | ||
** testing if installed package can be loaded | ||
* DONE (FeatureHashing) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
pkgname <- "FeatureHashing" | ||
source(file.path(R.home("share"), "R", "examples-header.R")) | ||
options(warn = 1) | ||
base::assign(".ExTimings", "FeatureHashing-Ex.timings", pos = 'CheckExEnv') | ||
base::cat("name\tuser\tsystem\telapsed\n", file=base::get(".ExTimings", pos = 'CheckExEnv')) | ||
# Register a helper named ".format_ptime" in CheckExEnv; later lines fetch it to format the timing difference written to the timings file. | ||
base::assign(".format_ptime", | ||
function(x) { | ||
# Fold elements 4 and 5 into elements 1 and 2 when they are not NA | ||
# (presumably the child-process times of a proc.time() result -- TODO confirm). | ||
if(!is.na(x[4L])) x[1L] <- x[1L] + x[4L] | ||
if(!is.na(x[5L])) x[2L] <- x[2L] + x[5L] | ||
# Force '.' as the decimal separator; this sets the option globally and is not restored here. | ||
options(OutDec = '.') | ||
# Return the first three elements, formatted via format() with digits = 7L. | ||
format(x[1L:3L], digits = 7L) | ||
}, | ||
pos = 'CheckExEnv') | ||
|
||
### * </HEADER> | ||
library('FeatureHashing') | ||
|
||
base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') | ||
cleanEx() | ||
nameEx("CSCMatrix-class") | ||
### * CSCMatrix-class | ||
|
||
flush(stderr()); flush(stdout()) | ||
|
||
base::assign(".ptime", proc.time(), pos = "CheckExEnv") | ||
### Name: CSCMatrix-class | ||
### Title: CSCMatrix | ||
### Aliases: CSCMatrix-class [,CSCMatrix,missing,numeric,ANY-method | ||
### [,CSCMatrix,numeric,missing,ANY-method | ||
### [,CSCMatrix,numeric,numeric,ANY-method %*%,CSCMatrix,numeric-method | ||
### %*%,numeric,CSCMatrix-method dim,CSCMatrix-method | ||
### dim<-,CSCMatrix-method | ||
|
||
### ** Examples | ||
|
||
# construct a CSCMatrix | ||
m <- hashed.model.matrix(~ ., CO2, 8) | ||
# convert it to dgCMatrix | ||
m2 <- as(m, "dgCMatrix") | ||
|
||
|
||
|
||
base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") | ||
base::cat("CSCMatrix-class", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") | ||
cleanEx() | ||
nameEx("hashed.model.matrix") | ||
### * hashed.model.matrix | ||
|
||
flush(stderr()); flush(stdout()) | ||
|
||
base::assign(".ptime", proc.time(), pos = "CheckExEnv") | ||
### Name: hashed.model.matrix | ||
### Title: Create a model matrix with feature hashing | ||
### Aliases: hash_h hash_xi hashed.model.matrix | ||
|
||
### ** Examples | ||
|
||
# Construct the model matrix. The transposed matrix is returned by default. | ||
m <- hashed.model.matrix(~ ., CO2, 2^6, keep.hashing_mapping = TRUE) | ||
# Print the matrix via dgCMatrix | ||
as(m, "dgCMatrix") | ||
# Check the result of hashing | ||
mapping <- unlist(as.list(attr(m, "mapping"))) | ||
# Check the rate of collision | ||
# mean(duplicated(mapping %% 2^6)) | ||
# The result is CSCMatrix which supports simple subsetting and matrix-vector | ||
# multiplication | ||
# rnorm(2^6) %*% m | ||
|
||
# Detail of the hashing | ||
## The main effect is hashed via `hash_h` | ||
all(hash_h(names(mapping)) %% 2^6 == mapping %% 2^6) | ||
## The sign is corrected by `hash_xi` | ||
hash_xi(names(mapping)) | ||
## The interaction term is implemented as follows: | ||
m2 <- hashed.model.matrix(~ .^2, CO2, 2^6, keep.hashing_mapping = TRUE) | ||
mapping2 <- unlist(as.list(attr(m2, "mapping"))) | ||
mapping2[2] # PlantQn2:uptake | ||
h1 <- mapping2["PlantQn2"] | ||
h2 <- mapping2["uptake"] | ||
library(pack) | ||
hash_h(rawToChar(c(numToRaw(h1, 4), numToRaw(h2, 4)))) # should be mapping2[2] | ||
|
||
# The tag-like feature | ||
data(test.tag) | ||
df <- data.frame(a = test.tag, b = rnorm(length(test.tag))) | ||
m <- hashed.model.matrix(~ tag(a, split = ",", type = "existence"):b, df, 2^6, | ||
keep.hashing_mapping = TRUE) | ||
# The column `a` is split by "," and has an interaction with "b": | ||
mapping <- unlist(as.list(attr(m, "mapping"))) | ||
names(mapping) | ||
|
||
|
||
|
||
base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") | ||
base::cat("hashed.model.matrix", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") | ||
### * <FOOTER> | ||
### | ||
options(digits = 7L) | ||
base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n") | ||
grDevices::dev.off() | ||
### | ||
### Local variables: *** | ||
### mode: outline-minor *** | ||
### outline-regexp: "\\(> \\)?### [*]+" *** | ||
### End: *** | ||
quit('no') |
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
name user system elapsed | ||
CSCMatrix-class 0.003 0.000 0.005 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
Package: FeatureHashing | ||
Type: Package | ||
Title: Creates a Model Matrix via Feature Hashing With a Formula | ||
Interface | ||
Version: 0.8.1 | ||
Date: 2014-08-01 | ||
Authors@R: as.person(c( "Wush Wu <wush978@gmail.com> [aut, cre]")) | ||
Author: Wush Wu [aut, cre] | ||
Maintainer: Wush Wu <wush978@gmail.com> | ||
Description: Feature hashing, also called the hashing trick, is a method to | ||
transform features into a vector. Without looking up the indices in an | ||
associative array, it applies a hash function to the features and uses their | ||
hash values as indices directly. The method of feature hashing in this package | ||
was proposed in Weinberger et al. (2009). The hashing algorithm is the murmurhash3 | ||
from the digest package. Please see the README.md for more information. | ||
License: GPL(>= 3) | ||
Depends: R (>= 3.1), methods | ||
Imports: Rcpp (>= 0.11), Matrix, digest(>= 0.6.8) | ||
LinkingTo: Rcpp, digest(>= 0.6.8), BH | ||
Suggests: pack, RUnit | ||
SystemRequirements: C++11 | ||
BugReports: https://github.com/wush978/FeatureHashing/issues | ||
URL: https://github.com/wush978/FeatureHashing | ||
Built: R 3.1.2; x86_64-apple-darwin13.4.0; 2015-02-21 09:20:06 UTC; unix |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
CSCMatrix-class CSCMatrix | ||
hashed.model.matrix Create a model matrix with feature hashing | ||
tag Expand concatenated feature | ||
test.tag test.tag |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Generated by roxygen2 (4.0.1): do not edit by hand | ||
|
||
export(hash_h) | ||
export(hash_xi) | ||
export(hashed.model.matrix) | ||
export(tag) | ||
import(digest) | ||
importClassesFrom(Matrix,dgCMatrix) | ||
importFrom(Rcpp,evalCpp) | ||
importFrom(methods,checkAtAssignment) | ||
importFrom(methods,new) | ||
useDynLib(FeatureHashing) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# File share/R/nspackloader.R | ||
# Part of the R package, http://www.R-project.org | ||
# | ||
# Copyright (C) 1995-2012 The R Core Team | ||
# | ||
# This program is free software; you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation; either version 2 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# A copy of the GNU General Public License is available at | ||
# http://www.r-project.org/Licenses/ | ||
|
||
# Lazy-load the package's R objects into its namespace, using the info of the namespace currently being loaded. | ||
local({ | ||
# Package name and library location come from the in-progress namespace load. | ||
info <- loadingNamespaceInfo() | ||
pkg <- info$pkgname | ||
ns <- .getNamespace(as.name(pkg)) | ||
# Abort when no namespace environment is registered under that name. | ||
if (is.null(ns)) | ||
stop("cannot find namespace environment for ", pkg, domain = NA); | ||
# Path prefix of the lazy-load database: <libname>/<pkg>/R/<pkg>. | ||
dbbase <- file.path(info$libname, pkg, "R", pkg) | ||
# Load every entry into the namespace except the one named ".__NAMESPACE__.". | ||
lazyLoad(dbbase, ns, filter = function(n) n != ".__NAMESPACE__.") | ||
}) |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
%*%,CSCMatrix,numeric-method CSCMatrix-class | ||
%*%,numeric,CSCMatrix-method CSCMatrix-class | ||
CSCMatrix-class CSCMatrix-class | ||
dim,CSCMatrix-method CSCMatrix-class | ||
dim<-,CSCMatrix-method CSCMatrix-class | ||
hashed.model.matrix hashed.model.matrix | ||
hash_h hashed.model.matrix | ||
hash_xi hashed.model.matrix | ||
tag tag | ||
test.tag test.tag | ||
[,CSCMatrix,missing,numeric,ANY-method CSCMatrix-class | ||
[,CSCMatrix,numeric,missing,ANY-method CSCMatrix-class | ||
[,CSCMatrix,numeric,numeric,ANY-method CSCMatrix-class |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> | ||
<html><head><title>R: Creates a Model Matrix via Feature Hashing With a Formula | ||
Interface</title> | ||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | ||
<link rel="stylesheet" type="text/css" href="R.css"> | ||
</head><body> | ||
<h1> Creates a Model Matrix via Feature Hashing With a Formula | ||
Interface | ||
<img class="toplogo" src="../../../doc/html/logo.jpg" alt="[R logo]"> | ||
</h1> | ||
<hr> | ||
<div align="center"> | ||
<a href="../../../doc/html/packages.html"><img src="../../../doc/html/left.jpg" alt="[Up]" width="30" height="30" border="0"></a> | ||
<a href="../../../doc/html/index.html"><img src="../../../doc/html/up.jpg" alt="[Top]" width="30" height="30" border="0"></a> | ||
</div><h2>Documentation for package ‘FeatureHashing’ version 0.8.1</h2> | ||
|
||
<ul><li><a href="../DESCRIPTION">DESCRIPTION file</a>.</li> | ||
</ul> | ||
|
||
<h2>Help Pages</h2> | ||
|
||
|
||
<table width="100%"> | ||
<tr><td width="25%"><a href="CSCMatrix-class.html">%*%-method</a></td> | ||
<td>CSCMatrix</td></tr> | ||
<tr><td width="25%"><a href="CSCMatrix-class.html">CSCMatrix-class</a></td> | ||
<td>CSCMatrix</td></tr> | ||
<tr><td width="25%"><a href="CSCMatrix-class.html">dim-method</a></td> | ||
<td>CSCMatrix</td></tr> | ||
<tr><td width="25%"><a href="CSCMatrix-class.html">dim<--method</a></td> | ||
<td>CSCMatrix</td></tr> | ||
<tr><td width="25%"><a href="hashed.model.matrix.html">hashed.model.matrix</a></td> | ||
<td>Create a model matrix with feature hashing</td></tr> | ||
<tr><td width="25%"><a href="hashed.model.matrix.html">hash_h</a></td> | ||
<td>Create a model matrix with feature hashing</td></tr> | ||
<tr><td width="25%"><a href="hashed.model.matrix.html">hash_xi</a></td> | ||
<td>Create a model matrix with feature hashing</td></tr> | ||
<tr><td width="25%"><a href="tag.html">tag</a></td> | ||
<td>Expand concatenated feature</td></tr> | ||
<tr><td width="25%"><a href="test.tag.html">test.tag</a></td> | ||
<td>test.tag</td></tr> | ||
<tr><td width="25%"><a href="CSCMatrix-class.html">[-method</a></td> | ||
<td>CSCMatrix</td></tr> | ||
</table> | ||
</body></html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
BODY{ background: white; | ||
color: black } | ||
|
||
A:link{ background: white; | ||
color: blue } | ||
A:visited{ background: white; | ||
color: rgb(50%, 0%, 50%) } | ||
|
||
H1{ background: white; | ||
color: rgb(55%, 55%, 55%); | ||
font-family: monospace; | ||
font-size: x-large; | ||
text-align: center } | ||
|
||
H2{ background: white; | ||
color: rgb(40%, 40%, 40%); | ||
font-family: monospace; | ||
font-size: large; | ||
text-align: center } | ||
|
||
H3{ background: white; | ||
color: rgb(40%, 40%, 40%); | ||
font-family: monospace; | ||
font-size: large } | ||
|
||
H4{ background: white; | ||
color: rgb(40%, 40%, 40%); | ||
font-family: monospace; | ||
font-style: italic; | ||
font-size: large } | ||
|
||
H5{ background: white; | ||
color: rgb(40%, 40%, 40%); | ||
font-family: monospace } | ||
|
||
H6{ background: white; | ||
color: rgb(40%, 40%, 40%); | ||
font-family: monospace; | ||
font-style: italic } | ||
|
||
IMG.toplogo{ vertical-align: middle } | ||
|
||
IMG.arrow{ width: 30px; | ||
height: 30px; | ||
border: 0 } | ||
|
||
span.acronym{font-size: small} | ||
span.env{font-family: monospace} | ||
span.file{font-family: monospace} | ||
span.option{font-family: monospace} | ||
span.pkg{font-weight: bold} | ||
span.samp{font-family: monospace} | ||
|
||
div.vignettes a:hover { | ||
background: rgb(85%, 85%, 85%); | ||
} | ||
|
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Test entry point: build an RUnit suite and run it. | ||
library(RUnit) | ||
# Suite "FeatureHashing" collects files under "tests" whose names match "^test-". | ||
test <- defineTestSuite("FeatureHashing", dirs = "tests", testFileRegexp = "^test-") | ||
runTestSuite(test) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
Version: 1.0 | ||
|
||
RestoreWorkspace: Default | ||
SaveWorkspace: Default | ||
AlwaysSaveHistory: Default | ||
|
||
EnableCodeIndexing: Yes | ||
UseSpacesForTab: Yes | ||
NumSpacesForTab: 2 | ||
Encoding: UTF-8 | ||
|
||
RnwWeave: Sweave | ||
LaTeX: pdfLaTeX | ||
|
||
BuildType: Package | ||
PackageInstallArgs: --no-multiarch --with-keep.source | ||
PackageCheckArgs: --as-cran | ||
PackageRoxygenize: rd,namespace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.