Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into dev/0.9
Browse files Browse the repository at this point in the history
Conflicts:
	R/RcppExports.R
	R/hashed.model.matrix.R
	man/hashed.model.matrix.Rd
	src/RcppExports.cpp
  • Loading branch information
wush978 committed Mar 10, 2015
2 parents f82055b + 6fa1fa3 commit 9da7f9f
Show file tree
Hide file tree
Showing 38 changed files with 2,091 additions and 116 deletions.
1,100 changes: 1,100 additions & 0 deletions ..Rcheck/00check.log

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions ..Rcheck/00install.out
@@ -0,0 +1,13 @@
* installing *source* package ‘FeatureHashing’ ...
** libs
make: Nothing to be done for `all'.
installing to /Users/wush/Projects/FeatureHashing/..Rcheck/FeatureHashing/libs
** R
** data
** inst
** preparing package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded
* DONE (FeatureHashing)
107 changes: 107 additions & 0 deletions ..Rcheck/FeatureHashing-Ex.R
@@ -0,0 +1,107 @@
pkgname <- "FeatureHashing"
source(file.path(R.home("share"), "R", "examples-header.R"))
options(warn = 1)
base::assign(".ExTimings", "FeatureHashing-Ex.timings", pos = 'CheckExEnv')
base::cat("name\tuser\tsystem\telapsed\n", file=base::get(".ExTimings", pos = 'CheckExEnv'))
# Register the helper `.format_ptime` in the 'CheckExEnv' environment.
# It converts a timing vector into the three formatted values that are appended
# to the example-timings file. In this script it is called with a difference of
# two proc.time() results (see the `.dptime` assignments further down).
base::assign(".format_ptime",
function(x) {
# Fold elements 4 and 5 into elements 1 and 2 when they are not NA.
# For proc.time() output these are the child-process user/system times
# (per the proc.time documentation).
if(!is.na(x[4L])) x[1L] <- x[1L] + x[4L]
if(!is.na(x[5L])) x[2L] <- x[2L] + x[5L]
# Use '.' as the decimal mark when formatting below.
# NOTE: this sets the global option and does not restore the previous value.
options(OutDec = '.')
# Return only the first three entries, formatted with 7 significant digits.
format(x[1L:3L], digits = 7L)
},
pos = 'CheckExEnv')

### * </HEADER>
library('FeatureHashing')

base::assign(".oldSearch", base::search(), pos = 'CheckExEnv')
cleanEx()
nameEx("CSCMatrix-class")
### * CSCMatrix-class

flush(stderr()); flush(stdout())

base::assign(".ptime", proc.time(), pos = "CheckExEnv")
### Name: CSCMatrix-class
### Title: CSCMatrix
### Aliases: CSCMatrix-class [,CSCMatrix,missing,numeric,ANY-method
### [,CSCMatrix,numeric,missing,ANY-method
### [,CSCMatrix,numeric,numeric,ANY-method %*%,CSCMatrix,numeric-method
### %*%,numeric,CSCMatrix-method dim,CSCMatrix-method
### dim<-,CSCMatrix-method

### ** Examples

# construct a CSCMatrix
m <- hashed.model.matrix(~ ., CO2, 8)
# convert it to dgCMatrix
m2 <- as(m, "dgCMatrix")



base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv")
base::cat("CSCMatrix-class", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t")
cleanEx()
nameEx("hashed.model.matrix")
### * hashed.model.matrix

flush(stderr()); flush(stdout())

base::assign(".ptime", proc.time(), pos = "CheckExEnv")
### Name: hashed.model.matrix
### Title: Create a model matrix with feature hashing
### Aliases: hash_h hash_xi hashed.model.matrix

### ** Examples

# Construct the model matrix. The transposed matrix is returned by default.
m <- hashed.model.matrix(~ ., CO2, 2^6, keep.hashing_mapping = TRUE)
# Print the matrix via dgCMatrix
as(m, "dgCMatrix")
# Check the result of hashing
mapping <- unlist(as.list(attr(m, "mapping")))
# Check the rate of collision
# mean(duplicated(mapping %% 2^6))
# The result is CSCMatrix which supports simple subsetting and matrix-vector
# multiplication
# rnorm(2^6) %*% m

# Detail of the hashing
## The main effect is hashed via `hash_h`
all(hash_h(names(mapping)) %% 2^6 == mapping %% 2^6)
## The sign is corrected by `hash_xi`
hash_xi(names(mapping))
## The interaction term is implemented as follows:
m2 <- hashed.model.matrix(~ .^2, CO2, 2^6, keep.hashing_mapping = TRUE)
mapping2 <- unlist(as.list(attr(m2, "mapping")))
mapping2[2] # PlantQn2:uptake
h1 <- mapping2["PlantQn2"]
h2 <- mapping2["uptake"]
library(pack)
hash_h(rawToChar(c(numToRaw(h1, 4), numToRaw(h2, 4)))) # should be mapping2[2]

# The tag-like feature
data(test.tag)
df <- data.frame(a = test.tag, b = rnorm(length(test.tag)))
m <- hashed.model.matrix(~ tag(a, split = ",", type = "existence"):b, df, 2^6,
keep.hashing_mapping = TRUE)
# The column `a` is split by "," and has an interaction with "b":
mapping <- unlist(as.list(attr(m, "mapping")))
names(mapping)



base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv")
base::cat("hashed.model.matrix", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t")
### * <FOOTER>
###
options(digits = 7L)
base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n")
grDevices::dev.off()
###
### Local variables: ***
### mode: outline-minor ***
### outline-regexp: "\\(> \\)?### [*]+" ***
### End: ***
quit('no')
265 changes: 265 additions & 0 deletions ..Rcheck/FeatureHashing-Ex.Rout

Large diffs are not rendered by default.

Binary file added ..Rcheck/FeatureHashing-Ex.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions ..Rcheck/FeatureHashing-Ex.timings
@@ -0,0 +1,2 @@
name user system elapsed
CSCMatrix-class 0.003 0.000 0.005
24 changes: 24 additions & 0 deletions ..Rcheck/FeatureHashing/DESCRIPTION
@@ -0,0 +1,24 @@
Package: FeatureHashing
Type: Package
Title: Creates a Model Matrix via Feature Hashing With a Formula
Interface
Version: 0.8.1
Date: 2014-08-01
Authors@R: as.person(c( "Wush Wu <wush978@gmail.com> [aut, cre]"))
Author: Wush Wu [aut, cre]
Maintainer: Wush Wu <wush978@gmail.com>
Description: Feature hashing, also called the hashing trick, is a method to
transform features into a vector. Without looking up the indices in an
associative array, it applies a hash function to the features and uses their
hash values as indices directly. The method of feature hashing in this package
was proposed in Weinberger et al. (2009). The hashing algorithm is the murmurhash3
from the digest package. Please see the README.md for more information.
License: GPL(>= 3)
Depends: R (>= 3.1), methods
Imports: Rcpp (>= 0.11), Matrix, digest(>= 0.6.8)
LinkingTo: Rcpp, digest(>= 0.6.8), BH
Suggests: pack, RUnit
SystemRequirements: C++11
BugReports: https://github.com/wush978/FeatureHashing/issues
URL: https://github.com/wush978/FeatureHashing
Built: R 3.1.2; x86_64-apple-darwin13.4.0; 2015-02-21 09:20:06 UTC; unix
4 changes: 4 additions & 0 deletions ..Rcheck/FeatureHashing/INDEX
@@ -0,0 +1,4 @@
CSCMatrix-class CSCMatrix
hashed.model.matrix Create a model matrix with feature hashing
tag Expand concatenated feature
test.tag test.tag
Binary file added ..Rcheck/FeatureHashing/Meta/Rd.rds
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/Meta/data.rds
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/Meta/hsearch.rds
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/Meta/links.rds
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/Meta/nsInfo.rds
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/Meta/package.rds
Binary file not shown.
12 changes: 12 additions & 0 deletions ..Rcheck/FeatureHashing/NAMESPACE
@@ -0,0 +1,12 @@
# Generated by roxygen2 (4.0.1): do not edit by hand

export(hash_h)
export(hash_xi)
export(hashed.model.matrix)
export(tag)
import(digest)
importClassesFrom(Matrix,dgCMatrix)
importFrom(Rcpp,evalCpp)
importFrom(methods,checkAtAssignment)
importFrom(methods,new)
useDynLib(FeatureHashing)
27 changes: 27 additions & 0 deletions ..Rcheck/FeatureHashing/R/FeatureHashing
@@ -0,0 +1,27 @@
# File share/R/nspackloader.R
# Part of the R package, http://www.R-project.org
#
# Copyright (C) 1995-2012 The R Core Team
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/

# Loader stub: lazy-load the package's R object database into its namespace.
# Wrapped in local() so the temporary variables below do not leak.
local({
# Name and library location of the namespace that is currently being loaded.
info <- loadingNamespaceInfo()
pkg <- info$pkgname
ns <- .getNamespace(as.name(pkg))
# Abort when no namespace environment is registered under this package name.
if (is.null(ns))
stop("cannot find namespace environment for ", pkg, domain = NA);
# Base path of the lazy-load database: <libname>/<pkg>/R/<pkg>.
dbbase <- file.path(info$libname, pkg, "R", pkg)
# Load every object except the ".__NAMESPACE__." entry into the namespace.
lazyLoad(dbbase, ns, filter = function(n) n != ".__NAMESPACE__.")
})
Binary file added ..Rcheck/FeatureHashing/R/FeatureHashing.rdb
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/R/FeatureHashing.rdx
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/data/test.tag.rda
Binary file not shown.
13 changes: 13 additions & 0 deletions ..Rcheck/FeatureHashing/help/AnIndex
@@ -0,0 +1,13 @@
%*%,CSCMatrix,numeric-method CSCMatrix-class
%*%,numeric,CSCMatrix-method CSCMatrix-class
CSCMatrix-class CSCMatrix-class
dim,CSCMatrix-method CSCMatrix-class
dim<-,CSCMatrix-method CSCMatrix-class
hashed.model.matrix hashed.model.matrix
hash_h hashed.model.matrix
hash_xi hashed.model.matrix
tag tag
test.tag test.tag
[,CSCMatrix,missing,numeric,ANY-method CSCMatrix-class
[,CSCMatrix,numeric,missing,ANY-method CSCMatrix-class
[,CSCMatrix,numeric,numeric,ANY-method CSCMatrix-class
Binary file added ..Rcheck/FeatureHashing/help/FeatureHashing.rdb
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/help/FeatureHashing.rdx
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/help/aliases.rds
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/help/paths.rds
Binary file not shown.
45 changes: 45 additions & 0 deletions ..Rcheck/FeatureHashing/html/00Index.html
@@ -0,0 +1,45 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head><title>R: Creates a Model Matrix via Feature Hashing With a Formula
Interface</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="R.css">
</head><body>
<h1> Creates a Model Matrix via Feature Hashing With a Formula
Interface
<img class="toplogo" src="../../../doc/html/logo.jpg" alt="[R logo]">
</h1>
<hr>
<div align="center">
<a href="../../../doc/html/packages.html"><img src="../../../doc/html/left.jpg" alt="[Up]" width="30" height="30" border="0"></a>
<a href="../../../doc/html/index.html"><img src="../../../doc/html/up.jpg" alt="[Top]" width="30" height="30" border="0"></a>
</div><h2>Documentation for package &lsquo;FeatureHashing&rsquo; version 0.8.1</h2>

<ul><li><a href="../DESCRIPTION">DESCRIPTION file</a>.</li>
</ul>

<h2>Help Pages</h2>


<table width="100%">
<tr><td width="25%"><a href="CSCMatrix-class.html">%*%-method</a></td>
<td>CSCMatrix</td></tr>
<tr><td width="25%"><a href="CSCMatrix-class.html">CSCMatrix-class</a></td>
<td>CSCMatrix</td></tr>
<tr><td width="25%"><a href="CSCMatrix-class.html">dim-method</a></td>
<td>CSCMatrix</td></tr>
<tr><td width="25%"><a href="CSCMatrix-class.html">dim&lt;--method</a></td>
<td>CSCMatrix</td></tr>
<tr><td width="25%"><a href="hashed.model.matrix.html">hashed.model.matrix</a></td>
<td>Create a model matrix with feature hashing</td></tr>
<tr><td width="25%"><a href="hashed.model.matrix.html">hash_h</a></td>
<td>Create a model matrix with feature hashing</td></tr>
<tr><td width="25%"><a href="hashed.model.matrix.html">hash_xi</a></td>
<td>Create a model matrix with feature hashing</td></tr>
<tr><td width="25%"><a href="tag.html">tag</a></td>
<td>Expand concatenated feature</td></tr>
<tr><td width="25%"><a href="test.tag.html">test.tag</a></td>
<td>test.tag</td></tr>
<tr><td width="25%"><a href="CSCMatrix-class.html">[-method</a></td>
<td>CSCMatrix</td></tr>
</table>
</body></html>
57 changes: 57 additions & 0 deletions ..Rcheck/FeatureHashing/html/R.css
@@ -0,0 +1,57 @@
BODY{ background: white;
color: black }

A:link{ background: white;
color: blue }
A:visited{ background: white;
color: rgb(50%, 0%, 50%) }

H1{ background: white;
color: rgb(55%, 55%, 55%);
font-family: monospace;
font-size: x-large;
text-align: center }

H2{ background: white;
color: rgb(40%, 40%, 40%);
font-family: monospace;
font-size: large;
text-align: center }

H3{ background: white;
color: rgb(40%, 40%, 40%);
font-family: monospace;
font-size: large }

H4{ background: white;
color: rgb(40%, 40%, 40%);
font-family: monospace;
font-style: italic;
font-size: large }

H5{ background: white;
color: rgb(40%, 40%, 40%);
font-family: monospace }

H6{ background: white;
color: rgb(40%, 40%, 40%);
font-family: monospace;
font-style: italic }

IMG.toplogo{ vertical-align: middle }

IMG.arrow{ width: 30px;
height: 30px;
border: 0 }

span.acronym{font-size: small}
span.env{font-family: monospace}
span.file{font-family: monospace}
span.option{font-family: monospace}
span.pkg{font-weight: bold}
span.samp{font-family: monospace}

div.vignettes a:hover {
background: rgb(85%, 85%, 85%);
}

Binary file added ..Rcheck/FeatureHashing/libs/FeatureHashing.so
Binary file not shown.
Binary file added ..Rcheck/FeatureHashing/libs/symbols.rds
Binary file not shown.
3 changes: 3 additions & 0 deletions ..Rcheck/FeatureHashing/runTest.R
@@ -0,0 +1,3 @@
# Driver script that runs the package's RUnit test suite.
library(RUnit)
# Define a suite named "FeatureHashing" from the files in the "tests" directory
# whose names match the regular expression "^test-" (i.e. start with "test-").
test <- defineTestSuite("FeatureHashing", dirs = "tests", testFileRegexp = "^test-")
# Execute the suite defined above.
runTestSuite(test)
5 changes: 3 additions & 2 deletions DESCRIPTION
Expand Up @@ -12,14 +12,15 @@ Description: Feature hashing, also called as the hashing trick, is a method to
hash values as indices directly. The method of feature hashing in this package
was proposed in Weinberger et al. (2009). The hashing algorithm is the murmurhash3
from the digest package. Please see the README.md for more information.
License: GPL(>= 3)
License: GPL(>= 3) | file LICENSE
Depends:
R (>= 3.1),
methods
Imports:
Rcpp (>= 0.11),
Matrix,
digest(>= 0.6.8)
digest(>= 0.6.8),
magrittr (>= 1.5)
LinkingTo: Rcpp, digest(>= 0.6.8), BH
Suggests: pack, RUnit
SystemRequirements: C++11
Expand Down
18 changes: 18 additions & 0 deletions FeatureHashing.Rproj
@@ -0,0 +1,18 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

BuildType: Package
PackageInstallArgs: --no-multiarch --with-keep.source
PackageCheckArgs: --as-cran
PackageRoxygenize: rd,namespace
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -7,6 +7,8 @@ export(tag)
import(digest)
importClassesFrom(Matrix,dgCMatrix)
importFrom(Rcpp,evalCpp)
importFrom(magrittr,"%<>%")
importFrom(magrittr,"%>%")
importFrom(methods,checkAtAssignment)
importFrom(methods,new)
useDynLib(FeatureHashing)
5 changes: 5 additions & 0 deletions R/RcppExports.R
Expand Up @@ -27,8 +27,13 @@ hash_h <- function(src) {
.Call('FeatureHashing_h', PACKAGE = 'FeatureHashing', src)
}

# Internal wrapper that forwards its arguments, unchanged and in order, to the
# compiled routine 'FeatureHashing_hashed_model_matrix_dataframe'.
#
# Merge-conflict resolution: the committed file still contained the
# `<<<<<<< HEAD` / `=======` / `>>>>>>> origin/master` markers, which made this
# file unparseable. The HEAD (dev/0.9) side is kept because it carries the
# additional `is_xi` argument; the origin/master side lacked it.
#
# NOTE(review): this wrapper looks auto-generated (RcppExports.R). The compiled
# side in src/RcppExports.cpp was also listed as conflicting, so confirm it
# accepts the same seven arguments and preferably regenerate both files with
# Rcpp::compileAttributes() rather than editing by hand.
.hashed.model.matrix.dataframe <- function(tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi) {
    .Call('FeatureHashing_hashed_model_matrix_dataframe', PACKAGE = 'FeatureHashing', tf, data, hash_size, transpose, retval, keep_hashing_mapping, is_xi)
}

Xv <- function(m, v, retval) {
Expand Down

0 comments on commit 9da7f9f

Please sign in to comment.