Skip to content

Commit

Permalink
Testing out a few things
Browse files Browse the repository at this point in the history
  • Loading branch information
vietexob committed Nov 24, 2014
1 parent 44ae434 commit 59d7230
Show file tree
Hide file tree
Showing 7 changed files with 15,097 additions and 34,019 deletions.
74 changes: 74 additions & 0 deletions .Rapp.history
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
version()
help()
quit()
library(rj)
quit()
?ln
?log
1/0.05 * (10 * log(3) + log(1/0.05))
quit()
epsilon <- 0.05
delta <- 0.05
vch <- 56
1/epsilon(4*log2(2/delta) + 8*vch*log2(13/epsilon))
1/epsilon * (4*log2(2/delta) + 8*vch*log2(13/epsilon))
vch
delta
epsilon
quit()
install.packages("R.matlab")
library(R.matlab)
?R.matlab
quit()
install.packages("neuralnet")
library(neuralnet)
getwd()
ls()
?multinom
library(nnet)
?multinom
getwd()
library(nnet)
data(iris)
set.seed(1234)
x <- iris[sample(1:nrow(iris)), ]
dim(x)
x <- cbind(x, useless = rnorm(nrow(x)))
?cbind
x[, 1] <- x[, 1] + rnorm(nrow(x))
x[, 2] <- x[, 2] + rnorm(nrow(x))
x[, 3] <- x[, 3] + rnorm(nrow(x))
plot(x, col = x$Species)
names(x)
head(x)
table(x$Species)
train <- x[1:100, ]
test <- x[101:150, ]
library(caret)
install.packages("caret")
library(caret)
model.knn3 <- knn3(Species ~ ., k = 5, data = train)
?knn3
model.knn3
summary(model.knn3)
predict(model.knn3, test, type = "class")
table(true = test$Species, predicted = predict(model.knn3, test, type = "class"))
?nnet
model.nnet <- nnet(Species ~ ., data = train, size = 10)
model.nnet
predict(model_nnet, test)
predict(model.nnet, test, type = "class")
predict(model.nnet, test)
table(true = test$Species, predicted = predict(model.nnet, test, type = "class"))
?nnet
quit()
setwd("./Documents/workspace/601-project-new/")
list.files()
install.packages("randomForest")
library(randomForest)
rfNews()
?randomForest
source("./code/runHCL-SVM.R")
out.filename
ls()
quit()
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.Rproj.user
.Rhistory
.RData
30 changes: 16 additions & 14 deletions code/runHCL-SVM.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,23 @@ Y.train <- read.csv(file = Y.train.filename, header = FALSE)
Y.factor <- as.factor(Y.train$V1)

train.data <- as.matrix(X.train)
test.data <- as.matrix(X.test)
train.pca <- prcomp(train.data, scale. = TRUE, tol = 0)
test.pca <- prcomp(test.data, scale. = TRUE, tol = 0)

# Choose the number of PC's
epsilon <- 0.015
nPrinComps <- 20
nPrinComps <- determineNumPrinComps(train.data, train.pca, epsilon, nPrinComps)
# # Choose the number of PC's
# epsilon <- 0.015
# nPrinComps <- 20
# nPrinComps <- determineNumPrinComps(train.data, train.pca, epsilon, nPrinComps)
nPrinComps <- 500

out.filename <- "./figures/agnes-train-ward.pdf"
# out.filename <- "./figures/agnes-train-ward.pdf"
out.filename <- ""
K <- 4 # choose 4 clusters
train.data.cl <- getAgnesClusters(train.data, train.pca, nPrinComps, out.filename, K)
projTrainData <- train.data.cl[, 1:nPrinComps]
projTestData <- test.pca$x # get the rotated data
projTestData <- projTestData[, 1:nPrinComps]

# Plot the K clusters projected on the first 2 PC's
out.filename <- paste("./figures/agnes-train-cl-", K, ".pdf", sep = "")
Expand All @@ -39,13 +45,8 @@ mainStr <- paste("Hierarchical Clustering with K =", K)
plot(projTrainData, col = train.data.cl$Cluster, main = mainStr)
dev.off()

test.data <- as.matrix(X.test)
# Use k-NN to predict the class of test data
test.cl <- knn(train.data, test.data, cl = train.data.cl$Cluster, k = 5)

test.pca <- prcomp(test.data, scale. = TRUE, tol = 0)
projTestData <- test.pca$x # get the rotated data
projTestData <- projTestData[, 1:nPrinComps]
test.cl <- knn(projTrainData, projTestData, cl = train.data.cl$Cluster, k = 5)

# Plot the K clusters projected on the first 2 PC's
out.filename <- paste("./figures/knn-test-cl-", K, ".pdf", sep = "")
Expand All @@ -54,11 +55,12 @@ mainStr <- paste("k-NN Clustering with K =", K)
plot(projTestData, col = test.cl, main = mainStr)
dev.off()

new.train.data <- as.data.frame(train.data)
new.train.data <- cbind(new.train.data, class = Y.factor)
all.preds <- data.frame()
for(k in 1:K) {
subset.train <- train.data[train.data.cl$Cluster == k, ]
subset.Y.factor <- Y.factor[train.data.cl$Cluster == k]
subset.svm <- svm(x = subset.train, y = subset.Y.factor, kernel = "radial")
subset.train <- new.train.data[train.data.cl$Cluster == k, ]
subset.svm <- svm(class ~ ., data = subset.train, kernel = "radial")

subset.test <- test.data[test.cl == k, ]
subset.indices <- which(test.cl == k)
Expand Down
6 changes: 3 additions & 3 deletions code/runPCA-ANN.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ projData <- projData[, 1:nPrinComps]
new.train.data <- projData[1:nrow(train.data), ]
new.train.data <- as.data.frame(new.train.data)
new.train.data <- cbind(new.train.data, class = Y.factor)
model.ann <- nnet(class ~ ., data = new.train.data, size = 20,
linout = TRUE, MaxNWts = 3000)
model.ann <- nnet(class ~ ., data = new.train.data, size = 250,
linout = TRUE, MaxNWts = 300000)

new.test.data <- projData[(nrow(train.data)+1):nrow(projData), ]
preds <- predict(model.ann, new.test.data)
Expand All @@ -43,5 +43,5 @@ preds <- predict(model.ann, new.test.data)
cat.preds <- max.col(preds)
pred.data <- data.frame(Id = 1:nrow(test.data), Category = cat.preds)

out.filename <- "./data/ANN-PCA.csv"
out.filename <- paste("./data/ANN-PCA-", nPrinComps, ".csv", sep = "")
write.csv(pred.data, file = out.filename, row.names = FALSE)
Loading

0 comments on commit 59d7230

Please sign in to comment.