Commit: upload homework-3

yijunyang committed Oct 30, 2020
1 parent cd822d5 commit d976f1f
Showing 10 changed files with 450 additions and 0 deletions.
75 changes: 75 additions & 0 deletions R/gradient_descent_glm.R
@@ -0,0 +1,75 @@
#' @title Gradient Descent for GLM (constant step)
#' @author Yijun Yang
#' @description Implement a first-order solution to the GLM maximum-likelihood problem using only gradient information, avoiding the Hessian matrix.
#' @param X design matrix
#' @param Y response vector
#' @param mu_fun function mapping the linear predictor eta to the expected value
#' @param var_fun function applied to the response vector
#' @param maxit integer maximum number of iterations
#' @param gamma numeric step size
#' @param tol numeric convergence tolerance
#' @return a list containing the estimated beta coefficients
#' @examples
#' \dontrun{
#' glm_constant(X, Y, mu_fun = function(eta) 1/(1+exp(-eta)), var_fun = function(eta) eta)
#' }
#' @export

glm_constant <- function(X, Y,
                         mu_fun, var_fun,
                         maxit = 1e6,
                         tol = 1e-10,
                         gamma = 1e-5) {

  # Initialize the coefficient vector at zero.
  beta <- matrix(rep(0, ncol(X)), ncol = 1)

  for (i in seq_len(maxit)) {
    beta_old <- beta
    # Score (gradient of the log-likelihood): X'(Y - mu).
    grad <- t(X) %*% (var_fun(Y) - matrix(mu_fun(X %*% beta_old)))
    # Take a fixed-size step in the gradient direction.
    beta <- beta_old + gamma * grad
    # Stop when successive iterates are within tolerance.
    if (sqrt(crossprod(beta - beta_old)) < tol) break
  }

  list(beta = beta)
}


#' @title Gradient Descent for GLM (adaptive step)
#' @author Yijun Yang
#' @description Implement a first-order solution to the GLM maximum-likelihood problem using only gradient information with a momentum term, avoiding the Hessian matrix.
#' @param X design matrix
#' @param Y response vector
#' @param mu_fun function mapping the linear predictor eta to the expected value
#' @param var_fun function applied to the response vector
#' @param maxit integer maximum number of iterations
#' @param gamma numeric step size
#' @param tol numeric convergence tolerance
#' @param m numeric momentum parameter
#' @return a list containing the estimated beta coefficients
#' @examples
#' \dontrun{
#' glm_adapt(X, Y, mu_fun = function(eta) 1/(1+exp(-eta)), var_fun = function(eta) eta)
#' }
#' @export

glm_adapt <- function(X, Y,
                      mu_fun, var_fun,
                      maxit = 1e6,
                      tol = 1e-10,
                      gamma = 1e-5,
                      m = 0.9) {

  # Initialize the coefficients and the momentum term at zero.
  beta <- matrix(rep(0, ncol(X)), ncol = 1)
  mom <- matrix(rep(0, ncol(X)), ncol = 1)

  for (i in seq_len(maxit)) {
    beta_old <- beta
    mom_old <- mom
    # Scaled score: gamma * X'(Y - mu).
    grad <- gamma * t(X) %*% (var_fun(Y) - matrix(mu_fun(X %*% beta_old)))
    # Accumulate a decayed sum of past gradients and step along it.
    mom <- m * mom_old + grad
    beta <- beta_old + mom
    # Stop when successive iterates are within tolerance.
    if (sqrt(crossprod(beta - beta_old)) < tol) break
  }

  list(beta = beta)
}
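A quick usage sketch for both solvers (not part of this commit; the simulated data, the step size, and the identity `var_fun` are illustrative assumptions):

# Simulate a small logistic-regression problem.
set.seed(1)
X <- cbind(1, matrix(rnorm(200), ncol = 2))
beta_true <- c(-0.5, 1, 2)
Y <- rbinom(nrow(X), 1, 1 / (1 + exp(-X %*% beta_true)))

# Logistic mean function; var_fun is the identity here.
inv_logit <- function(eta) 1 / (1 + exp(-eta))
fit_const <- glm_constant(X, Y, mu_fun = inv_logit, var_fun = function(y) y, gamma = 1e-3)
fit_adapt <- glm_adapt(X, Y, mu_fun = inv_logit, var_fun = function(y) y, gamma = 1e-3)

# Compare with glm()'s IRLS solution.
cbind(fit_const$beta, fit_adapt$beta, coef(glm(Y ~ X - 1, family = binomial)))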
55 changes: 55 additions & 0 deletions R/multiclass_logistic.R
@@ -0,0 +1,55 @@
#' @title Multiclass Logistic Regression
#' @author Yijun Yang
#' @description Implement a classification model that generalizes logistic regression to more than two classes by fitting one binary (one-vs-rest) logistic model per class.
#' @param form a formula
#' @param d a data frame
#' @return a list with (1) the beta coefficients, (2) the misclassification error, and (3) a detailed classification table
#' @examples
#' \dontrun{
#' multiclass_logistic(form = species ~ bill_length_mm + bill_depth_mm, d = penguinsi)
#' }
#' @export

multiclass_logistic <- function(form, d) {

  mms <- make_model_matrices(form, d)
  X <- mms$X
  Y <- mms$Y

  # Class labels of the response variable.
  level <- levels(as.data.frame(d)[, all.vars(form)[1]])
  n <- length(level)

  # Fit one binary (one-vs-rest) logistic regression per class.
  fit <- list()
  for (i in seq_len(n)) {
    Y_tmp <- ifelse(Y == level[i], 1, 0)
    data_tmp <- d
    data_tmp[, all.vars(form)[1]] <- Y_tmp
    fit[[i]] <- glm(form, data = data_tmp, family = binomial)
  }

  # Collect the coefficients, one row per class.
  coef <- do.call(rbind, lapply(fit, coefficients))
  rownames(coef) <- level

  # Fitted class probabilities from each one-vs-rest model.
  prediction <- matrix(0, nrow = nrow(d), ncol = n)
  for (i in seq_len(n)) {
    prediction[, i] <- predict(fit[[i]], type = "response")
  }

  # Assign each observation to the class with the largest fitted probability.
  Y_hat <- apply(prediction, 1, which.max)
  Y_hat <- factor(Y_hat, levels = 1:n, labels = level)

  classerror <- mean(Y != Y_hat)
  classtable <- table(Y_hat, Y)

  list(Coefficients = coef,
       MisclassificationError = classerror,
       ClassificationTable = classtable)
}
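A short usage sketch (assuming the installed package exports `multiclass_logistic` and ships the `penguinsi` data set):

library(bis557)
data(penguinsi)

# Classify penguin species from two bill measurements.
fit <- multiclass_logistic(species ~ bill_length_mm + bill_depth_mm, d = penguinsi)

fit$Coefficients            # one row of logistic coefficients per species
fit$MisclassificationError  # overall training error rate
fit$ClassificationTable     # predicted vs. observed species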
11 changes: 11 additions & 0 deletions data-raw/penguinsi.R
@@ -0,0 +1,11 @@
# Build the penguinsi data set: the palmerpenguins data with missing
# values imputed by missForest.
library(magrittr)
library(palmerpenguins)
library(missForest)
library(tibble)

data(penguins)
penguinsi <- penguins %>%
  as.data.frame() %>%
  missForest() %$%
  as_tibble(ximp)

usethis::use_data(penguinsi)
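Once rebuilt, the imputed data set ships with the package and can be sanity-checked (assuming the package is installed):

data(penguinsi, package = "bis557")
sum(is.na(penguinsi))  # should be 0 after missForest imputation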
Binary file added data/penguinsi.rda
46 changes: 46 additions & 0 deletions man/glm_adapt.Rd

35 changes: 35 additions & 0 deletions man/glm_constant.Rd

27 changes: 27 additions & 0 deletions man/multiclass_logistic.Rd
50 changes: 50 additions & 0 deletions vignettes/homework-1.Rmd
@@ -0,0 +1,50 @@
---
title: "homework-1"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{BIS557 HW1 Vignette}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```

# Homework Assignment

This homework is due by the end of the day on September 18th 2020.

0. If you don't already have one, create a GitHub handle. Add your name,
netid, and GitHub handle to [this Google document](https://docs.google.com/document/d/18jyYEJCYTEHuavN8sBjgAt9cBTT6Yakoxnfx5hAiDR0/edit).
1. Create an R package called `bis557`.
2. Implement the `linear_model()` function and document it. It should
take a formula, a data frame, and a list of contrasts for factor
variables as input.
3. Add a data.frame called `lm_patho` as a data set to the package. Use
the `lm_patho.csv` file for the data. Create a `data-raw` directory in
your package (hint: use `usethis::use_data_raw()` and
`usethis::use_data()`) to add it to the package. Don’t forget to
document it.
4. Add the `test-linear-model.r` file to your regression testing (hint:
use `usethis::use_testthat()` to create the directory structure).
Make sure your implementation passes the tests.
5. Implement gradient descent for ordinary least squares (a minimal
sketch appears after this list).
6. Write test code for your gradient descent function.
7. Add the package to [Travis-CI](https://travis-ci.com/) and update
the `README.md` file so that the badge points to your Travis build
(hint: use `usethis::use_travis()`). You will know it is working
when the badge is green and gives the message “passing”. Extra hint:
sign up for the GitHub student pack.
8. Add code coverage with either [Coveralls](https://coveralls.io/) or
[CodeCov](https://codecov.io/) (hint: use `usethis::use_coverage()`).
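The sketch below illustrates item 5: gradient descent for ordinary least squares steps along the direction X'(y - Xb), proportional to the negative gradient of the squared-error loss. It is a minimal, self-contained sketch on simulated data; the function and variable names are illustrative, not the package's `gradient_descent()` implementation.

```{r, eval = FALSE}
# Gradient descent for ordinary least squares: minimize ||y - X b||^2.
# Illustrative sketch; not the package's gradient_descent() function.
ols_gd <- function(X, y, gamma = 1e-3, maxit = 1e5, tol = 1e-10) {
  b <- rep(0, ncol(X))
  for (i in seq_len(maxit)) {
    # Descent direction: X'(y - Xb), proportional to the negative gradient.
    step <- gamma * as.vector(t(X) %*% (y - X %*% b))
    b <- b + step
    # Stop when the update is negligibly small.
    if (sqrt(sum(step^2)) < tol) break
  }
  b
}

# Check against lm() on simulated data.
set.seed(42)
X <- cbind(1, matrix(rnorm(300), ncol = 3))
y <- X %*% c(2, -1, 0.5, 3) + rnorm(100)
cbind(ols_gd = ols_gd(X, y), lm = coef(lm(y ~ X - 1)))
```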


# Results

- Two functions, `linear_model` and `gradient_descent`, were created with documentation and uploaded to [GitHub](https://github.com/yijunyang/bis557).
- The corresponding test files were also created and uploaded.
