Commit: upload homework-3

yijunyang committed Oct 30, 2020
1 parent cd822d5 commit d976f1f
Showing 10 changed files with 450 additions and 0 deletions.
75 changes: 75 additions & 0 deletions R/gradient_descent_glm.R
@@ -0,0 +1,75 @@
#' @title Gradient Descent for GLM (constant step)
#' @author Yijun Yang
#' @description Implement a first-order solution to the GLM maximum-likelihood problem using only gradient information, avoiding the Hessian matrix.
#' @param X design matrix
#' @param Y response vector
#' @param mu_fun function mapping the linear predictor eta to the expected value
#' @param var_fun function applied to the response vector
#' @param maxit integer maximum number of iterations
#' @param gamma numeric step size
#' @param tol numeric convergence tolerance
#' @return a list containing the estimated beta coefficients
#' @examples
#' \dontrun{
#' glm_constant(X, Y, mu_fun = function(eta) 1/(1+exp(-eta)), var_fun = function(eta) eta)
#' }
#' @export

glm_constant <- function(X, Y,
                         mu_fun, var_fun,
                         maxit = 1e6,
                         tol = 1e-10,
                         gamma = 1e-5) {

  # Initialize the coefficient vector at zero.
  beta <- matrix(rep(0, ncol(X)), ncol = 1)

  for (i in seq_len(maxit)) {
    beta_old <- beta
    # Score (gradient of the log-likelihood): X'(Y - mu).
    grad <- t(X) %*% (var_fun(Y) - matrix(mu_fun(X %*% beta_old)))
    # Take a fixed-size step in the gradient direction.
    beta <- beta_old + gamma * grad
    # Stop when successive iterates are within tolerance.
    if (sqrt(crossprod(beta - beta_old)) < tol) break
  }

  list(beta = beta)
}


#' @title Gradient Descent for GLM (adaptive step)
#' @author Yijun Yang
#' @description Implement a first-order solution to the GLM maximum-likelihood problem using only gradient information with a momentum term, avoiding the Hessian matrix.
#' @param X design matrix
#' @param Y response vector
#' @param mu_fun function mapping the linear predictor eta to the expected value
#' @param var_fun function applied to the response vector
#' @param maxit integer maximum number of iterations
#' @param gamma numeric step size
#' @param tol numeric convergence tolerance
#' @param m numeric momentum parameter
#' @return a list containing the estimated beta coefficients
#' @examples
#' \dontrun{
#' glm_adapt(X, Y, mu_fun = function(eta) 1/(1+exp(-eta)), var_fun = function(eta) eta)
#' }
#' @export

glm_adapt <- function(X, Y,
                      mu_fun, var_fun,
                      maxit = 1e6,
                      tol = 1e-10,
                      gamma = 1e-5,
                      m = 0.9) {

  # Initialize the coefficients and the momentum term at zero.
  beta <- matrix(rep(0, ncol(X)), ncol = 1)
  mom <- matrix(rep(0, ncol(X)), ncol = 1)

  for (i in seq_len(maxit)) {
    beta_old <- beta
    mom_old <- mom
    # Scaled score: gamma * X'(Y - mu).
    grad <- gamma * t(X) %*% (var_fun(Y) - matrix(mu_fun(X %*% beta_old)))
    # Accumulate a decayed sum of past gradients and step along it.
    mom <- m * mom_old + grad
    beta <- beta_old + mom
    # Stop when successive iterates are within tolerance.
    if (sqrt(crossprod(beta - beta_old)) < tol) break
  }

  list(beta = beta)
}
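A quick usage sketch for both solvers (not part of this commit; the simulated data, the step size, and the identity `var_fun` are illustrative assumptions):

# Simulate a small logistic-regression problem.
set.seed(1)
X <- cbind(1, matrix(rnorm(200), ncol = 2))
beta_true <- c(-0.5, 1, 2)
Y <- rbinom(nrow(X), 1, 1 / (1 + exp(-X %*% beta_true)))

# Logistic mean function; var_fun is the identity here.
inv_logit <- function(eta) 1 / (1 + exp(-eta))
fit_const <- glm_constant(X, Y, mu_fun = inv_logit, var_fun = function(y) y, gamma = 1e-3)
fit_adapt <- glm_adapt(X, Y, mu_fun = inv_logit, var_fun = function(y) y, gamma = 1e-3)

# Compare with glm()'s IRLS solution.
cbind(fit_const$beta, fit_adapt$beta, coef(glm(Y ~ X - 1, family = binomial)))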
55 changes: 55 additions & 0 deletions R/multiclass_logistic.R
@@ -0,0 +1,55 @@
#' @title Multiclass Logistic Regression
#' @author Yijun Yang
#' @description Implement a classification model that generalizes logistic regression to more than two classes by fitting one binary (one-vs-rest) logistic model per class.
#' @param form a formula
#' @param d a data frame
#' @return a list with (1) the beta coefficients, (2) the misclassification error, and (3) a detailed classification table
#' @examples
#' \dontrun{
#' multiclass_logistic(form = species ~ bill_length_mm + bill_depth_mm, d = penguinsi)
#' }
#' @export

multiclass_logistic <- function(form, d) {

  mms <- make_model_matrices(form, d)
  X <- mms$X
  Y <- mms$Y

  # Class labels of the response variable.
  level <- levels(as.data.frame(d)[, all.vars(form)[1]])
  n <- length(level)

  # Fit one binary (one-vs-rest) logistic regression per class.
  fit <- list()
  for (i in seq_len(n)) {
    Y_tmp <- ifelse(Y == level[i], 1, 0)
    data_tmp <- d
    data_tmp[, all.vars(form)[1]] <- Y_tmp
    fit[[i]] <- glm(form, data = data_tmp, family = binomial)
  }

  # Collect the coefficients, one row per class.
  coef <- do.call(rbind, lapply(fit, coefficients))
  rownames(coef) <- level

  # Fitted class probabilities from each one-vs-rest model.
  prediction <- matrix(0, nrow = nrow(d), ncol = n)
  for (i in seq_len(n)) {
    prediction[, i] <- predict(fit[[i]], type = "response")
  }

  # Assign each observation to the class with the largest fitted probability.
  Y_hat <- apply(prediction, 1, which.max)
  Y_hat <- factor(Y_hat, levels = 1:n, labels = level)

  classerror <- mean(Y != Y_hat)
  classtable <- table(Y_hat, Y)

  list(Coefficients = coef,
       MisclassificationError = classerror,
       ClassificationTable = classtable)
}
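A short usage sketch (assuming the installed package exports `multiclass_logistic` and ships the `penguinsi` data set):

library(bis557)
data(penguinsi)

# Classify penguin species from two bill measurements.
fit <- multiclass_logistic(species ~ bill_length_mm + bill_depth_mm, d = penguinsi)

fit$Coefficients            # one row of logistic coefficients per species
fit$MisclassificationError  # overall training error rate
fit$ClassificationTable     # predicted vs. observed species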
11 changes: 11 additions & 0 deletions data-raw/penguinsi.R
@@ -0,0 +1,11 @@
# Build the penguinsi data set: the palmerpenguins data with missing
# values imputed by missForest.
library(magrittr)
library(palmerpenguins)
library(missForest)
library(tibble)

data(penguins)
penguinsi <- penguins %>%
  as.data.frame() %>%
  missForest() %$%
  as_tibble(ximp)

usethis::use_data(penguinsi)
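Once rebuilt, the imputed data set ships with the package and can be sanity-checked (assuming the package is installed):

data(penguinsi, package = "bis557")
sum(is.na(penguinsi))  # should be 0 after missForest imputation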
Binary file added data/penguinsi.rda
46 changes: 46 additions & 0 deletions man/glm_adapt.Rd

35 changes: 35 additions & 0 deletions man/glm_constant.Rd

27 changes: 27 additions & 0 deletions man/multiclass_logistic.Rd
50 changes: 50 additions & 0 deletions vignettes/homework-1.Rmd
@@ -0,0 +1,50 @@
---
title: "homework-1"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{BIS557 HW1 Vignette}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```

# Homework Assignment

This homework is due by the end of the day on September 18th 2020.

0. If you don't already have one, create a GitHub handle. Add your name,
netid, and GitHub handle to [this Google document](https://docs.google.com/document/d/18jyYEJCYTEHuavN8sBjgAt9cBTT6Yakoxnfx5hAiDR0/edit).
1. Create an R package called `bis557`.
2. Implement the `linear_model()` function and document it. It should
take a formula, a data frame, and a list of contrasts for factor
variables as input.
3. Add a data.frame called `lm_patho` as a data set to the package. Use
the `lm_patho.csv` file for the data. Create a `data-raw` directory in
your package (hint: use `usethis::use_data_raw()` and
`usethis::use_data()`) to add it to the package. Don’t forget to
document it.
4. Add the `test-linear-model.r` file to your regression testing (hint:
use `usethis::use_testthat()` to create the directory structure).
Make sure your implementation passes the tests.
5. Implement gradient descent for ordinary least squares (a minimal
sketch appears after this list).
6. Write test code for your gradient descent function.
7. Add the package to [Travis-CI](https://travis-ci.com/) and update
the `README.md` file so that the badge points to your Travis build
(hint: use `usethis::use_travis()`). You will know it is working
when the badge is green and gives the message “passing”. Extra hint:
sign up for the GitHub student pack.
8. Add code coverage with either [Coveralls](https://coveralls.io/) or
[CodeCov](https://codecov.io/) (hint: use `usethis::use_coverage()`).
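The sketch below illustrates item 5: gradient descent for ordinary least squares steps along the direction X'(y - Xb), proportional to the negative gradient of the squared-error loss. It is a minimal, self-contained sketch on simulated data; the function and variable names are illustrative, not the package's `gradient_descent()` implementation.

```{r, eval = FALSE}
# Gradient descent for ordinary least squares: minimize ||y - X b||^2.
# Illustrative sketch; not the package's gradient_descent() function.
ols_gd <- function(X, y, gamma = 1e-3, maxit = 1e5, tol = 1e-10) {
  b <- rep(0, ncol(X))
  for (i in seq_len(maxit)) {
    # Descent direction: X'(y - Xb), proportional to the negative gradient.
    step <- gamma * as.vector(t(X) %*% (y - X %*% b))
    b <- b + step
    # Stop when the update is negligibly small.
    if (sqrt(sum(step^2)) < tol) break
  }
  b
}

# Check against lm() on simulated data.
set.seed(42)
X <- cbind(1, matrix(rnorm(300), ncol = 3))
y <- X %*% c(2, -1, 0.5, 3) + rnorm(100)
cbind(ols_gd = ols_gd(X, y), lm = coef(lm(y ~ X - 1)))
```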


# Results

- Two functions, `linear_model` and `gradient_descent`, were created with documentation and uploaded to [GitHub](https://github.com/yijunyang/bis557).
- The corresponding test files were also created and uploaded.
