-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
450 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#' @title Gradient Descent for GLM (constant step)
#' @author Yijun Yang
#' @description Implement a first-order solution for the GLM maximum likelihood
#'   problem using only gradient information, avoiding the Hessian matrix.
#'   Each iteration moves the coefficients by \code{gamma} times the gradient,
#'   where the gradient is the cross-product of \code{X} with
#'   \code{var_fun(Y)} minus the fitted mean.
#' @param X design matrix
#' @param Y response vector
#' @param mu_fun function from eta to the expected value
#' @param var_fun function applied to \code{Y} before the fitted mean is
#'   subtracted from it (the identity in the example below)
#' @param maxit integer maximum number of iterations
#' @param tol numeric tolerance parameter; iteration stops once the Euclidean
#'   norm of the change in the coefficients falls below it
#' @param gamma The step size
#' @return a list with a single element \code{beta}, a one-column matrix of
#'   coefficients
#' @examples
#' \dontrun{
#' glm_constant(X, Y, mu_fun = function(eta) 1/(1+exp(-eta)), var_fun = function(eta) eta)
#' }
#' @export
glm_constant <- function(X, Y,
                         mu_fun, var_fun,
                         maxit = 1e6,
                         tol = 1e-10,
                         gamma = 1e-5) {
  # One coefficient per column of X, started at zero. The shape is given to
  # matrix() directly rather than leaking `ncol` into rep().
  beta <- matrix(0, nrow = ncol(X), ncol = 1)

  # The transformed response does not change between iterations.
  target <- var_fun(Y)

  for (i in seq_len(maxit)) {
    beta_old <- beta
    resid <- target - matrix(mu_fun(X %*% beta_old))
    # crossprod(X, resid) is t(X) %*% resid without forming t(X) each time.
    beta <- beta_old + gamma * crossprod(X, resid)

    step_norm <- sqrt(sum((beta - beta_old)^2))
    if (!is.finite(step_norm)) {
      stop("gradient step is not finite; try a smaller `gamma`", call. = FALSE)
    }
    if (step_norm < tol) break
  }

  list(beta = beta)
}
|
||
|
||
#' @title Gradient Descent for GLM (adaptive step via momentum)
#' @author Yijun Yang
#' @description Implement a first-order solution for the GLM maximum likelihood
#'   problem using only gradient information, avoiding the Hessian matrix.
#'   Each iteration adds a momentum term to the coefficients: \code{m} times
#'   the previous momentum plus \code{gamma} times the current gradient.
#' @param X design matrix
#' @param Y response vector
#' @param mu_fun function from eta to the expected value
#' @param var_fun function applied to \code{Y} before the fitted mean is
#'   subtracted from it (the identity in the example below)
#' @param maxit integer maximum number of iterations
#' @param tol numeric tolerance parameter; iteration stops once the Euclidean
#'   norm of the change in the coefficients falls below it
#' @param gamma The step size
#' @param m momentum parameter, the weight given to the previous update
#' @return a list with a single element \code{beta}, a one-column matrix of
#'   coefficients
#' @examples
#' \dontrun{
#' glm_adapt(X, Y, mu_fun = function(eta) 1/(1+exp(-eta)), var_fun = function(eta) eta)
#' }
#' @export
glm_adapt <- function(X, Y,
                      mu_fun, var_fun,
                      maxit = 1e6,
                      tol = 1e-10,
                      gamma = 1e-5,
                      m = 0.9) {
  # Coefficients and momentum both start as zero column vectors. The shape is
  # given to matrix() directly rather than leaking `ncol` into rep().
  beta <- matrix(0, nrow = ncol(X), ncol = 1)
  mom <- matrix(0, nrow = ncol(X), ncol = 1)

  # The transformed response does not change between iterations.
  target <- var_fun(Y)

  for (i in seq_len(maxit)) {
    beta_old <- beta
    resid <- target - matrix(mu_fun(X %*% beta_old))
    # crossprod(X, resid) is t(X) %*% resid without forming t(X) each time.
    mom <- m * mom + gamma * crossprod(X, resid)
    beta <- beta_old + mom

    step_norm <- sqrt(sum((beta - beta_old)^2))
    if (!is.finite(step_norm)) {
      stop("gradient step is not finite; try a smaller `gamma` or `m`",
           call. = FALSE)
    }
    if (step_norm < tol) break
  }

  list(beta = beta)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#' @title Multiclass Logistic Regression
#' @author Yijun Yang
#' @description Describe and implement a classification model generalizing
#'   logistic regression to accommodate more than two classes. One binomial
#'   GLM is fitted per level of the response (that level against all others)
#'   and each observation is assigned to the level whose model gives the
#'   largest fitted probability.
#' @param form A formula; its first variable is taken as the response
#' @param d A dataframe; the response column must be a factor
#' @return A list with (1) \code{Coefficients}, a matrix with one row of beta
#'   coefficients per level; (2) \code{MisclassificationError}, the
#'   misclassification error; (3) \code{ClassificationTable}, a detailed
#'   classification table of predicted against observed levels
#' @examples
#' \dontrun{
#' multiclass_logistic(form = species ~ bill_length_mm + bill_depth_mm, d = penguinsi)
#' }
#' @export
multiclass_logistic <- function(form, d) {
  mms <- make_model_matrices(form, d)
  Y <- mms$Y

  response_name <- all.vars(form)[1]
  response <- as.data.frame(d)[, response_name]
  if (!is.factor(response)) {
    stop("the response `", response_name, "` must be a factor", call. = FALSE)
  }
  level <- levels(response)
  n <- length(level)
  if (n < 1) {
    stop("the response `", response_name, "` has no levels", call. = FALSE)
  }

  # One-vs-rest: recode the response as 1 for the current level and 0
  # otherwise, then fit an ordinary binomial GLM with the same formula.
  fit <- lapply(level, function(lv) {
    data_tmp <- d
    data_tmp[, response_name] <- ifelse(Y == lv, 1, 0)
    stats::glm(form, data = data_tmp, family = stats::binomial)
  })

  # Stack the coefficient vectors, one row per level. Binding all at once also
  # handles a single-level response, where a `2:n` loop would run backwards.
  coef_mat <- do.call(rbind, lapply(fit, function(f) f$coefficients))
  rownames(coef_mat) <- level

  # Fitted probabilities: one column per level's model.
  prediction <- matrix(0, nrow = nrow(d), ncol = n)
  for (i in seq_len(n)) {
    prediction[, i] <- stats::predict(fit[[i]], type = "response")
  }

  # Pick the level with the highest fitted probability for every row.
  Y_hat <- apply(prediction, 1, which.max)
  Y_hat <- factor(Y_hat, levels = seq_len(n), labels = level)

  classerror <- mean(Y != Y_hat)
  classtable <- table(Y_hat, Y)

  list(Coefficients = coef_mat,
       MisclassificationError = classerror,
       ClassificationTable = classtable)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Build the `penguinsi` data set: the palmerpenguins `penguins` data run
# through missForest(), stored as a tibble.
library(magrittr)
library(palmerpenguins)
library(missForest)
library(tibble)

data(penguins)

# %$% exposes the elements of the missForest() result by name, so `ximp`
# below refers to that element of the returned object.
penguinsi <- penguins %>%
  as.data.frame() %>%
  missForest() %$%
  as_tibble(ximp)

# Qualified call: usethis is not attached by any library() call above, so a
# bare use_data() only works when something else has already loaded it.
usethis::use_data(penguinsi)
Binary file not shown.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
--- | ||
title: "homework-1" | ||
output: rmarkdown::html_vignette | ||
vignette: > | ||
%\VignetteIndexEntry{BIS557 HW1 Vignette} | ||
%\VignetteEngine{knitr::rmarkdown} | ||
%\VignetteEncoding{UTF-8} | ||
--- | ||
|
||
```{r, include = FALSE} | ||
knitr::opts_chunk$set( | ||
collapse = TRUE, | ||
comment = "#>" | ||
) | ||
``` | ||
|
||
# Homework Assignment | ||
|
||
This homework is due by the end of the day on September 18th 2020. | ||
|
||
0. If you don't already have one, create a Github handle. Add your name, | ||
netid, and Github handle to [this Google document](https://docs.google.com/document/d/18jyYEJCYTEHuavN8sBjgAt9cBTT6Yakoxnfx5hAiDR0/edit) | ||
1. Create an R package called `bis557`. | ||
2. Implement the `linear_model()` function and document it. It should | ||
take a formula, a data frame and a list of contrasts for factor | ||
variables as input. | ||
3. Add a data.frame called `lm_patho` as a data set to the package. Use | ||
the `lm_path.csv` for the data. Create a `data-raw` directory in | ||
your package (hint: use `usethis::use_data_raw()` and | ||
`usethis::use_data()`) to add it to the package. Don’t forget to | ||
document it. | ||
4. Add the `test-linear-model.r` file to your regression testing (hint: | ||
use `usethis::use_testthat()` to create the directory structure). | ||
Make sure your implementation passes the tests. | ||
5. Implement gradient descent for ordinary least squares. | ||
6. Write test code for your gradient descent function. | ||
7. Add the package to [Travis-CI](https://travis-ci.com/) and update | ||
the `README.md` file so that the badge points to your Travis build | ||
(hint: use `usethis::use_travis()`). You will know it is working | ||
when the badge is green and gives the message “passing”. Extra hint: | ||
sign up for the Github student pack. | ||
8. Add code coverage (with either [Coveralls](https://coveralls.io/) or | ||
[CodeCov](https://codecov.io/)) (hint: use `usethis::use_coverage()`). | ||
|
||
|
||
# Results | ||
|
||
- Two functions: `linear_model` and `gradient_descent` are created with documentation, and uploaded to [github](https://github.com/yijunyang/bis557) | ||
- The corresponding test files are also created and uploaded. | ||
|
Oops, something went wrong.