<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Motivation" data-toc-modified-id="Motivation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Motivation</a></span><ul class="toc-item"><li><ul class="toc-item"><li><span><a href="#Generate-a-sample-dataset" data-toc-modified-id="Generate-a-sample-dataset-1.0.1"><span class="toc-item-num">1.0.1&nbsp;&nbsp;</span>Generate a sample dataset</a></span></li><li><span><a href="#lapply-is-equivalent-to-the-following" data-toc-modified-id="lapply-is-equivalent-to-the-following-1.0.2"><span class="toc-item-num">1.0.2&nbsp;&nbsp;</span>lapply is equivalent to the following</a></span></li></ul></li><li><span><a href="#extra-argumentを使うのは必ずしもうまくいかない" data-toc-modified-id="extra-argumentを使うのは必ずしもうまくいかない-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>extra argumentを使うのは必ずしもうまくいかない</a></span></li><li><span><a href="#コードで似たような表現を繰り返すためにbugが起こりやすい例" data-toc-modified-id="コードで似たような表現を繰り返すためにbugが起こりやすい例-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>コードで似たような表現を繰り返すためにbugが起こりやすい例</a></span></li><li><span><a href="#以下のようにしたほうが良い" data-toc-modified-id="以下のようにしたほうが良い-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>以下のようにしたほうが良い</a></span></li></ul></li><li><span><a href="#anonymous-function" data-toc-modified-id="anonymous-function-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>anonymous function</a></span></li></ul></div>

# Motivation

### Generate a sample dataset

In [3]:
# Build a reproducible 6 x 6 data frame whose cells are drawn (with
# replacement) from 1:10 and the value -99; columns are named a..f.
set.seed(1014)
# `replace` is spelled out in full rather than relying on partial argument
# matching of `rep`.
df <- data.frame(replicate(6, sample(c(1:10, -99), 6, replace = TRUE)))
names(df) <- letters[1:6]
df

a,b,c,d,e,f
1,6,1,5,-99,1
10,4,4,-99,9,3
7,9,5,4,1,4
2,9,3,8,6,8
1,10,5,9,8,6
6,2,1,3,8,5


### lapply is equivalent to the following

In [10]:
# Minimal re-implementation of lapply(): call `f` on every element of `x`.
#
# x   : a list, or a data frame (iterated column by column via `[[`).
# f   : function invoked once per element.
# ... : extra arguments passed unchanged to every call of `f`.
#
# Returns a list with one entry per element of `x`, carrying over names(x).
my_lapply <- function(x, f, ...) {
  out <- vector("list", length(x))
  for (i in seq_along(x)) {
    # Assign through `[` with a wrapped value: `out[[i]] <- NULL` would
    # delete the slot instead of storing NULL and shorten the result.
    out[i] <- list(f(x[[i]], ...))
  }
  # Keep element names so the result lines up with what lapply() returns.
  names(out) <- names(x)
  out
}

In [7]:
# Return `x` with every element equal to -99 replaced by NA.
fix_missing <- function(x) {
  replace(x, x == -99, NA)
}

In [8]:
lapply(df , fix_missing)

In [11]:
my_lapply(df , fix_missing)

In [None]:
# Function factory: returns a function that replaces every element of its
# argument equal to `na_value` with NA.
#
# na_value : the value that the returned function turns into NA.
missing_fixer <- function(na_value) {
  # Evaluate the argument now. Without this, lazy evaluation postpones it
  # until the returned function is first called, by which time a variable
  # passed in by the caller may hold a different value.
  force(na_value)
  function(x) {
    x[x == na_value] <- NA
    x
  }
}
fix_missing_99 <- missing_fixer(-99)
fix_missing_999 <- missing_fixer(-999)

In [14]:
fix_missing_99(c(-99, -999))

In [15]:
fix_missing_999(c(-99,-999))

## extra argumentを使うのは必ずしもうまくいかない

extra argumentを使う例：

In [16]:
# Return `x` with every element equal to `na.value` replaced by NA.
#
# x        : vector to clean.
# na.value : the value to be turned into NA.
fix_missing <- function(x, na.value) {
  matches_sentinel <- x == na.value
  replace(x, matches_sentinel, NA)
}

## コードで似たような表現を繰り返すためにbugが起こりやすい例

In [17]:
# Five-number description of `x`: mean, median, standard deviation, median
# absolute deviation and interquartile range, in that order, as one unnamed
# numeric vector. Missing values are not removed.
# Note: this definition masks base::summary() for the rest of the session.
summary <- function(x) {
  location <- c(mean(x), median(x))
  dispersion <- c(sd(x), mad(x), IQR(x))
  c(location, dispersion)
}
lapply(df, summary)

In [18]:
# Same five statistics as before (mean, median, sd, mad, IQR, in that order,
# as one unnamed numeric vector), but every call drops NA values first.
# The `na.rm = TRUE` argument has to be repeated by hand in each call.
# Note: this definition masks base::summary() for the rest of the session.
summary <- function(x) {
  location <- c(mean(x, na.rm = TRUE), median(x, na.rm = TRUE))
  dispersion <- c(
    sd(x, na.rm = TRUE),
    mad(x, na.rm = TRUE),
    IQR(x, na.rm = TRUE)
  )
  c(location, dispersion)
}

In [20]:
lapply(df , summary)

## 以下のようにしたほうが良い

In [22]:
# Apply mean, median, sd, mad and IQR (in that order) to `x`, each with
# `na.rm = TRUE`, and return the five results as an unnamed list.
# Keeping the functions in a list means `na.rm = TRUE` is written only once.
# Note: this definition masks base::summary() for the rest of the session.
summary <- function(x) {
  statistics <- list(mean, median, sd, mad, IQR)
  lapply(statistics, function(stat) stat(x, na.rm = TRUE))
}

In [27]:
summary(c(1,2))

In [28]:
lapply(df, summary)

# anonymous function

In [1]:
# Set the session's CRAN repository to the cran.ism.ac.jp mirror.
# Uses https so package downloads are not fetched over an unencrypted
# connection; the redundant structure() wrapper is dropped.
options(repos = c(CRAN = "https://cran.ism.ac.jp/"))

In [4]:
# Generate a sample dataset: a reproducible 6 x 6 data frame whose cells are
# drawn (with replacement) from 1:10 and the value -99; columns are a..f.
set.seed(1014)
# `replace` is spelled out in full rather than relying on partial argument
# matching of `rep`.
df <- data.frame(replicate(6, sample(c(1:10, -99), 6, replace = TRUE)))
names(df) <- letters[1:6]
df

a,b,c,d,e,f
1,6,1,5,-99,1
10,4,4,-99,9,3
7,9,5,4,1,4
2,9,3,8,6,8
1,10,5,9,8,6
6,2,1,3,8,5


In [6]:
# Show the raw 6 x 6 matrix that replicate() produces before it is wrapped in
# a data frame. No seed is set here, so the values differ between runs.
# `replace` is spelled out in full rather than partially matched via `rep`.
replicate(6, sample(c(1:10, -99), 6, replace = TRUE))

0,1,2,3,4,5
8,8,-99,6,10,-99
-99,8,5,9,4,6
2,2,6,3,7,6
3,9,4,8,4,4
8,9,2,1,7,5
6,-99,6,4,3,5


In [7]:
# Return `x` with every element equal to -99 replaced by NA.
fix_missing <- function(x) {
  is_sentinel <- x == -99
  x[is_sentinel] <- NA
  x
}
# Clean every column of df with fix_missing(). Assigning into `df[]` writes
# the list returned by lapply() back column by column, so df stays a data
# frame (a plain `df <- lapply(...)` would turn it into a list).
df[] <- lapply(df, fix_missing)

In [8]:
df

a,b,c,d,e,f
1,6,1,5.0,,1
10,4,4,,9.0,3
7,9,5,4.0,1.0,4
2,9,3,8.0,6.0,8
1,10,5,9.0,8.0,6
6,2,1,3.0,8.0,5


In [11]:
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



In [12]:
df$a %>% length


In [15]:
# Keep a copy of df under the name tmp; use `<-` for assignment.
tmp <- df

In [18]:
data.frame(lapply(df , length))

a,b,c,d,e,f
6,6,6,6,6,6


In [17]:
tmp

a,b,c,d,e,f
6,6,6,6,6,6
6,6,6,6,6,6
6,6,6,6,6,6
6,6,6,6,6,6
6,6,6,6,6,6
6,6,6,6,6,6


In [19]:
# Function factory: returns a function that replaces every element of its
# argument equal to `na_value` with NA.
#
# na_value : the value that the returned function turns into NA.
missing_fixer <- function(na_value) {
  # Evaluate the argument now. Without this, lazy evaluation postpones it
  # until the returned function is first called, by which time a variable
  # passed in by the caller may hold a different value.
  force(na_value)
  function(x) {
    x[x == na_value] <- NA
    x
  }
}
fix_missing_99 <- missing_fixer(-99)
fix_missing_999 <- missing_fixer(-999)

fix_missing_99(c(-99, -999))