Commit c35be9d

jameslamb and hcho3 authored
[R] replace uses of T and F with TRUE and FALSE (dmlc#5778)
* [R-package] replace uses of T and F with TRUE and FALSE
* enable linting
* Remove skip

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>

1 parent cb7f7e5 · commit c35be9d

15 files changed: +32 −33 lines
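
Background for the change (an illustration, not part of the diff): in R, TRUE and FALSE are reserved words, while T and F are ordinary variables that merely default to those values and can be reassigned by any code that runs earlier. A minimal R sketch of the failure mode this commit guards against:

# T and F are just bindings in the base environment; they can be shadowed.
F <- 1                       # legal, silently redefines F in the current scope
identical(F, FALSE)          # now FALSE -- code written with bare F misbehaves
# TRUE <- 1                  # error: cannot assign to a reserved word
keep <- FALSE                # TRUE/FALSE can never be redefined, so they are safe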

R-package/demo/basic_walkthrough.R

+1 −1

@@ -100,7 +100,7 @@ print(paste("test-error=", err))
 
 # You can dump the tree you learned using xgb.dump into a text file
 dump_path = file.path(tempdir(), 'dump.raw.txt')
-xgb.dump(bst, dump_path, with_stats = T)
+xgb.dump(bst, dump_path, with_stats = TRUE)
 
 # Finally, you can check which features are the most important.
 print("Most important features (look at column Gain):")

R-package/demo/caret_wrapper.R

+1 −1

@@ -9,7 +9,7 @@ require(e1071)
 # Load Arthritis dataset in memory.
 data(Arthritis)
 # Create a copy of the dataset with data.table package (data.table is 100% compliant with R dataframe but its syntax is a lot more consistent and its performance are really good).
-df <- data.table(Arthritis, keep.rownames = F)
+df <- data.table(Arthritis, keep.rownames = FALSE)
 
 # Let's add some new categorical features to see if it helps. Of course these feature are highly correlated to the Age feature. Usually it's not a good thing in ML, but Tree algorithms (including boosted trees) are able to select the best features, even in case of highly correlated features.
 # For the first feature we create groups of age by rounding the real age. Note that we transform it to factor (categorical data) so the algorithm treat them as independant values.

R-package/demo/create_sparse_matrix.R

+1 −1

@@ -19,7 +19,7 @@ if (!require(vcd)) {
 data(Arthritis)
 
 # create a copy of the dataset with data.table package (data.table is 100% compliant with R dataframe but its syntax is a lot more consistent and its performance are really good).
-df <- data.table(Arthritis, keep.rownames = F)
+df <- data.table(Arthritis, keep.rownames = FALSE)
 
 # Let's have a look to the data.table
 cat("Print the dataset\n")

R-package/demo/interaction_constraints.R

+4 −4

@@ -19,18 +19,18 @@ treeInteractions <- function(input_tree, input_max_depth){
 setorderv(parents_left, 'ID_merge')
 setorderv(parents_right, 'ID_merge')
 
-trees <- merge(trees, parents_left, by='ID_merge', all.x=T)
+trees <- merge(trees, parents_left, by='ID_merge', all.x=TRUE)
 trees[!is.na(i.id), c(paste0('parent_', i-1), paste0('parent_feat_', i-1)):=list(i.id, i.feature)]
 trees[, c('i.id','i.feature'):=NULL]
 
-trees <- merge(trees, parents_right, by='ID_merge', all.x=T)
+trees <- merge(trees, parents_right, by='ID_merge', all.x=TRUE)
 trees[!is.na(i.id), c(paste0('parent_', i-1), paste0('parent_feat_', i-1)):=list(i.id, i.feature)]
 trees[, c('i.id','i.feature'):=NULL]
 }
 
 # Extract nodes with interactions
 interaction_trees <- trees[!is.na(Split) & !is.na(parent_1),
-c('Feature',paste0('parent_feat_',1:(input_max_depth-1))), with=F]
+c('Feature',paste0('parent_feat_',1:(input_max_depth-1))), with=FALSE]
 interaction_trees_split <- split(interaction_trees, 1:nrow(interaction_trees))
 interaction_list <- lapply(interaction_trees_split, as.character)
 

@@ -96,7 +96,7 @@ x1 <- sort(unique(x[['V1']]))
 for (i in 1:length(x1)){
 testdata <- copy(x[, -c('V1')])
 testdata[['V1']] <- x1[i]
-testdata <- testdata[, paste0('V',1:10), with=F]
+testdata <- testdata[, paste0('V',1:10), with=FALSE]
 pred <- predict(bst3, as.matrix(testdata))
 
 # Should not print out anything due to monotonic constraints

R-package/demo/tweedie_regression.R

+1 −1

@@ -13,7 +13,7 @@ exclude <- c('POLICYNO', 'PLCYDATE', 'CLM_FREQ5', 'CLM_AMT5', 'CLM_FLAG', 'IN_Y
 # retains the missing values
 # NOTE: this dataset is comes ready out of the box
 options(na.action = 'na.pass')
-x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = F])
+x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = FALSE])
 options(na.action = 'na.omit')
 
 # response

R-package/tests/testthat/test_helpers.R

+5 −5

@@ -12,7 +12,7 @@ flag_32bit = .Machine$sizeof.pointer != 8
 
 set.seed(1982)
 data(Arthritis)
-df <- data.table(Arthritis, keep.rownames = F)
+df <- data.table(Arthritis, keep.rownames = FALSE)
 df[,AgeDiscret := as.factor(round(Age / 10,0))]
 df[,AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
 df[,ID := NULL]

@@ -47,7 +47,7 @@ test_that("xgb.dump works", {
 if (!flag_32bit)
 expect_length(xgb.dump(bst.Tree), 200)
 dump_file = file.path(tempdir(), 'xgb.model.dump')
-expect_true(xgb.dump(bst.Tree, dump_file, with_stats = T))
+expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
 expect_true(file.exists(dump_file))
 expect_gt(file.size(dump_file), 8000)
 

@@ -160,16 +160,16 @@ test_that("SHAPs sum to predictions, with or without DART", {
 objective = "reg:squarederror",
 eval_metric = "rmse"),
 if (booster == "dart")
-list(rate_drop = .01, one_drop = T)),
+list(rate_drop = .01, one_drop = TRUE)),
 data = d,
 label = y,
 nrounds = nrounds)
 
 pr <- function(...)
 predict(fit, newdata = d, ...)
 pred <- pr()
-shap <- pr(predcontrib = T)
-shapi <- pr(predinteraction = T)
+shap <- pr(predcontrib = TRUE)
+shapi <- pr(predinteraction = TRUE)
 tol = 1e-5
 
 expect_equal(rowSums(shap), pred, tol = tol)

R-package/tests/testthat/test_interactions.R

+1 −1

@@ -107,7 +107,7 @@ test_that("SHAP contribution values are not NAN", {
 
 shaps <- as.data.frame(predict(fit,
 newdata = as.matrix(subset(d, fold == 1)[, ivs]),
-predcontrib = T))
+predcontrib = TRUE))
 result <- cbind(shaps, sum = rowSums(shaps), pred = predict(fit,
 newdata = as.matrix(subset(d, fold == 1)[, ivs])))
 

R-package/tests/testthat/test_lint.R

+3 −4

@@ -1,8 +1,6 @@
 context("Code is of high quality and lint free")
 test_that("Code Lint", {
 skip_on_cran()
-skip_on_travis()
-skip_if_not_installed("lintr")
 my_linters <- list(
 absolute_paths_linter=lintr::absolute_paths_linter,
 assignment_linter=lintr::assignment_linter,

@@ -21,7 +19,8 @@ test_that("Code Lint", {
 spaces_inside_linter=lintr::spaces_inside_linter,
 spaces_left_parentheses_linter=lintr::spaces_left_parentheses_linter,
 trailing_blank_lines_linter=lintr::trailing_blank_lines_linter,
-trailing_whitespace_linter=lintr::trailing_whitespace_linter
+trailing_whitespace_linter=lintr::trailing_whitespace_linter,
+true_false=lintr::T_and_F_symbol_linter
 )
-# lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
+lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
 })
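
The hunk above registers lintr's T_and_F_symbol_linter and enables the package-wide check via lintr::expect_lint_free(). As a rough local sketch, the same linter can be pointed at a single file; the path below is illustrative, and newer lintr releases expose the linter as a factory that must be called as T_and_F_symbol_linter():

library(lintr)
# Flag bare T/F symbols in one demo script; adjust the path as needed.
lint("R-package/demo/basic_walkthrough.R",
     linters = list(true_false = T_and_F_symbol_linter))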

R-package/vignettes/discoverYourData.Rmd

+1 −1

@@ -63,7 +63,7 @@ The first step is to load `Arthritis` dataset in memory and wrap it with `data.t
 
 ```{r, results='hide'}
 data(Arthritis)
-df <- data.table(Arthritis, keep.rownames = F)
+df <- data.table(Arthritis, keep.rownames = FALSE)
 ```
 
 > `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.

R-package/vignettes/xgboostPresentation.Rmd

+1 −1

@@ -363,7 +363,7 @@ xgb.plot.importance(importance_matrix = importance_matrix)
 You can dump the tree you learned using `xgb.dump` into a text file.
 
 ```{r dump, message=T, warning=F}
-xgb.dump(bst, with_stats = T)
+xgb.dump(bst, with_stats = TRUE)
 ```
 
 You can plot the trees from your model using ```xgb.plot.tree``

demo/data/gen_autoclaims.R

+1 −1

@@ -14,5 +14,5 @@ data$STATE = as.factor(data$STATE)
 data$CLASS = as.factor(data$CLASS)
 data$GENDER = as.factor(data$GENDER)
 
-data.dummy <- dummy.data.frame(data, dummy.class='factor', omit.constants=T);
+data.dummy <- dummy.data.frame(data, dummy.class='factor', omit.constants=TRUE);
 write.table(data.dummy, 'autoclaims.csv', sep=',', row.names=F, col.names=F, quote=F)

demo/kaggle-otto/otto_train_pred.R

+2 −2

@@ -1,8 +1,8 @@
 require(xgboost)
 require(methods)
 
-train = read.csv('data/train.csv',header=TRUE,stringsAsFactors = F)
-test = read.csv('data/test.csv',header=TRUE,stringsAsFactors = F)
+train = read.csv('data/train.csv',header=TRUE,stringsAsFactors = FALSE)
+test = read.csv('data/test.csv',header=TRUE,stringsAsFactors = FALSE)
 train = train[,-1]
 test = test[,-1]
 

demo/kaggle-otto/understandingXGBoostModel.Rmd

+8 −8

@@ -30,8 +30,8 @@ require(xgboost)
 require(methods)
 require(data.table)
 require(magrittr)
-train <- fread('data/train.csv', header = T, stringsAsFactors = F)
-test <- fread('data/test.csv', header=TRUE, stringsAsFactors = F)
+train <- fread('data/train.csv', header = T, stringsAsFactors = FALSE)
+test <- fread('data/test.csv', header=TRUE, stringsAsFactors = FALSE)
 ```
 > `magrittr` and `data.table` are here to make the code cleaner and much more rapid.
 

@@ -42,13 +42,13 @@ Let's explore the dataset.
 dim(train)
 
 # Training content
-train[1:6,1:5, with =F]
+train[1:6,1:5, with =FALSE]
 
 # Test dataset dimensions
 dim(test)
 
 # Test content
-test[1:6,1:5, with =F]
+test[1:6,1:5, with =FALSE]
 ```
 > We only display the 6 first rows and 5 first columns for convenience
 

@@ -70,7 +70,7 @@ According to its description, the **Otto** challenge is a multi class classifica
 
 ```{r searchLabel}
 # Check the content of the last column
-train[1:6, ncol(train), with = F]
+train[1:6, ncol(train), with = FALSE]
 # Save the name of the last column
 nameLastCol <- names(train)[ncol(train)]
 ```

@@ -86,7 +86,7 @@ For that purpose, we will:
 
 ```{r classToIntegers}
 # Convert from classes to numbers
-y <- train[, nameLastCol, with = F][[1]] %>% gsub('Class_','',.) %>% {as.integer(.) -1}
+y <- train[, nameLastCol, with = FALSE][[1]] %>% gsub('Class_','',.) %>% {as.integer(.) -1}
 
 # Display the first 5 levels
 y[1:5]

@@ -95,7 +95,7 @@ y[1:5]
 We remove label column from training dataset, otherwise **XGBoost** would use it to guess the labels!
 
 ```{r deleteCols, results='hide'}
-train[, nameLastCol:=NULL, with = F]
+train[, nameLastCol:=NULL, with = FALSE]
 ```
 
 `data.table` is an awesome implementation of data.frame, unfortunately it is not a format supported natively by **XGBoost**. We need to convert both datasets (training and test) in `numeric` Matrix format.

@@ -163,7 +163,7 @@ Each *split* is done on one feature only at one value.
 Let's see what the model looks like.
 
 ```{r modelDump}
-model <- xgb.dump(bst, with.stats = T)
+model <- xgb.dump(bst, with.stats = TRUE)
 model[1:10]
 ```
 > For convenience, we are displaying the first 10 lines of the model only.

doc/R-package/discoverYourData.md

+1 −1

@@ -52,7 +52,7 @@ The first step is to load `Arthritis` dataset in memory and wrap it with `data.t
 
 ```r
 data(Arthritis)
-df <- data.table(Arthritis, keep.rownames = F)
+df <- data.table(Arthritis, keep.rownames = FALSE)
 ```
 
 > `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.

doc/R-package/xgboostPresentation.md

+1 −1

@@ -489,7 +489,7 @@ You can dump the tree you learned using `xgb.dump` into a text file.
 
 
 ```r
-xgb.dump(bst, with_stats = T)
+xgb.dump(bst, with_stats = TRUE)
 ```
 
 ```
