-
Notifications
You must be signed in to change notification settings - Fork 0
/
kobe.R
155 lines (145 loc) · 4.93 KB
/
kobe.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Library
library(tidyverse) # Data wrangle
library(lubridate) # Date
# Working directory
# Location of the input CSV. The trailing slash matters because the full path
# is assembled with paste0(), which inserts no separator.
working_dir <- "C:/Users/Public/medium_kobe/"
file_name <- "kobe.csv"
file_dir <- paste0(working_dir, file_name)
data <- read.csv(file_dir, stringsAsFactors = FALSE)

# Pre-setting
# Derived columns used by the questions below:
#   shot_point              - first character of shot_type, as a number
#   shot_gain               - shot_point * shot_made_flag
#   total_seconds_remaining - minutes_remaining * 60 + seconds_remaining
data <- data %>%
  mutate(
    shot_point = as.numeric(substring(shot_type, 1, 1)),
    shot_gain = shot_point * shot_made_flag,
    total_seconds_remaining = minutes_remaining * 60 + seconds_remaining
  )
# game_date is parsed from "year/month/day" text into a Date.
data[, "game_date"] <- as.Date(data[, "game_date"], format = "%Y/%m/%d")

# Na-checking
# Number of missing values per column. is.na() on a data frame already yields
# a logical matrix, so colSums() counts directly without apply() coercing every
# column to a common type first.
colSums(is.na(data))
#(1)
# For rows with opponent == "HOU": per shot_type, FGp is the sum of
# shot_made_flag divided by the number of rows, and shoot_number is the number
# of rows.
data_1 <-
  data %>%
  filter(opponent == "HOU") %>%
  group_by(shot_type) %>%
  summarise(
    FGp = sum(shot_made_flag) / n(),
    shoot_number = n()
  )
#(2)
# Two-stage aggregation: first total shot_gain per (game_id, opponent), then
# the mean of those per-game totals for each opponent, sorted ascending.
data_2 <-
  data %>%
  group_by(game_id, opponent) %>%
  summarise(game_score = sum(shot_gain)) %>%
  group_by(opponent) %>%
  summarise(mean_score = mean(game_score)) %>%
  arrange(mean_score)
#(3)
# Total shot_gain per (game_id, opponent) over rows with playoffs == 1,
# period == 4 and at most 3 * 60 seconds remaining, largest totals first.
data_3 <-
  data %>%
  filter(playoffs == 1, period == 4, total_seconds_remaining <= 3 * 60) %>%
  group_by(game_id, opponent) %>%
  summarise(total_scores = sum(shot_gain)) %>%
  # summarise() only peels off the last grouping variable; drop the leftover
  # game_id grouping so data_3 is a plain, ungrouped tibble.
  ungroup() %>%
  arrange(desc(total_scores))
#(4)
# Per season, FGp (sum of shot_made_flag over row count) restricted to rows
# with playoffs == 1, period == 4, at most 1 * 60 seconds remaining and
# action_type "Jump Shot". The result is printed as a plain data frame and
# also kept in data_4.
data_4 <-
  data %>%
  filter(
    playoffs == 1,
    period == 4,
    total_seconds_remaining <= 1 * 60,
    action_type == "Jump Shot"
  ) %>%
  group_by(season) %>%
  summarise(FGp = sum(shot_made_flag) / n()) %>%
  print.data.frame()
#(5)
#' Locate every maximal run of consecutive 1s in a 0/1 sequence
#'
#' @param x A vector of 0/1 flags, or a data frame / tibble whose last column
#'   holds the flags (the column `pull()` would pick by default).
#' @return A data frame with one row per run and the columns `start_index`
#'   (position of the first 1), `end_index` (position of the last 1) and
#'   `interval` (run length). It has zero rows when `x` contains no 1.
#' @details Runs touching the last element are reported, and empty input is
#'   accepted. Missing values raise an error because they cannot be classified
#'   as part of a run or a break.
continuous_detect <- function(x) {
  if (inherits(x, "data.frame")) {
    x <- x[[ncol(x)]]
  }
  if (anyNA(x)) {
    stop("`x` must not contain missing values", call. = FALSE)
  }

  # rle() collapses the flag vector into alternating TRUE/FALSE runs; the
  # cumulative lengths give each run's last position.
  runs <- rle(x == 1)
  run_end <- cumsum(runs$lengths)
  run_start <- run_end - runs$lengths + 1L
  is_hit <- runs$values

  data.frame(
    start_index = run_start[is_hit],
    end_index = run_end[is_hit],
    interval = runs$lengths[is_hit]
  )
}
# Per-game FGp (sum of shot_made_flag over row count), ordered by game_date.
# meet_goal is 0 when FGp < 0.33 and 1 otherwise.
data_5_intme <-
  data %>%
  group_by(game_id, game_date) %>%
  summarise(FGp = sum(shot_made_flag) / n()) %>%
  # Drop the leftover game_id grouping so the row-wise steps below operate on
  # a plain tibble.
  ungroup() %>%
  arrange(game_date) %>%
  mutate(meet_goal = if_else(FGp < 0.33, 0, 1))

# Runs of consecutive games with meet_goal == 1. The flag column is passed by
# name rather than by position so reordering columns cannot break it.
dt <- continuous_detect(data_5_intme$meet_goal)

# The three longest runs.
date <- dt %>%
  arrange(desc(interval)) %>%
  head(3)

# Translate run boundaries (row positions in data_5_intme) into game dates.
# Building the data frame column by column keeps interval numeric instead of
# coercing it to text through a character matrix.
data_5 <- data.frame(
  starting_date = as.character(data_5_intme$game_date[date$start_index]),
  ending_date = as.character(data_5_intme$game_date[date$end_index]),
  interval = date$interval,
  stringsAsFactors = FALSE
)
data_5
#(6)
# Games (keyed by game_date) whose shot_gain total in periods 1-2 exceeds the
# total in all later periods. Each half is summed by condition rather than by
# position, so a game with shots in only one half gets 0 for the other half
# instead of NA.
# NOTE(review): periods above 4 count towards the second half here, while
# data_6 below keeps only periods 1:4 - confirm this asymmetry is intended.
FsurpassS_id <-
  data %>%
  group_by(game_date) %>%
  summarise(
    gains_diff = sum(shot_gain[period <= 2]) - sum(shot_gain[period > 2])
  ) %>%
  filter(gains_diff > 0)

# For those games: FGp (sum of shot_made_flag over row count) and total
# shot_gain over periods 1:4, with gains_diff attached, lowest FGp first.
data_6 <-
  data %>%
  filter(game_date %in% FsurpassS_id$game_date, period %in% 1:4) %>%
  group_by(game_date, opponent) %>%
  summarise(
    FGp = sum(shot_made_flag) / n(),
    points = sum(shot_gain)
  ) %>%
  ungroup() %>%
  # Explicit key: the join must not silently start matching on any other
  # column the two tables may come to share.
  inner_join(FsurpassS_id, by = "game_date") %>%
  arrange(FGp)
#(7)
# Number of rows kept by head() when games are ranked by interval below.
continuous_time <- 3

#' Length of the longest run of consecutive 0s in a 0/1 sequence
#'
#' @param x A vector of 0/1 flags.
#' @return A single number: the length of the longest unbroken run of 0s, or 0
#'   when `x` contains no 0 (including when `x` is empty).
#' @details Missing values raise an error because they cannot be classified as
#'   part of a run or a break.
continuous_max_intvl <- function(x) {
  if (anyNA(x)) {
    stop("`x` must not contain missing values", call. = FALSE)
  }
  # rle() yields alternating runs; keep only the lengths of the zero runs.
  runs <- rle(x == 0)
  # The leading 0L makes max() well-defined when there is no zero run at all.
  max(0L, runs$lengths[runs$values])
}
# One tibble per game_date, rows ordered by period.
# NOTE(review): within a period the rows keep their order from the input file
# (arrange() is stable); this presumably matches chronological shot order -
# verify against the source data.
data_7_ini <-
  data %>%
  select(game_date, period, minutes_remaining, shot_made_flag) %>%
  arrange(game_date, period) %>%
  group_split(game_date)

# Longest run of shot_made_flag == 0 in each game. vapply() iterates the list
# directly (safe for an empty list) and checks each result is a single number.
total_intvl <- vapply(
  data_7_ini,
  function(game) continuous_max_intvl(game$shot_made_flag),
  numeric(1)
)

# Date of each game, in the same order as total_intvl.
total_date <- vapply(
  data_7_ini,
  function(game) as.character(game$game_date[1]),
  character(1)
)
total_date <- as.Date(total_date, format = "%Y-%m-%d")
date_intvl <- data.frame(game_date = total_date, interval = total_intvl)

# The continuous_time games with the largest interval.
index <- date_intvl %>%
  arrange(desc(interval)) %>%
  head(continuous_time)

# Total shot_gain for those games with interval attached, largest interval
# first.
data_7 <-
  data %>%
  filter(game_date %in% index$game_date) %>%
  group_by(game_date, opponent) %>%
  summarise(points = sum(shot_gain)) %>%
  ungroup() %>%
  # Explicit key so the join cannot silently pick up other shared columns.
  inner_join(index, by = "game_date") %>%
  arrange(desc(interval))