In [1]:
library(lubridate)
library(tidyverse)
library(dplyr)
library(data.table)
library(glmnet)
library(R330)

In [2]:
# Read both input CSV files with fread and coerce each result to a plain
# data.frame through as.data.frame.matrix.
ass2 <- fread("../input/mahw2-attempt1/ass2_full.csv") %>%
  as.data.frame.matrix()
attempt1 <- fread("../input/mahw2-attempt1/attempt1.csv") %>%
  as.data.frame.matrix()

# Show the first five rows of the main table.
ass2[seq_len(5), ]

In [3]:
# Attach the columns of `attempt1` to `ass2`, matching rows on contact_id.
# dplyr joins take their key through `by`; `on =` is not a dplyr join
# parameter, so the intended key was never actually passed to the join.
meta <- left_join(ass2, attempt1, by = "contact_id")

# Force the two money columns to double.
meta$donation <- as.double(meta$donation)
meta$amount <- as.double(meta$amount)

# Missing `loyal` / `amount` values are treated as 0.
meta$loyal[is.na(meta$loyal)] <- 0
meta$amount[is.na(meta$amount)] <- 0
#meta[is.na(meta)] <- -1

# Show the first ten joined rows.
meta[1:10, ]

In [8]:
# Rows flagged calibration == 1 form the training set; every other row
# (including any where the comparison is NA) goes to the test set.
train <- meta[meta$calibration == 1, ]
test <- meta[!(meta$calibration == 1), ]

# Report the size of each partition.
nrow(train)
nrow(test)

In [15]:
# Print the index of every column of `meta` that contains at least one NA.
# anyNA() always yields a single TRUE/FALSE, so the `if` condition is a valid
# scalar even when a column is only partly missing (unique(is.na(.)) would be
# length 2 there). seq_len() also copes with a zero-column frame.
for (i in seq_len(ncol(meta))) {
  if (anyNA(meta[[i]])) {
    print(i)
  }
}

In [19]:
# Remove the identifier / bookkeeping columns from each partition, then drop
# every row that still contains an NA.
train <- train %>%
  subset(select = -c(contact_id, calibration, donation, act_date)) %>%
  drop_na()
test <- test %>%
  subset(select = -c(contact_id, calibration, donation, act_date)) %>%
  drop_na()

# Show the first five remaining training rows.
train[seq_len(5), ]

# Solicit

In [186]:
# Distinct NA states present in train's amount column
# (a lone FALSE means nothing is missing).
unique(is.na(train[["amount"]]))

In [20]:
# Predictor matrices for the `loyal` model: recency, frequency, their product
# and the log of each. deparse.level = 0 keeps the columns unnamed.
xtrain_solicit <- with(
  train,
  cbind(recency, frequency, recency * frequency,
        log(recency), log(frequency),
        deparse.level = 0)
)
xtest_solicit <- with(
  test,
  cbind(recency, frequency, recency * frequency,
        log(recency), log(frequency),
        deparse.level = 0)
)

# Responses: the `loyal` column of each partition.
ytrain_solicit <- train$loyal
ytest_solicit <- test$loyal

In [21]:
# Cross-validate one binomial glmnet per mixing value alpha = 0, 0.1, ..., 1
# and keep each fit under the key "alpha<value>".
fits_solicit <- list()
for (step in 0:10) {
  alpha_value <- step / 10
  key <- paste0("alpha", alpha_value)

  fits_solicit[[key]] <- cv.glmnet(
    xtrain_solicit,
    ytrain_solicit,
    family = "binomial",
    alpha = alpha_value,
    nfolds = 10,
    type.measure = "mse",
    trace.it = 1
  )
}

In [22]:
# Test-set mean squared error of every cross-validated `loyal` fit,
# one row per alpha.
#
# The fits are binomial, so predict() must be asked for type = "response" to
# get probabilities; its default returns the linear predictor (log-odds),
# which is not comparable with the 0/1 labels in ytest_solicit.
# The rows are collected in a list and bound once rather than growing the
# data frame inside the loop.
solicit_rows <- lapply(0:10, function(i) {
  name <- paste0("alpha", i / 10)
  fit <- fits_solicit[[name]]

  predicted <- predict(
    fit,
    newx = xtest_solicit,
    s = fit$lambda.1se,
    type = "response"
  )

  data.frame(
    alpha = i / 10,
    mse = mean((ytest_solicit - predicted)^2),
    fit.name = name
  )
})
results_solicit <- do.call(rbind, solicit_rows)

results_solicit

In [23]:
# Predicted response (probability scale) on the test matrix from the
# alpha = 0.7 fit, evaluated at that fit's lambda.1se.
vals_solicit <- predict(
  fits_solicit[["alpha0.7"]],
  newx = xtest_solicit,
  s = fits_solicit[["alpha0.7"]]$lambda.1se,
  type = "response"
)
vals_solicit

# Amount

In [24]:
# Predictor matrices for the amount model: recency, frequency, their product
# and the log of each. deparse.level = 0 keeps the columns unnamed.
xtrain_amount <- with(
  train,
  cbind(recency, frequency, recency * frequency,
        log(recency), log(frequency),
        deparse.level = 0)
)
xtest_amount <- with(
  test,
  cbind(recency, frequency, recency * frequency,
        log(recency), log(frequency),
        deparse.level = 0)
)

# Responses: the `avgamount` column of each partition.
ytrain_amount <- train$avgamount
ytest_amount <- test$avgamount

In [25]:
# Ordinary least squares for `amount` on recency, frequency, avgamount and
# firstdonation, each alongside its log, plus the recency-by-frequency
# interaction. Fitted on the training partition.
model <- lm(
  formula = amount ~ recency + log(recency) +
    frequency + log(frequency) +
    recency * frequency +
    avgamount + log(avgamount) +
    firstdonation + log(firstdonation),
  data = train
)

# Coefficient table and fit statistics.
print(summary(model))

In [26]:
# Predicted amount for every row of `test`.
# predict() on an lm object takes new observations through `newdata`, as a
# data frame holding the variables named in the formula. `newx` is a glmnet
# argument that lm's predict silently ignores, which made it return the
# fitted values of the training rows instead.
avg <- predict(model, newdata = test)
avg

In [144]:
# Cross-validate one gaussian glmnet per mixing value alpha = 0, 0.1, ..., 1
# for the amount response, keyed "alpha<value>".
#
# The previous loop body called cross.val(model, nfolds = 10), which never
# used the loop's alpha, so all eleven entries were the same computation.
# The cells that consume fits_amount read `$lambda.1se` and pass `newx`,
# which are cv.glmnet conventions, so the sweep is fitted with cv.glmnet on
# the amount matrices.
fits_amount <- list()
for (i in 0:10) {
  name <- paste0("alpha", i / 10)

  fits_amount[[name]] <- cv.glmnet(
    xtrain_amount,
    ytrain_amount,
    alpha = i / 10,
    type.measure = "mse",
    trace.it = 1,
    nfolds = 10
  )
}

In [137]:
# Test-set mean squared error of every entry in fits_amount, one row per
# alpha, each evaluated at that entry's lambda.1se.
results_amount <- data.frame()
for (step in 0:10) {
  alpha_value <- step / 10
  key <- paste0("alpha", alpha_value)
  fit <- fits_amount[[key]]

  test_pred <- predict(fit, s = fit$lambda.1se, newx = xtest_amount)
  test_mse <- mean((ytest_amount - test_pred)^2)

  results_amount <- rbind(
    results_amount,
    data.frame(alpha = alpha_value, mse = test_mse, fit.name = key)
  )
}

results_amount

In [139]:
# Response-scale predictions on the test matrix from the alpha = 0.1 entry
# of fits_amount, evaluated at that entry's lambda.1se.
vals_avg <- predict(
  fits_amount[["alpha0.1"]],
  newx = xtest_amount,
  s = fits_amount[["alpha0.1"]]$lambda.1se,
  type = "response"
)
vals_avg

# Making a DF

In [27]:
# Empty three-column frame with one row per non-calibration row of `meta`.
# The row count is derived from `meta` with the same filter later used to
# fill contact_id, instead of a hard-coded 61744 that silently goes stale
# when the input changes.
n_holdout <- nrow(meta[meta$calibration != 1, ])
df <- data.frame(matrix(ncol = 3, nrow = n_holdout))
colnames(df) <- c("contact_id", "solicit", "avgamount")

In [30]:
# Fill the frame for the non-calibration contacts.
holdout <- meta[meta$calibration != 1, ]
df$contact_id <- holdout$contact_id

# `test` had contact_id, calibration, donation and act_date removed and then
# every row containing an NA dropped, so predictions exist only for holdout
# rows that are complete in the remaining columns. Rebuild that row mask here
# so each prediction lands on the contact it was computed for, rather than on
# the first k rows with hard-coded k.
feature_cols <- setdiff(
  names(holdout),
  c("contact_id", "calibration", "donation", "act_date")
)
scored <- complete.cases(holdout[, feature_cols, drop = FALSE])

# Guard: the mask must select exactly as many rows as were predicted.
stopifnot(sum(scored) == length(vals_solicit))

df$solicit <- NA_real_
df$solicit[scored] <- as.numeric(vals_solicit)

# Amount predictions come from `model` evaluated on the same scored rows, so
# their length and order match the mask by construction.
df$avgamount <- NA_real_
df$avgamount[scored] <- as.numeric(
  predict(model, newdata = holdout[scored, , drop = FALSE])
)

# Rows without a prediction (and any other NA cell) become 0.
df[is.na(df)] <- 0

# Product of the two predictions; flag contacts where it exceeds 2.
df$threshold <- df$solicit * df$avgamount
df$sumbit <- as.integer(df$threshold > 2)
df

In [32]:
# Keep only the contact id (column 1) and the 0/1 flag (column 5).
final_df <- df[, c("contact_id", "sumbit")]

# Show the first ten rows.
final_df[seq_len(10), ]

In [205]:
# Write final_df as tab-separated text with no header row and no row names.
write.table(
  final_df,
  file = "submission.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

In [None]:
# Positional 70/30 split of `meta`: the first N rows train, the rest test.
N <- round(0.7 * nrow(meta))

# Predictors are every column except `loyal`; the response is `loyal` alone.
# Each piece is converted to a numeric matrix with data.matrix().
xtrain <- data.matrix(meta[1:N, names(meta) != "loyal", drop = FALSE])
ytrain <- data.matrix(meta[1:N, "loyal", drop = FALSE])
xtest <- data.matrix(
  meta[(N + 1):nrow(meta), names(meta) != "loyal", drop = FALSE]
)
ytest <- data.matrix(meta[(N + 1):nrow(meta), "loyal", drop = FALSE])