<a href="https://colab.research.google.com/github/woncoh1/sss2csv/blob/main/sss2csv_nested.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# [User Guide](https://github.com/woncoh1/sas2csv)
1. [Define Constants](#define)
2. Runtime → Run all (Ctrl+F9)
3. Enter [Authentication Code](#auth)
4. [Convert Files](#convert)

<a name="define"></a>
# Define Constants

In [None]:
#@title Enter Main Folder ID {run: 'auto'}
# Google Drive ID of the main folder. The "List Folders" cell lists the
# sub-folders directly under it, and input files are then searched for inside
# each of those sub-folders.
FOLDER_ID <- '' #@param {type: 'string'}

In [None]:
#@title Select File Extension {run: 'auto'}
# Extension of the input files. It is used twice below: to build the Drive
# search query (see get_query) and to pick the matching reader from `reads`.
EXTENSION <- 'sas7bdat' #@param ['sas7bdat', 'sav', 'dta']

In [None]:
#@title Select Character Encoding {run: 'auto'}
# Character encoding of the input files; passed as the `encoding` argument to
# the reader function in the conversion loop.
ENCODING <- 'cp949' #@param ['utf-8', 'latin1', 'cp949', 'euc-kr']

In [None]:
#@title Delete all Input Files? {run: 'auto'}
#@markdown **WARNING**:
#@markdown - Selecting "Yes" will move all raw files to trash
#@markdown - You need file-level owner permission to move files to trash
DELETE_ALL <- 'No' #@param ['Yes', 'No']
# Lookup table translating the form's answer into a logical flag.
DELETES <- setNames(c(FALSE, TRUE), c('No', 'Yes'))
# Logical switch checked by the conversion loop before trashing a raw file.
DELETE_RAW <- DELETES[[DELETE_ALL]]

# Import Libraries

In [None]:
library(tidyverse)
library(haven)
library(glue)
library(googledrive)

# Authenticate Drive
- Accessing files in Google Drive requires authentication, which in turn requires an interactive R session
- Unfortunately, the R session in Colab is non-interactive by default, so we monkey-patch `httr` and `rlang` to report an interactive session, as described [here](https://towardsdatascience.com/how-to-use-r-in-google-colab-b6e02d736497#2176):

In [None]:
# Glob pattern for a file that only exists inside a Colab runtime. The Python
# minor version is matched with a wildcard so that detection keeps working when
# Colab upgrades its bundled interpreter (a fixed 'python3.7' path silently
# disables the patch below on every other version).
COLAB_KERNEL <- '/usr/local/lib/python3*/dist-packages/google/colab/_ipython.py'
is_colab <- length(Sys.glob(COLAB_KERNEL)) > 0
if (is_colab) {
    # Install R.utils only when it is missing, so re-running the cell is cheap.
    if (!requireNamespace('R.utils', quietly=TRUE)) {
        install.packages('R.utils')
    }
    library('R.utils')
    library('httr')
    # Make httr and rlang report an interactive session so that the
    # authentication prompt in the next section can be shown.
    reassignInPackage('is_interactive', pkgName='httr', function() TRUE)
    options(rlang_interactive=TRUE)
}

<a name="auth"></a>
## Authentication Code

In [None]:
# Start the Drive authentication flow with out-of-band auth and token caching
# both switched on.
drive_auth(
    cache=TRUE,
    use_oob=TRUE
)

# List Folders

In [None]:
# Sub-folders directly under the main folder (trashed ones excluded), sorted
# by name. googledrive's status messages are silenced while listing.
folders <- with_drive_quiet(
    arrange(
        drive_ls(as_dribble(as_id(FOLDER_ID)), type='folder', trashed=FALSE),
        name
    )
)

In [None]:
# Show the ID and name of every folder that was found.
select(folders, id, name)

# List Files

In [None]:
#' Build the Drive search clause that filters files by extension.
#'
#' @param extension File extension to look for, e.g. 'sav'.
#' @return A glue string of the form "fileExtension contains '<extension>'".
get_query <- function(extension) {
    template <- "fileExtension contains '{extension}'"
    glue(template)
}

In [None]:
#' List the files with a given extension inside one Drive folder.
#'
#' @param id Drive ID of the folder to search.
#' @param extension File extension used to build the search query.
#' @return The matching, non-trashed files as a dribble sorted by name.
get_files <- function(id, extension) {
    folder <- as_dribble(as_id(id))
    matches <- drive_ls(folder, q=get_query(extension), trashed=FALSE)
    arrange(matches, name)
}

In [None]:
# Collect the matching files of every sub-folder into a single table, with
# googledrive's status messages silenced.
files <- with_drive_quiet(
    bind_rows(map(folders$id, get_files, EXTENSION))
)

In [None]:
# Show the ID and name of every file that will be converted.
select(files, id, name)

<a name="convert"></a>
# Convert Files

In [None]:
# Dispatch table: file extension -> haven reader function for that format.
reads <- list(
    sas7bdat=read_sas,
    sav=read_sav,
    dta=read_dta
)

In [None]:
# Convert every listed file, one at a time:
#   1. download the raw file into the working directory,
#   2. read it with the reader matching EXTENSION and write it out as CSV,
#   3. upload the CSV into the raw file's parent folder on Drive,
#   4. move the raw file to trash when DELETE_RAW is TRUE,
#   5. remove the local copies and report progress.

# Matches only the final extension, so 'a.v1.sav' becomes 'a.v1.csv'. Stripping
# from the first dot would map 'a.v1.sav' and 'a.v2.sav' to the same 'a.csv',
# and the second upload would overwrite the first.
last_extension <- '\\.[^.]*$'
n_files <- nrow(files)
for (file in seq_len(n_files)) {
    entry <- files[file,]
    raw <- entry$id
    inp <- entry$name
    out <- inp %>% str_replace(last_extension, '.csv')
    # The first parent recorded in the Drive metadata is the upload target.
    parent <- entry$drive_resource[[1]]$parents[[1]] %>% as_id()
    raw %>% drive_download(overwrite=TRUE)
    inp %>% reads[[EXTENSION]](encoding=ENCODING) %>% write_excel_csv(out)
    out %>% drive_upload(path=parent, overwrite=TRUE)
    if (DELETE_RAW) raw %>% drive_trash()
    # Delete the local copies without going through a shell, so that names
    # containing spaces or shell metacharacters are handled correctly.
    file.remove(inp, out)
    cat(paste(file, '/', n_files, 'files\n'))
    cat(paste(round(file/n_files*100, digits=2), '% completed\n\n'))
}

# References
1. https://developers.google.com/drive/api/v2/reference/files/trash
2. https://developers.google.com/drive/api/guides/ref-roles