In [7]:
options(warn = 0)
library(dplyr)

In [2]:
input_file = "../input/UCI HAR Dataset"

# Checking if archieve already exists.
if (!file.exists(input_file)) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
  zip_file = "../input/Coursera_DS3_Final.zip"
  download.file(fileURL, destfile = zip_file, method = "curl")
  unzip(zip_file, exdir = "../input/")
}

In [10]:
features <- read.table(paste(input_file, "features.txt", sep = "/"), col.names = c("n", "functions"))
activities <- read.table(paste(input_file, "activity_labels.txt", sep="/"), col.names = c("code", "activity"))

x_test <- read.table(paste(input_file, "test", "X_test.txt", sep="/"), col.names = features$functions)
x_train <- read.table(paste(input_file, "train", "X_train.txt", sep="/"), col.names = features$functions)

y_test <- read.table(paste(input_file, 'test', "y_test.txt", sep="/"), col.names = "code")
y_train <- read.table(paste(input_file, 'train', "y_train.txt", sep="/"), col.names = "code")

subject_test <- read.table(paste(input_file,'test', 'subject_test.txt', sep='/'), col.names = "subject")
subject_train <- read.table(paste(input_file,'train', 'subject_train.txt', sep='/'), col.names = "subject")

In [11]:
X <- rbind(x_train, x_test)
Y <- rbind(y_train, y_test)
Subject <- rbind(subject_train, subject_test)
Merged_Data <- cbind(Subject, Y, X)

TidyData <- Merged_Data %>% select(subject, code, contains("mean"), contains("std"))
TidyData$code <- activities[TidyData$code, 2]

names(TidyData)[2] <- "activity"
names(TidyData) <- gsub("Acc", "Accelerometer", names(TidyData))
names(TidyData) <- gsub("Gyro", "Gyroscope", names(TidyData))
names(TidyData) <- gsub("BodyBody", "Body", names(TidyData))
names(TidyData) <- gsub("Mag", "Magnitude", names(TidyData))
names(TidyData) <- gsub("^t", "Time", names(TidyData))
names(TidyData) <- gsub("^f", "Frequency", names(TidyData))
names(TidyData) <- gsub("tBody", "TimeBody", names(TidyData))
names(TidyData) <- gsub("-mean()", "Mean", names(TidyData), ignore.case = TRUE)
names(TidyData) <- gsub("-std()", "STD", names(TidyData), ignore.case = TRUE)
names(TidyData) <- gsub("-freq()", "Frequency", names(TidyData), ignore.case = TRUE)
names(TidyData) <- gsub("angle", "Angle", names(TidyData))
names(TidyData) <- gsub("gravity", "Gravity", names(TidyData))


In [12]:
FinalData <- TidyData %>%
  group_by(subject, activity) %>%
  summarise_all(funs(mean))


"`funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas: 

  # Simple named list: 
  list(mean = mean, median = median)

  # Auto named with `tibble::lst()`: 
  tibble::lst(mean, median)

  # Using lambdas
  list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))


In [15]:
write.table(FinalData, "../output/FinalData.txt", row.name = FALSE)