In [1]:
# Path of the raw data set; later cells read from this location.
data_file <- "../input/household_power_consumption.txt"

# getting the data if not available
if (!file.exists(data_file)) {
    print("Data file not found")
    data_url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
    input_dir <- dirname(data_file)
    zip_file <- file.path(input_dir, "data.zip")

    # download.file() does not create missing directories
    dir.create(input_dir, showWarnings = FALSE, recursive = TRUE)

    # the archive is binary; request a binary transfer explicitly so the
    # zip is not mangled by text-mode line-ending translation
    download.file(url = data_url, destfile = zip_file, mode = "wb")
    unzip(zip_file, exdir = input_dir)

    # fail here, with a clear message, rather than later inside read.csv()
    if (!file.exists(data_file)) {
        stop("Expected file not found after unzip: ", data_file, call. = FALSE)
    }
}

In [2]:
# Read only the rows for the dates of interest instead of the whole file.
# NOTE(review): the original hard-coded offsets (skip = 66636, 2880 rows) are
# assumed to correspond to exactly these two days (d/m/Y) - confirm.
target_dates <- c("1/2/2007", "2/2/2007")

# One data row is enough to recover the column names.
headers <- read.csv(file = data_file, header = TRUE, sep = ";", nrows = 1)

# Load just the Date column (all other columns are dropped via "NULL")
# to locate the wanted rows without parsing the full table.
date_only <- ifelse(names(headers) == "Date", "character", "NULL")
all_dates <- read.csv(file = data_file,
        header = TRUE, sep = ";",
        colClasses = date_only
        )[["Date"]]
wanted_rows <- which(all_dates %in% target_dates)

# skip/nrows can only describe one contiguous run of rows
stopifnot(length(wanted_rows) > 0, all(diff(wanted_rows) == 1L))

df <- read.csv(file = data_file,
        header = FALSE, sep = ";",
        skip = wanted_rows[1],        # header line + data rows before the first match
        nrows = length(wanted_rows),
        na.strings = "?",             # assumes "?" marks missing values in this file - confirm
        col.names = names(headers)
        )

# cleaning the data
names(df) <- names(headers)

In [3]:
# Combine the separate Date and Time text columns into one timestamp column.
# strptime() returns POSIXlt (a list-based class); convert to POSIXct, the
# atomic date-time class intended for storage in data-frame columns.
df$Date_Time <- as.POSIXct(
    strptime(paste(df$Date, df$Time, sep = " "), format = "%d/%m/%Y %H:%M:%S")
)

# The raw text columns are no longer needed once Date_Time exists.
df <- subset(df, select = -c(Date, Time))

In [8]:
# Show the prepared data frame for a visual sanity check.
print(x = df)

     Global_active_power Global_reactive_power Voltage Global_intensity
1                  0.326                 0.128  243.15              1.4
2                  0.326                 0.130  243.32              1.4
3                  0.324                 0.132  243.51              1.4
4                  0.324                 0.134  243.90              1.4
5                  0.322                 0.130  243.16              1.4
6                  0.320                 0.126  242.29              1.4
7                  0.320                 0.126  242.46              1.4
8                  0.320                 0.126  242.63              1.4
9                  0.320                 0.128  242.70              1.4
10                 0.236                 0.000  242.89              1.0
11                 0.226                 0.000  243.00              1.0
12                 0.224                 0.000  242.50              1.0
13                 0.224                 0.000  242.18          

In [None]:
# Plot 1: histogram of Global_active_power, written to figures/plot1.png.

# png() cannot write into a directory that does not exist
dir.create("figures", showWarnings = FALSE)
png(filename = "figures/plot1.png")

hist(df$Global_active_power,
    # breaks = 10,
    col = "red",
    main = "Global Active Power",
    xlab = "Global Active Power (kilowatts)",
    ylab = "Frequency"
)

# close the device so the file is flushed to disk
dev.off()

In [None]:
# Plot 2: Global_active_power over time as a line chart.
# presumably get_df.R prepares `df` - verify against that script
source("get_df.R")

# A dedicated PNG device is opened (and closed) here only when the current
# device number is 3; for any other current device the plot is drawn on
# whatever device is active.
# NOTE(review): 3 is a hard-coded device number, and dev.cur() reports 1 when
# no device is open - confirm this is the intended test for the environment
# this runs in.
append_to_current_plot <- unname(dev.cur() != 3)

if (!append_to_current_plot) {
    # png() cannot write into a directory that does not exist
    dir.create("figures", showWarnings = FALSE)
    png(filename = "figures/plot2.png")
}

plot(
    x = df$Date_Time,
    y = df$Global_active_power,
    type = "l",
    xlab = "",
    ylab = "Global Active Power (kilowatts)"
)

# only close the device if this cell opened it
if (!append_to_current_plot) {
    dev.off()
}

In [None]:
# Plot 3: the three Sub_metering series over time on one set of axes.

# A dedicated PNG device is opened (and closed) here only when the current
# device number is 3; for any other current device the plot is drawn on
# whatever device is active.
# NOTE(review): 3 is a hard-coded device number, and dev.cur() reports 1 when
# no device is open - confirm this is the intended test for the environment
# this runs in.
append_to_current_plot <- unname(dev.cur() != 3)

if (!append_to_current_plot) {
    # png() cannot write into a directory that does not exist
    dir.create("figures", showWarnings = FALSE)
    png(filename = "figures/plot3.png")
}

x <- df$Date_Time
y1 <- df$Sub_metering_1
y2 <- df$Sub_metering_2
y3 <- df$Sub_metering_3

# Empty frame first (type = "n"); the y-range spans all three series so that
# none of them is clipped when it exceeds Sub_metering_1.
plot(
    x = x,
    y = y1,
    xlab = "",
    ylab = "Energy sub metering",
    type = "n",
    ylim = range(y1, y2, y3, finite = TRUE)
)

# One line per series; colours are reused for the legend below.
series <- list(y1, y2, y3)
series_names <- c("Sub_metering_1", "Sub_metering_2", "Sub_metering_3")
series_cols <- c("black", "red", "blue")
for (i in seq_along(series)) {
    lines(x = x, y = series[[i]], type = "l", col = series_cols[i])
}

legend("topright",
    legend = series_names,
    col = series_cols,
    lty = c(1, 1, 1)
)

# only close the device if this cell opened it
if (!append_to_current_plot) {
    dev.off()
}

In [None]:
# Plot 4: line charts on a 2 x 2 panel grid, written to figures/plot4.png.
png("figures/plot4.png")

# panels are filled row by row
par(mfrow = c(2, 2))

# Each listed column gets its own panel; the column name doubles as the
# y-axis label.
for (panel_col in c("Voltage", "Global_reactive_power")) {
    plot(
        x = df$Date_Time,
        y = df[[panel_col]],
        col = "black",
        type = "l",
        xlab = "datetime",
        ylab = panel_col
    )
}

dev.off()