In [None]:
# Library Dependencies
library(tidyverse)
library(ggplot2)
library(gridExtra)
library(grid)
library(ggcorrplot)  #Allows to render correlation plot

In [None]:
# Load and clean the rice dataset.
# The file is parsed as header-less CSV; every line starting with "@" is
# treated as a comment and skipped, so columns initially get default names.
rice_data <- read.csv(
  "Rice_Cammeo_Osmancik.arff",
  header = FALSE,
  comment.char = "@"
)

# Drop the first four rows left over after the "@" lines are skipped
# (presumably non-data preamble -- confirm against the file), then re-infer
# each column's type while keeping strings as character rather than factor.
rice_data <- type.convert(rice_data[-(1:4), ], as.is = TRUE)

# Give the eight columns descriptive snake_case names.
names(rice_data) <- c(
  "area",
  "perimeter",
  "major_axis_length",
  "minor_axis_length",
  "eccentricity",
  "convex_area",
  "extent",
  "class"
)

In [None]:
# Correlation matrix across all columns. The class label is first recoded
# numerically (1 for "Cammeo", 0 for anything else) so it can take part in
# the correlation; only rows without missing values are used.
binary_data <- cor(
  mutate(rice_data, class = as.numeric(class == "Cammeo")),
  use = "complete.obs"
)

# Draw the lower triangle of the matrix, with variables reordered by
# hierarchical clustering and each coefficient printed in its cell.
ggcorrplot(
  corr = binary_data,
  type = "lower",
  hc.order = TRUE,
  lab = TRUE
)

In [None]:
# Box plot of perimeter for each rice class.
peri_plot <- ggplot(data = rice_data, mapping = aes(x = class, y = perimeter)) +
  geom_boxplot(fill = "lightgreen") +
  labs(x = "Class of Rice", title = "Perimeter VS Rice Class")

# Box plot of major axis length for each rice class.
mal_plot <- ggplot(
  data = rice_data,
  mapping = aes(x = class, y = major_axis_length)
) +
  geom_boxplot(fill = "lightblue") +
  labs(x = "Class of Rice", title = "Major Axis Length VS Rice Class")

# Show the two plots side by side: perimeter on the left, major axis length
# on the right.
grid.arrange(peri_plot, mal_plot, ncol = 2)

In [114]:
# Per-class summary statistics: mean and standard deviation of the perimeter
# and of the major axis length, one row per rice class.
summary_rice <- summarise(
  group_by(rice_data, class),
  mean_perimeter = mean(perimeter),
  mean_mal = mean(major_axis_length),
  sd_perimeter = sd(perimeter),
  sd_mal = sd(major_axis_length)
)

# Display the resulting table.
summary_rice

class,mean_perimeter,mean_mal,sd_perimeter,sd_mal
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Cammeo,487.4389,205.4786,22.18152,10.333854
Osmancik,429.4155,176.2878,20.15439,9.362405
