# Automated Exploratory Data Analysis 

# This automation is designed to eliminate the repetitive tasks from Exploratory Data Analysis in Python
# Updates:
# Graceful Error Handling
# 

# In [None]:
import nbformat
from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell
import os

# Ask for the dataset location. Surrounding whitespace is stripped so that a
# stray space or newline picked up when pasting the path does not end up in
# the file extension (".csv " would otherwise be rejected as unsupported).
data_file = input("Please enter the address of the data file within Jupyter: ").strip()

def get_read_function(file_ext):
    """Return the name of the pandas reader that handles ``file_ext``.

    Args:
        file_ext: File extension including the leading dot, e.g. ``".csv"``.
            Matching ignores letter case and surrounding whitespace, so
            ``".CSV"`` and ``".Xlsx"`` are accepted as well.

    Returns:
        The reader as source text (``"pd.read_csv"``, ``"pd.read_excel"`` or
        ``"pd.read_json"``), meant to be embedded in generated code.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    readers = {
        '.csv': "pd.read_csv",
        '.xls': "pd.read_excel",
        '.xlsx': "pd.read_excel",
        '.json': "pd.read_json",
    }
    # Non-string input can never match; route it to the same ValueError the
    # caller already expects instead of failing with an AttributeError.
    normalized = file_ext.strip().lower() if isinstance(file_ext, str) else None
    reader = readers.get(normalized)
    if reader is None:
        raise ValueError("Unsupported file format!")
    return reader

# Pick the pandas reader from the file extension. An unsupported extension
# raises ValueError here, before any notebook content is built.
read_function = get_read_function(os.path.splitext(data_file)[1])

# Dataset name without directory or extension. os.path honours the platform's
# path separators and only drops the final extension, so "sales.2023.csv"
# becomes "sales.2023" rather than being cut at the first dot.
dataset_title = os.path.splitext(os.path.basename(data_file))[0]

nb = new_notebook()
nb.cells.append(new_markdown_cell(f"# Data Analysis for {dataset_title}"))

# First code cell of the generated notebook: the libraries every later cell
# relies on (pd, np, plt, sns, sm).
nb.cells.append(new_code_cell("""# Import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import statsmodels.api as sm"""))

# The path is embedded with !r so that it becomes a correctly quoted and
# escaped Python string literal; pasting it between plain quotes breaks the
# generated code when the path contains a quote or a backslash.
nb.cells.append(new_code_cell(f"""# Import the data to a pandas dataframe called df
df = {read_function}({data_file!r})"""))

# Source of every analysis cell, in the order it appears in the generated
# notebook. Each cell selects the numeric (or categorical) columns itself, so
# it does not depend on variables left behind by an earlier cell.
analysis_cell_sources = [
    "# Display the first few rows of the dataframe\ndf.head()",
    "# Display dataframe info\ndf.info()",
    "# Display descriptive statistics\ndf.describe()",

    # Histograms: numeric columns only.
    """# Plot histograms for numeric columns only
numeric_df = df.select_dtypes(include=[np.number])
if not numeric_df.empty:
    numeric_df.hist(figsize=(10,10))
    plt.show()
else:
    print("No numeric columns to display.")""",

    # Correlation heatmap: numeric columns only.
    """# Create a correlation heatmap for numeric columns only
numeric_df = df.select_dtypes(include=[np.number])
if not numeric_df.empty:
    sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
    plt.show()
else:
    print("No numeric columns to display.")""",

    # Boxplots: the subplot grid keeps the 3x3 shape for up to nine numeric
    # columns and adds rows beyond that, because pandas rejects a layout that
    # is smaller than the number of subplots. The figure height scales with
    # the row count so the extra rows are not squeezed.
    """# Create boxplots for each numerical variable
numeric_df = df.select_dtypes(include=[np.number])
if not numeric_df.empty:
    n_cols = 3
    # Ceiling division, with a minimum of three rows
    n_rows = max(3, -(-numeric_df.shape[1] // n_cols))
    numeric_df.plot(kind='box', subplots=True, layout=(n_rows, n_cols), figsize=(10, 10 * n_rows / 3))
    plt.show()
else:
    print("No numeric columns to plot.")""",

    # Scatterplot matrix: numeric columns only.
    """# Create scatterplot matrix for numeric columns only
numeric_df = df.select_dtypes(include=[np.number])
if not numeric_df.empty:
    pd.plotting.scatter_matrix(numeric_df, figsize=(10,10))
    plt.show()
else:
    print("No numeric columns to display.")""",

    # Countplots: one figure per object/category column.
    """# Create countplots for categorical variables
categorical_df = df.select_dtypes(include=['object', 'category'])
for col in categorical_df.columns:
    sns.countplot(x=col, data=categorical_df)
    plt.show()""",

    "# Check for null values\ndf.isnull().sum()",

    """# List unique values for non-numerical columns
categorical_df = df.select_dtypes(include=['object', 'category'])
for col in categorical_df.columns:
    print(f"{col}: {categorical_df[col].unique()}")""",

    # ADF test: a column the test cannot process (e.g. constant or too short)
    # is reported and skipped instead of aborting the remaining columns.
    """# Perform the ADF test for each numerical variable
numeric_df = df.select_dtypes(include=[np.number])
for col in numeric_df.columns:
    print(f"ADF test for {col}")
    try:
        result = sm.tsa.stattools.adfuller(numeric_df[col].dropna())
    except ValueError as err:
        print(f"Skipped: {err}")
        print("")
        continue
    print(f"ADF Statistic: {result[0]}")
    print(f"p-value: {result[1]}")
    print("Critical Values:")
    for key, value in result[4].items():
        print(f"\\t{key}: {value}")
    print("")""",
]

for cell_source in analysis_cell_sources:
    nb.cells.append(new_code_cell(cell_source))

# Name the notebook after the data file: directory removed and only the final
# extension replaced. Using os.path keeps dots inside the name, so
# "sales.2022.csv" and "sales.2023.csv" no longer both map to "sales.ipynb".
# The notebook is written to the current working directory.
notebook_name = os.path.splitext(os.path.basename(data_file))[0] + ".ipynb"
with open(notebook_name, "w", encoding="utf-8") as f:
    nbformat.write(nb, f)

print(f"New notebook {notebook_name} created with full functionality and enhancements.")

