# Python Data Analysis Auto Starter

In [5]:
# Ask the user for the address of the data file within Jupyter.
# Surrounding whitespace (e.g. from a copy-pasted path) is stripped so it does
# not leak into the generated read_csv call or the notebook file name.
data_file = input("Please enter the address of the data file within Juypiter: ").strip()
if not data_file:
    # An empty answer would otherwise produce a notebook named ".ipynb" whose
    # first data cell tries to read a CSV from ''.
    raise ValueError("No data file address was entered.")

# Import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline



# Create a new Jupyter notebook titled after the data file name (excluding the extension)
import nbformat
from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell

# Create a new notebook object
nb = new_notebook()

# Add the markdown header cell, titled after the data file name without its
# directory or extension.  Path.stem removes only the final suffix, so a name
# such as "my.data.csv" becomes "my.data" instead of being cut at the first dot.
from pathlib import Path

nb.cells.append(new_markdown_cell(f"# Data Analysis for {Path(data_file).stem}"))

# Import-libraries cell: a comment line followed by the standard imports and
# the inline-plotting magic, joined into a single cell source.
import_lines = [
    "# Import the necessary libraries",
    "import pandas as pd",
    "import numpy as np",
    "import matplotlib.pyplot as plt",
    "%matplotlib inline",
]
cell = new_code_cell("\n".join(import_lines))
nb.cells.append(cell)

# Add a cell with code and comment - Read CSV
# The path is embedded with !r so that quotes and backslashes in it (for
# example a Windows path, or a file name containing an apostrophe) are escaped
# and the generated line is always a valid Python string literal.
cell = new_code_cell("# Import the data to a pandas dataframe called df")
cell.source += f"\ndf = pd.read_csv({data_file!r})"
nb.cells.append(cell)

# Add the basic inspection cells, in order: head, info, describe, histograms.
# Each entry is the complete source of one code cell (comment line + code).
inspection_sources = (
    "# Run df.head()\ndf.head()",
    "# Run df.info()\ndf.info()",
    "# Run df.describe()\ndf.describe()",
    "# Run df.hist()\ndf.hist(figsize=(10,10))\nplt.show()",
)
for cell_source in inspection_sources:
    cell = new_code_cell(cell_source)
    nb.cells.append(cell)

# Add a cell with code and comment - Correlation Heatmap
# Only numeric/boolean columns are correlated: since pandas 2.0,
# DataFrame.corr() raises on non-numeric columns instead of silently dropping
# them.  select_dtypes is used (rather than numeric_only=True) so the
# generated cell also runs on pandas versions that predate that argument.
cell = new_code_cell("# Create a correlation heatmap")
cell.source += (
    "\nimport seaborn as sns"
    "\nsns.heatmap(df.select_dtypes(include=['number', 'bool']).corr(), annot=True, cmap='coolwarm')"
    "\nplt.show()"
)
nb.cells.append(cell)

# Add a cell with code and comment - BoxPlots
# A fixed 3x3 grid makes pandas raise when the data has more than nine
# numeric columns.  Passing -1 for the row count lets pandas work out how many
# rows of three subplots are needed.
cell = new_code_cell("# Create a boxplot for each numerical variable")
cell.source += "\ndf.plot(kind='box', subplots=True, layout=(-1,3), figsize=(10,10))\nplt.show()"
nb.cells.append(cell)

# Scatter-matrix cell: comment line, the scatter_matrix call, then plt.show().
scatter_lines = [
    "# Create a scatterplot matrix for each pair of numerical variables",
    "pd.plotting.scatter_matrix(df, figsize=(10,10))",
    "plt.show()",
]
cell = new_code_cell("\n".join(scatter_lines))
nb.cells.append(cell)

# Add a cell with code and comment - CountPlots
# Text columns are detected with is_object_dtype / is_string_dtype instead of
# `dtype == object`, so columns stored with pandas' dedicated string dtype are
# plotted too.  seaborn is imported in the cell itself so it does not depend
# on the heatmap cell having been run first.
countplot_lines = [
    "import seaborn as sns",
    "from pandas.api.types import is_object_dtype, is_string_dtype",
    "for col in df.columns:",
    "    if is_object_dtype(df[col]) or is_string_dtype(df[col]):",
    "        sns.countplot(x=col, data=df)",
    "        plt.show()",
]
cell = new_code_cell("# Create a countplot for each categorical variable")
cell.source += "\n" + "\n".join(countplot_lines)
nb.cells.append(cell)


# Null-value cell: reports the number of missing values per column.
null_check_source = (
    "# Run a check for null values in each column and print the counts"
    "\n"
    "df.isnull().sum()"
)
cell = new_code_cell(null_check_source)
nb.cells.append(cell)

# Add a cell with code and comment - List Unique Values for Non Numerical Columns
# Text columns are detected with is_object_dtype / is_string_dtype instead of
# `dtype == object`, so columns stored with pandas' dedicated string dtype are
# listed as well.
unique_lines = [
    "from pandas.api.types import is_object_dtype, is_string_dtype",
    "for col in df.columns:",
    "    if is_object_dtype(df[col]) or is_string_dtype(df[col]):",
    "        print(col, df[col].unique())",
]
cell = new_code_cell("# List unique values for non numerical columns")
cell.source += "\n" + "\n".join(unique_lines)
nb.cells.append(cell)

# Add a cell with code and comment - ADF Test
# Changes to the generated code compared with a plain `dtype != object` loop:
#   * columns are selected with is_numeric_dtype, so text columns stored with
#     a non-object string dtype are not passed to adfuller;
#   * missing values are dropped before the test, since adfuller cannot
#     handle NaNs;
#   * the tab in the critical-values line is written as the escape sequence
#     \t in the generated source rather than as a raw tab character.
adf_lines = [
    "import statsmodels.api as sm",
    "from pandas.api.types import is_numeric_dtype",
    "for col in df.columns:",
    "    if is_numeric_dtype(df[col]):",
    "        result = sm.tsa.stattools.adfuller(df[col].dropna())",
    "        print(f'ADF test for {col}')",
    "        print('ADF Statistic: %f' % result[0])",
    "        print('p-value: %f' % result[1])",
    "        print('Critical Values:')",
    "        for key, value in result[4].items():",
    "            print('\\t%s: %.3f' % (key, value))",
    "        print('')",
]
cell = new_code_cell("# Perform the ADF test for each numerical variable")
cell.source += "\n" + "\n".join(adf_lines)
nb.cells.append(cell)

# Save the notebook in the current working directory, named after the data
# file with its directory and final extension removed.  Path.stem strips only
# the last suffix, so "my.data.csv" is saved as "my.data.ipynb" rather than
# being truncated at the first dot.  The name is computed once and reused for
# the confirmation message.
from pathlib import Path

notebook_name = Path(data_file).stem + ".ipynb"
with open(notebook_name, "w", encoding="utf-8") as f:
    nbformat.write(nb, f)

print(f"New notebook {notebook_name} created.")


Please enter the address of the data file within Juypiter: Rainfall_data.csv
New notebook Rainfall_data.ipynb created.
