In [1]:
import pandas as pd

# Read the Iris measurements from IRIS.csv (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer="IRIS.csv")

# Preview the first five rows to confirm the columns loaded as expected
print(df.head(n=5))

   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [2]:
# Species labels to summarise; each is compared against the 'species' column,
# so the spelling must match the values stored in the data exactly.
species_list = [
    "Iris-setosa",
    "Iris-versicolor",
    "Iris-virginica",
]

In [4]:
# Print a per-species statistical summary of the numeric measurement columns.
# Reads `df` (the loaded dataset) and `species_list` (labels to report on).
for sp in species_list:
    print("\n==============================")
    print(f"Statistical Summary for {sp}")
    print("==============================")

    # Keep only the rows whose 'species' value equals the current label
    sp_df = df[df['species'] == sp]
    if sp_df.empty:
        # A label that matches no rows (typo, different spelling in the file)
        # would otherwise be reported as "no numeric columns", because
        # DataFrame.empty is True when *either* axis has length zero.
        print("No rows found for this species.")
        continue

    numeric = sp_df.select_dtypes(include='number')   # only numeric columns

    # Rows exist at this point, so zero columns really means nothing numeric
    if numeric.shape[1] == 0:
        print("No numeric columns found for this species.")
        continue

    # Describe (numeric only)
    print("\n--- describe() (numeric columns) ---")
    print(numeric.describe().T)   # transpose: one row per measurement column

    # Percentiles (25, 50, 75) - numeric only
    print("\n--- Percentiles (25, 50, 75) ---")
    print(numeric.quantile([0.25, 0.50, 0.75]).T)

    # Compact table of other stats using agg; std/var are renamed so the
    # printed column headers are self-explanatory
    stats = numeric.agg(['mean', 'std', 'var', 'min', 'max', 'count']).T
    stats = stats.rename(columns={'std': 'std_dev', 'var': 'variance'})
    print("\n--- Mean / Std / Variance / Min / Max / Count ---")
    print(stats)


Statistical Summary for Iris-setosa

--- describe() (numeric columns) ---
              count   mean       std  min    25%  50%    75%  max
sepal_length   50.0  5.006  0.352490  4.3  4.800  5.0  5.200  5.8
sepal_width    50.0  3.418  0.381024  2.3  3.125  3.4  3.675  4.4
petal_length   50.0  1.464  0.173511  1.0  1.400  1.5  1.575  1.9
petal_width    50.0  0.244  0.107210  0.1  0.200  0.2  0.300  0.6

--- Percentiles (25, 50, 75) ---
               0.25  0.50   0.75
sepal_length  4.800   5.0  5.200
sepal_width   3.125   3.4  3.675
petal_length  1.400   1.5  1.575
petal_width   0.200   0.2  0.300

--- Mean / Std / Variance / Min / Max / Count ---
               mean   std_dev  variance  min  max  count
sepal_length  5.006  0.352490  0.124249  4.3  5.8   50.0
sepal_width   3.418  0.381024  0.145180  2.3  4.4   50.0
petal_length  1.464  0.173511  0.030106  1.0  1.9   50.0
petal_width   0.244  0.107210  0.011494  0.1  0.6   50.0

Statistical Summary for Iris-versicolor

--- describe() (nu