In [122]:
# import libraries
import pandas as pd
import altair as alt
from ucimlrepo import fetch_ucirepo 

In [123]:
# Download the Iris dataset (UCI ML Repository id 53)
iris = fetch_ucirepo(id=53)

# Features and targets are exposed as two separate pandas DataFrames
X, y = iris.data.features, iris.data.targets

# Place them side by side so every row carries its measurements and its class label
iris_df = pd.concat(objs=[X, y], axis="columns")

In [124]:
# Display basic information.
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; wrapping it in print() emits a stray "None" line.
iris_df.info()
# Summary statistics of the numeric columns, kept as plain-text output
print(iris_df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal length  150 non-null    float64
 1   sepal width   150 non-null    float64
 2   petal length  150 non-null    float64
 3   petal width   150 non-null    float64
 4   class         150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None
       sepal length  sepal width  petal length  petal width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.054000      3.758667     1.198667
std        0.828066     0.433594      1.764420     0.763161
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000

In [None]:
# Reshape to long format (one row per class / Feature / Value) so Altair can facet by feature
iris_melted = pd.melt(
    iris_df,
    id_vars=["class"],
    var_name="Feature",
    value_name="Value",
)

# One box-plot panel per feature, with one box per class inside each panel
box_plot_features = (
    alt.Chart(
        iris_melted,
        title=alt.Title(text="Box Plot by Species", subtitle="Distribution by Species"),
    )
    .mark_boxplot()
    .encode(
        x=alt.X("class", type="nominal", title=""),
        y=alt.Y("Value", type="quantitative", title="Measurement"),
        color=alt.Color(
            "class",
            type="nominal",
            title="Class",
            scale=alt.Scale(scheme="tableau20"),
        ),
        column=alt.Column("Feature", type="nominal", title=" "),
        tooltip=["Feature", "Value", "class"],
    )
    .properties(width=200, height=400)
)

box_plot_features

In [None]:
# Scatterplot matrix: each row feature is plotted against each column feature,
# with points coloured by class; pan/zoom is enabled on the repeated chart
(
    alt.Chart(iris_df)
    .mark_circle()
    .encode(
        x=alt.X(alt.repeat("column"), type="quantitative"),
        y=alt.Y(alt.repeat("row"), type="quantitative"),
        color=alt.Color("class", type="nominal", scale=alt.Scale(scheme="tableau20")),
    )
    .properties(width=200, height=200)
    .repeat(
        row=["sepal length", "sepal width", "petal length", "petal width"],
        column=["petal length", "petal width", "sepal length", "sepal width"],
    )
    .interactive()
)

In [131]:
# Compute the pairwise correlation matrix of the numeric features and reshape
# it to long format (one row per feature pair), which is what Altair expects.
# Column names are assigned inside the chain instead of overwriting `.columns`
# afterwards, so the cell is a single idempotent expression.
corr_matrix = (
    iris_df.drop(columns=["class"])
    .corr()
    .rename_axis(index="Feature1")
    .reset_index()
    .melt(id_vars="Feature1", var_name="Feature2", value_name="Correlation")
)

# Create a heatmap using Altair.
# Correlation is a signed quantity, so a diverging colour scheme with a fixed
# [-1, 1] domain is used: zero sits at the neutral midpoint, and negative and
# positive correlations get distinct hues. A sequential scheme with an
# auto-fitted domain would hide the sign.
heatmap = alt.Chart(corr_matrix).mark_rect().encode(
    x=alt.X("Feature1:N", title="Features"),
    y=alt.Y("Feature2:N", title="Features"),
    color=alt.Color(
        "Correlation:Q",
        scale=alt.Scale(scheme="redblue", domain=[-1, 1]),
        title="Correlation",
    ),
    tooltip=["Feature1", "Feature2", "Correlation"]
).properties(
    width=400,
    height=400,
    title="Feature Correlation Heatmap"
)

In [133]:
# Render the correlation heatmap chart
heatmap

In [135]:
# Wide correlation table; reset_index() moves the feature names out of the index into an "index" column
iris_df.drop("class", axis="columns").corr().reset_index()

Unnamed: 0,index,sepal length,sepal width,petal length,petal width
0,sepal length,1.0,-0.109369,0.871754,0.817954
1,sepal width,-0.109369,1.0,-0.420516,-0.356544
2,petal length,0.871754,-0.420516,1.0,0.962757
3,petal width,0.817954,-0.356544,0.962757,1.0


In [138]:
# Inspect the long-format correlation table (columns: Feature1, Feature2, Correlation)
corr_matrix

Unnamed: 0,Feature1,Feature2,Correlation
0,sepal length,sepal length,1.0
1,sepal width,sepal length,-0.109369
2,petal length,sepal length,0.871754
3,petal width,sepal length,0.817954
4,sepal length,sepal width,-0.109369
5,sepal width,sepal width,1.0
6,petal length,sepal width,-0.420516
7,petal width,sepal width,-0.356544
8,sepal length,petal length,0.871754
9,sepal width,petal length,-0.420516
