In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly as pltly
import plotly.express as px

# https://www.archive.ics.uci.edu/dataset/16/breast+cancer+wisconsin+prognostic

In [3]:
# Importing data from the source
from ucimlrepo import fetch_ucirepo 
  
# Retrieve dataset id 16 from the UCI repository via ucimlrepo.
breast_cancer_wisconsin_prognostic = fetch_ucirepo(id=16)

# Unpack the feature table and the target table from the fetched result
# (pandas dataframes).
X, y = (
    breast_cancer_wisconsin_prognostic.data.features,
    breast_cancer_wisconsin_prognostic.data.targets,
)

In [4]:
# Join the feature frame X and the target frame y column-wise into one frame.
df = pd.concat([X, y], axis=1)

# Derived "volume" column using the ellipsoid formula 4/3 * pi * a * b * c,
# with np.pi instead of the truncated literal 3.14.
# NOTE(review): this treats radius1/radius2/radius3 as three semi-axes. In the
# UCI description the 1/2/3 suffixes appear to denote mean / standard error /
# worst value of the same measurement -- confirm this product is the intended
# size proxy.
df["volume"] = (4 / 3) * np.pi * df["radius1"] * df["radius2"] * df["radius3"]

# Print the column names, non-null counts and dtypes of the combined frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 35 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Time                198 non-null    int64  
 1   radius1             198 non-null    float64
 2   texture1            198 non-null    float64
 3   perimeter1          198 non-null    float64
 4   area1               198 non-null    float64
 5   smoothness1         198 non-null    float64
 6   compactness1        198 non-null    float64
 7   concavity1          198 non-null    float64
 8   concave_points1     198 non-null    float64
 9   symmetry1           198 non-null    float64
 10  fractal_dimension1  198 non-null    float64
 11  radius2             198 non-null    float64
 12  texture2            198 non-null    float64
 13  perimeter2          198 non-null    float64
 14  area2               198 non-null    float64
 15  smoothness2         198 non-null    float64
 16  compactn

In [5]:
# Notched horizontal box plot of the Time column for each Outcome class,
# with every observation drawn alongside its box.
fig0 = px.box(
    df,
    x='Time',
    y="Outcome",
    color='Outcome',
    hover_name='Outcome',
    points='all',
    notched=True,
    log_x=False,
    title='Breast cancer prognostic - no-recurrence, recurrences VS Time',
    labels={
        'Outcome': 'N as no-recur and R as recur',
        'volume': 'Tumor size or volume',
    },
)

fig0.show()

In [6]:
# Notched horizontal box plot of the derived volume column (log-scaled x axis)
# for each Outcome class, with every observation drawn alongside its box.
fig1 = px.box(
    df,
    x='volume',
    y="Outcome",
    color='Outcome',
    hover_name='Outcome',
    points='all',
    notched=True,
    log_x=True,
    title='Breast cancer prognostic - no-recurrence, recurrences and lump size',
    labels={
        'Outcome': 'N as no-recur and R as recur',
        'volume': 'Tumor size or volume',
    },
)

fig1.show()

In [7]:
# Notched horizontal box plot of the tumor_size column (log-scaled x axis)
# for each Outcome class, with every observation drawn alongside its box.
fig2 = px.box(
    df,
    y="Outcome",
    x='tumor_size',
    log_x=True,
    points='all',
    notched=True,
    color='Outcome',
    # Label keys must name columns this figure actually uses; the axis shows
    # tumor_size, so that is the column that gets a readable label.
    labels={
        'Outcome': 'N as no-recur and R as recur',
        'tumor_size': 'Tumor size',
    },
    title='Breast cancer prognostic - no-recurrence, recurrences and tumor size',
    hover_name='Outcome',
)

fig2.show()

In [8]:
# Notched horizontal box plot of the lymph_node_status column for each
# Outcome class, with every observation drawn alongside its box.
fig3 = px.box(
    df,
    y="Outcome",
    x='lymph_node_status',
    points='all',
    notched=True,
    color='Outcome',
    # Label keys must name columns this figure actually uses.
    labels={
        'Outcome': 'N as no-recur and R as recur',
        'lymph_node_status': 'Lymph node status',
    },
    # The title names the plotted variable (lymph node status, not lump size).
    title='Breast cancer prognostic - no-recurrence, recurrences and lymph node status',
    hover_name='Outcome',
)

fig3.show()

In [9]:
# Violin plot of the derived volume column, one violin per Outcome class,
# with an inner box and every observation drawn.
fig4 = px.violin(
    df,
    y="volume",
    color='Outcome',
    hover_name='Outcome',
    box=True,
    points='all',
    title='Breast cancer prognostic - no-recurrence, recurrences and lump size',
    labels={
        'Outcome': 'N as no-recur and R as recur',
        'volume': 'Tumor size',
    },
)
fig4.show()

In [10]:
# Violin plot of the Time column, one violin per Outcome class,
# with an inner box and every observation drawn.
fig5 = px.violin(
    df,
    y="Time",
    color='Outcome',
    hover_name='Outcome',
    box=True,
    points='all',
    title='Breast cancer prognostic - no-recurrence, recurrences and Time',
    # The 'volume' entry does not match any column used by this figure.
    labels={
        'Outcome': 'N as no-recur and R as recur',
        'volume': 'Time',
    },
)
fig5.show()

In [11]:

# Empirical CDF of the derived volume column (log-scaled x axis), one curve
# per Outcome class, drawn as lines only with a rug marginal.
fig6 = px.ecdf(
    df,
    x="volume",
    color="Outcome",
    hover_name='Outcome',
    log_x=True,
    lines=True,
    markers=False,
    marginal="rug",
    title='Breast cancer prognostic - no-recurrence, recurrences and lump size',
    labels={
        'Outcome': 'N as no-recur and R as recur',
        'volume': 'Tumor size',
    },
)
fig6.show()

In [12]:
# Histogram of the derived volume column coloured by Outcome, with a box-plot
# marginal.
fig7 = px.histogram(
    df,
    x="volume",
    color="Outcome",
    hover_name='Outcome',
    log_x=False,
    marginal="box",
    title='Breast cancer prognostic - no-recurrence, recurrences and lump size',
    labels={
        'Outcome': 'N as no-recur and R as recur',
        'volume': 'Tumor size',
    },
)

# Single layout pass: bar spacing plus the final title and axis captions
# (this title replaces the one passed to px.histogram above).
fig7.update_layout(
    bargap=0.1,
    title='Cancer size VS Label/Outcome',
    xaxis_title='Tumor size',
    yaxis_title='Count',
)
fig7.show()

In [13]:
# Scatter of tumor_size against Time; marker colour encodes Outcome and
# marker size encodes tumor_size.
fig8 = px.scatter(
    df,
    x="Time",
    y="tumor_size",
    size="tumor_size",
    color="Outcome",
)
fig8.update_layout(
    xaxis_title='Time',
    yaxis_title='Tumor size',
    title='Cancer Time VS Tumor size - Label/Outcome',
)
fig8.show()

In [14]:
# Scatter of tumor_size against lymph_node_status; marker colour encodes
# Outcome and marker size encodes Time.
fig9 = px.scatter(
    df,
    x="lymph_node_status",
    y="tumor_size",
    size="Time",
    color="Outcome",
)
fig9.update_layout(
    xaxis_title='Lymph Node status',
    yaxis_title='Tumor size',
    title='Size is Time - Lymph Node status VS Tumor size',
)
fig9.show()

In [15]:
# One scatter per radius column (radius1..radius3) against lymph_node_status;
# marker colour encodes Outcome and marker size encodes Time.
for looper in range(1, 4):
    plot = f"radius{looper}"
    print(plot)
    fig10 = px.scatter(
        df,
        x="lymph_node_status",
        y=plot,
        size="Time",
        color="Outcome",
    )
    fig10.update_layout(
        title=f'Size is Time - Lymph Node VS Radius{looper}',
        xaxis_title='Lymph Node Status',
        yaxis_title=f'Radius{looper}',
    )
    fig10.show()

radius1


radius2


radius3


In [16]:
import plotly.graph_objects as go
import pandas as pd


# Scatter of radius1 against Time, built trace-by-trace so each outcome
# class gets its own colour and legend entry.
fig = go.Figure()

# Outcome class -> colour used for that class's trace.
colors = {'R': 'red', 'N': 'blue'}

for outcome_label, trace_color in colors.items():
    # Rows belonging to the current outcome class only.
    outcome_rows = df.loc[df['Outcome'] == outcome_label]
    fig.add_trace(
        go.Scatter(
            x=outcome_rows['Time'],
            y=outcome_rows['radius1'],
            mode='markers',
            name=f'Outcome: {outcome_label}',
            marker=dict(color=trace_color),
            line=dict(color=trace_color),
        )
    )

# Title, axis captions and legend heading applied in one layout call.
fig.update_layout(
    title='Feature Evolution Over Time',
    xaxis_title='Time',
    yaxis_title='Feature Value',
    legend=dict(title='Outcomes'),
)

fig.show()


In [17]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Stack one Time-vs-radius scatter per radius column in a single figure,
# one subplot row per column.
radius_columns = ['radius1', 'radius2', 'radius3']
fig11 = make_subplots(rows=len(radius_columns), cols=1)

for row_number, radius_column in enumerate(radius_columns, start=1):
    fig11.add_trace(
        go.Scatter(
            x=df['Time'],
            y=df[radius_column],
            mode='markers',
            name=radius_column,
        ),
        row=row_number,
        col=1,
    )
    # Layout-level `yaxis_title` only reaches the first subplot, so each row's
    # y axis is titled explicitly with the column it shows.
    fig11.update_yaxes(title_text=radius_column, row=row_number, col=1)

# Without a row/col selector this titles the x axis of every subplot.
fig11.update_xaxes(title_text='Time')

fig11.update_layout(title='Time VS Radius', showlegend=False)

# Show the plot
fig11.show()


In [18]:
import pygwalker as pyg
# Hand the combined frame to pygwalker's `walk` and keep the returned object in
# `walker`; the cell output below is the embedded widget it renders.
walker = pyg.walk(df)

Box(children=(HTML(value='<div id="ifr-pyg-000616348ac902ecbAtsUvyoZMLThjfC" style="height: auto">\n    <head>…