In [157]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly as pltly
import plotly.express as px

# https://www.archive.ics.uci.edu/dataset/16/breast+cancer+wisconsin+prognostic

In [158]:
# Download the Breast Cancer Wisconsin (Prognostic) dataset, UCI repository id 16.
from ucimlrepo import fetch_ucirepo

breast_cancer_wisconsin_prognostic = fetch_ucirepo(id=16)

# Pull the feature table (X) and the target table (y) out of the fetched
# payload; both are exposed as pandas dataframes.
X, y = (
    breast_cancer_wisconsin_prognostic.data.features,
    breast_cancer_wisconsin_prognostic.data.targets,
)

In [159]:
# Join the feature frame X and the target frame y column-wise into one frame.
df = pd.concat([X, y], axis=1)

# Derived "volume" column using the ellipsoid formula (4/3) * pi * r1 * r2 * r3.
# np.pi is used instead of the truncated literal 3.14 so the constant carries
# full floating-point precision.
# NOTE(review): the UCI dataset description suggests radius1 / radius2 / radius3
# are the mean / standard error / worst value of the cell-nucleus radius rather
# than three axes of a tumor - confirm this product is the intended size proxy.
df["volume"] = (4 / 3) * np.pi * df["radius1"] * df["radius2"] * df["radius3"]

# Print column names, non-null counts and dtypes as a quick sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 198 entries, 0 to 197
Data columns (total 35 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Time                198 non-null    int64  
 1   radius1             198 non-null    float64
 2   texture1            198 non-null    float64
 3   perimeter1          198 non-null    float64
 4   area1               198 non-null    float64
 5   smoothness1         198 non-null    float64
 6   compactness1        198 non-null    float64
 7   concavity1          198 non-null    float64
 8   concave_points1     198 non-null    float64
 9   symmetry1           198 non-null    float64
 10  fractal_dimension1  198 non-null    float64
 11  radius2             198 non-null    float64
 12  texture2            198 non-null    float64
 13  perimeter2          198 non-null    float64
 14  area2               198 non-null    float64
 15  smoothness2         198 non-null    float64
 16  compactn

In [160]:
# Notched box plot: derived volume on a log-scaled x axis, Outcome on the
# y axis, coloured by Outcome, with every observation drawn as a point.
fig1 = px.box(
    data_frame=df,
    x="volume",
    y="Outcome",
    color="Outcome",
    hover_name="Outcome",
    points="all",
    notched=True,
    log_x=True,
    title="Breast cancer prognostic - no-recurrence, recurrences and lump size",
    labels={
        "Outcome": "N as no-recur and R as recur",
        "volume": "Tumor size or volume",
    },
)

fig1.show()

In [161]:
# Violin plot of the derived volume, coloured by Outcome, with an embedded
# box plot and every observation drawn as a point.
fig2 = px.violin(
    data_frame=df,
    y="volume",
    color="Outcome",
    hover_name="Outcome",
    box=True,
    points="all",
    title="Breast cancer prognostic - no-recurrence, recurrences and lump size",
    labels={
        "Outcome": "N as no-recur and R as recur",
        "volume": "Tumor size",
    },
)
fig2.show()

In [162]:

# Empirical cumulative distribution of the derived volume (log-scaled x axis),
# one line per Outcome value, with a rug plot in the margin.
fig3 = px.ecdf(
    data_frame=df,
    x="volume",
    color="Outcome",
    hover_name="Outcome",
    log_x=True,
    lines=True,
    markers=False,
    marginal="rug",
    title="Breast cancer prognostic - no-recurrence, recurrences and lump size",
    labels={
        "Outcome": "N as no-recur and R as recur",
        "volume": "Tumor size",
    },
)
fig3.show()

In [183]:
# Histogram of the derived volume on a linear x axis, coloured by Outcome,
# with a box plot in the margin.
fig4 = px.histogram(
    data_frame=df,
    x="volume",
    color="Outcome",
    hover_name="Outcome",
    log_x=False,
    marginal="box",
    title="Breast cancer prognostic - no-recurrence, recurrences and lump size",
    labels={
        "Outcome": "N as no-recur and R as recur",
        "volume": "Tumor size",
    },
)
# Leave a small gap between adjacent bars.
fig4.update_layout(bargap=0.1)
fig4.show()