In [1]:
import subprocess

from ucimlrepo import fetch_ucirepo 
import pandas as pd
import numpy as np
from IPython.display import Image
import matplotlib.pyplot as plt

In [2]:

# Fetch UCI dataset id 17 (Breast Cancer Wisconsin (Diagnostic)).
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)

# Data (as pandas dataframes): features go to X, targets go to y.
X, y = (
    breast_cancer_wisconsin_diagnostic.data.features,
    breast_cancer_wisconsin_diagnostic.data.targets,
)

# Metadata first, then variable information; the print order is kept so
# the cell output is unchanged.
print(breast_cancer_wisconsin_diagnostic.metadata)
print(breast_cancer_wisconsin_diagnostic.variables)


{'uci_id': 17, 'name': 'Breast Cancer Wisconsin (Diagnostic)', 'repository_url': 'https://archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic', 'data_url': 'https://archive.ics.uci.edu/static/public/17/data.csv', 'abstract': 'Diagnostic Wisconsin Breast Cancer Database.', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 569, 'num_features': 30, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['Diagnosis'], 'index_col': ['ID'], 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1993, 'last_updated': 'Fri Nov 03 2023', 'dataset_doi': '10.24432/C5DW2B', 'creators': ['William Wolberg', 'Olvi Mangasarian', 'Nick Street', 'W. Street'], 'intro_paper': {'title': 'Nuclear feature extraction for breast tumor diagnosis', 'authors': 'W. Street, W. Wolberg, O. Mangasarian', 'published_in': 'Electronic imaging', 'year': 1993, 'url': 'https://www.semanticscholar.org/paper/53

In [3]:
# Working copy of the feature matrix used by all statistics below.
#
# The previous version passed
# `columns=breast_cancer_wisconsin_diagnostic.feature_names`.
# NOTE(review): the ucimlrepo result documents only `data`, `metadata` and
# `variables`; `feature_names` appears to resolve to None, which made the
# argument a silent no-op -- confirm against the installed ucimlrepo version.
# X already carries its own column labels, so they are used as-is.
#
# copy=True keeps `df` independent of `X`, so later edits to `df` cannot
# leak back into the fetched data.
df = pd.DataFrame(X, copy=True)


In [4]:
# One row per feature; keep only the headline statistics from describe()
# and round them to two decimals for display.
summary_stats = (
    df.describe()
    .T
    .loc[:, ['count', 'mean', 'std', 'min', 'max']]
    .round(2)
)

In [5]:
# Bare last expression so the notebook renders the rounded summary table.
summary_stats

Unnamed: 0,count,mean,std,min,max
radius1,569.0,14.13,3.52,6.98,28.11
texture1,569.0,19.29,4.3,9.71,39.28
perimeter1,569.0,91.97,24.3,43.79,188.5
area1,569.0,654.89,351.91,143.5,2501.0
smoothness1,569.0,0.1,0.01,0.05,0.16
compactness1,569.0,0.1,0.05,0.02,0.35
concavity1,569.0,0.09,0.08,0.0,0.43
concave_points1,569.0,0.05,0.04,0.0,0.2
symmetry1,569.0,0.18,0.03,0.11,0.3
fractal_dimension1,569.0,0.06,0.01,0.05,0.1


In [9]:
# First (25%) and third (75%) quartile of every column in df.
q1_values, q3_values = (df.quantile(q) for q in (0.25, 0.75))

# Interquartile range per column: third quartile minus first quartile.
iqr_values = q3_values - q1_values

In [10]:
# Promote the per-column IQR Series to a one-column table with a readable
# header, then print it as plain text.
iqr_table = iqr_values.to_frame('Interquartile Range (IQR)')
print(iqr_table)

                    Interquartile Range (IQR)
radius1                              4.080000
texture1                             5.630000
perimeter1                          28.930000
area1                              362.400000
smoothness1                          0.018930
compactness1                         0.065480
concavity1                           0.101140
concave_points1                      0.053690
symmetry1                            0.033800
fractal_dimension1                   0.008420
radius2                              0.246500
texture2                             0.640100
perimeter2                           1.751000
area2                               27.340000
smoothness2                          0.002977
compactness2                         0.019370
concavity2                           0.026960
concave_points2                      0.007072
symmetry2                            0.008320
fractal_dimension2                   0.002310
radius3                           