In [1]:
#  Import necessary libraries

from sklearn.datasets import load_breast_cancer  # Load the dataset
from sklearn.model_selection import train_test_split  # For splitting the data
import pandas as pd  # For creating and manipulating dataframes
import numpy as np  # For numerical operations

# Set a seed for reproducibility

SEED = 42  # Ensures that the splits are the same every time you run the code

# Load the breast cancer dataset

cancer_data = load_breast_cancer()

# Create a DataFrame with feature names

df = pd.DataFrame(cancer_data.data, columns=cancer_data.feature_names)

# Add the target variable to the DataFrame

df['target'] = cancer_data.target

# Split the data into training (60%), validation (20%), and test (20%) sets

train_df, temp_df = train_test_split(df, test_size=20, random_state=SEED)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=SEED)

# Display the sizes of the resulting dataframes

print("Training set size:", train_df.shape)
print("Validation set size:", val_df.shape)
print("Test set size:", test_df.shape)

Training set size: (549, 31)
Validation set size: (10, 31)
Test set size: (10, 31)


In [5]:
test_df

NameError: name 'test_df' is not defined

In [6]:
train_df['concave points error']

73     0.009206
394    0.007513
393    0.012880
425    0.003527
305    0.006998
         ...   
71     0.017660
106    0.013980
270    0.003608
435    0.009567
102    0.006797
Name: concave points error, Length: 549, dtype: float64

In [15]:
train_df[['mean perimeter', 'target']]

Unnamed: 0,mean perimeter,target
73,90.43,0
394,78.07,1
393,144.40,0
425,63.19,1
305,74.23,1
...,...,...
71,58.79,1
106,75.17,1
270,90.30,1
435,91.12,0


In [2]:
train_df[(train_df['worst texture'] < 10) , (train_df['target'] == 1)]

NameError: name 'train_df' is not defined

In [11]:
train_df.sample(10)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
347,14.76,14.74,94.87,668.7,0.08875,0.0778,0.04608,0.03528,0.1521,0.05912,...,17.93,114.2,880.8,0.122,0.2009,0.2151,0.1251,0.3109,0.08187,1
441,17.27,25.42,112.4,928.8,0.08331,0.1109,0.1204,0.05736,0.1467,0.05407,...,35.46,132.8,1284.0,0.1436,0.4122,0.5036,0.1739,0.25,0.07944,0
33,19.27,26.47,127.9,1162.0,0.09401,0.1719,0.1657,0.07593,0.1853,0.06261,...,30.9,161.4,1813.0,0.1509,0.659,0.6091,0.1785,0.3672,0.1123,0
301,12.46,19.89,80.43,471.3,0.08451,0.1014,0.0683,0.03099,0.1781,0.06249,...,23.07,88.13,551.3,0.105,0.2158,0.1904,0.07625,0.2685,0.07764,1
244,19.4,23.5,129.1,1155.0,0.1027,0.1558,0.2049,0.08886,0.1978,0.06,...,30.53,144.9,1417.0,0.1463,0.2968,0.3458,0.1564,0.292,0.07614,0
236,23.21,26.97,153.5,1670.0,0.09509,0.1682,0.195,0.1237,0.1909,0.06309,...,34.51,206.0,2944.0,0.1481,0.4126,0.582,0.2593,0.3103,0.08677,0
437,14.04,15.98,89.78,611.2,0.08458,0.05895,0.03534,0.02944,0.1714,0.05898,...,21.58,101.2,750.0,0.1195,0.1252,0.1117,0.07453,0.2725,0.07234,1
298,14.26,18.17,91.22,633.1,0.06576,0.0522,0.02475,0.01374,0.1635,0.05586,...,25.26,105.8,819.7,0.09445,0.2167,0.1565,0.0753,0.2636,0.07676,1
288,11.26,19.96,73.72,394.1,0.0802,0.1181,0.09274,0.05588,0.2595,0.06233,...,22.33,78.27,437.6,0.1028,0.1843,0.1546,0.09314,0.2955,0.07009,1
270,14.29,16.82,90.3,632.6,0.06429,0.02675,0.00725,0.00625,0.1508,0.05376,...,20.65,94.44,684.6,0.08567,0.05036,0.03866,0.03333,0.2458,0.0612,1


In [12]:
train_df.head(15)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
73,13.8,15.79,90.43,584.1,0.1007,0.128,0.07789,0.05069,0.1662,0.06566,...,20.86,110.3,812.4,0.1411,0.3542,0.2779,0.1383,0.2589,0.103,0
394,12.1,17.72,78.07,446.2,0.1029,0.09758,0.04783,0.03326,0.1937,0.06161,...,25.8,88.33,559.5,0.1432,0.1773,0.1603,0.06266,0.3049,0.07081,1
393,21.61,22.28,144.4,1407.0,0.1167,0.2087,0.281,0.1562,0.2162,0.06606,...,28.74,172.0,2081.0,0.1502,0.5717,0.7053,0.2422,0.3828,0.1007,0
425,10.03,21.28,63.19,307.3,0.08117,0.03912,0.00247,0.005159,0.163,0.06439,...,28.94,69.92,376.3,0.1126,0.07094,0.01235,0.02579,0.2349,0.08061,1
305,11.6,24.49,74.23,417.2,0.07474,0.05688,0.01974,0.01313,0.1935,0.05878,...,31.62,81.39,476.5,0.09545,0.1361,0.07239,0.04815,0.3244,0.06745,1
76,13.53,10.94,87.91,559.2,0.1291,0.1047,0.06877,0.06556,0.2403,0.06641,...,12.49,91.36,605.5,0.1451,0.1379,0.08539,0.07407,0.271,0.07191,1
384,13.28,13.72,85.79,541.8,0.08363,0.08575,0.05077,0.02864,0.1617,0.05594,...,17.37,96.59,623.7,0.1166,0.2685,0.2866,0.09173,0.2736,0.0732,1
555,10.29,27.61,65.67,321.4,0.0903,0.07658,0.05999,0.02738,0.1593,0.06127,...,34.91,69.57,357.6,0.1384,0.171,0.2,0.09127,0.2226,0.08283,1
362,12.76,18.84,81.87,496.6,0.09676,0.07952,0.02688,0.01781,0.1759,0.06183,...,25.99,87.82,579.7,0.1298,0.1839,0.1255,0.08312,0.2744,0.07238,1
72,17.2,24.52,114.2,929.4,0.1071,0.183,0.1692,0.07944,0.1927,0.06487,...,33.82,151.6,1681.0,0.1585,0.7394,0.6566,0.1899,0.3313,0.1339,0


In [4]:
train_df.tail(15)

NameError: name 'train_df' is not defined