In [5]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Pin the legacy global NumPy RNG so every run reproduces the same draws.
# NOTE: the three draws below must stay in this order (heights, weight noise,
# ages); reordering them changes every generated value.
np.random.seed(42)

# Number of synthetic people (rows) to generate.
n = 20

# Heights in cm: normal draws with mean 170 and standard deviation 10.
heights = np.random.normal(loc=170, scale=10, size=n)

# Weights in kg: a 70 kg baseline plus 0.7 kg for each cm of height above
# (or below) 170 cm, plus normal noise with a 5 kg standard deviation.
# The shared height term is what builds in the height/weight correlation.
weights = 70 + 0.7 * (heights - 170) + np.random.normal(loc=0, scale=5, size=n)

# Ages in whole years, drawn without reference to height or weight.
# randint excludes its upper bound, so ages fall in 18..64 inclusive.
ages = np.random.randint(low=18, high=65, size=n)

# Assemble the three arrays into a single table, one column per variable.
df = pd.DataFrame(dict(height=heights, weight=weights, age=ages))

In [6]:
# Preview the table: a heading line followed by the first 5 rows
# (head() returns 5 rows when called without an argument).
print("Sample DataFrame:", df.head(), sep="\n")

Sample DataFrame:
       height     weight  age
0  174.967142  80.805243   35
1  168.617357  67.903268   43
2  176.476885  74.871461   61
3  185.230299  73.537468   51
4  167.658466  65.639013   27


In [7]:
# Per-column summary (count, mean, std, min, quartiles, max), printed under
# a heading that starts with a blank line.
print("\nBasic Statistics:", df.describe(), sep="\n")


Basic Statistics:
           height     weight      age
count   20.000000  20.000000  20.0000
mean   168.287014  67.471034  42.5000
std      9.600284   7.639105  12.0547
min    150.867198  51.318484  25.0000
25%    163.512783  62.310610  32.7500
50%    167.658548  66.771141  40.5000
75%    175.081756  73.870966  51.5000
max    185.792128  80.805243  64.0000


In [8]:
# Pairwise Pearson (linear) correlation between the columns of df.
# 'pearson' is already the pandas default; it is spelled out here so this
# cell reads the same way as the Spearman and Kendall cells.
corr_matrix = df.corr(method='pearson')
print("\nCorrelation Matrix (Pearson):", corr_matrix, sep="\n")



Correlation Matrix (Pearson):
          height    weight       age
height  1.000000  0.780048  0.194836
weight  0.780048  1.000000  0.022725
age     0.194836  0.022725  1.000000


In [9]:
# Pairwise Spearman rank correlation between the columns of df
# (correlation of the value ranks rather than the raw values).
spearman_corr = df.corr(method='spearman')
print("\nSpearman Correlation:", spearman_corr, sep="\n")


Spearman Correlation:
          height    weight       age
height  1.000000  0.792481  0.131728
weight  0.792481  1.000000 -0.050433
age     0.131728 -0.050433  1.000000


In [10]:
# Pairwise Kendall Tau rank correlation between the columns of df.
kendall_corr = df.corr(method='kendall')
print("\nKendall Tau Correlation:", kendall_corr, sep="\n")



Kendall Tau Correlation:
          height    weight       age
height  1.000000  0.631579  0.079578
weight  0.631579  1.000000 -0.037136
age     0.079578 -0.037136  1.000000


In [11]:
# Correlation for a single pair of columns: Series.corr returns one scalar
# instead of a full matrix. Pearson is the default method, named explicitly.
height_weight_corr = df['height'].corr(other=df['weight'], method='pearson')

# Report the coefficient rounded to four decimal places.
print(f"\nCorrelation between height and weight: {height_weight_corr:.4f}")



Correlation between height and weight: 0.7800
