In [1]:
import pandas as pd
df = pd.read_csv('BMD-2.csv')
df.head()

Unnamed: 0,Age,Weight_kg,Height_cm,BMD,Fracture
0,57.052768,64.0,155.5,0.8793,no fracture
1,75.741225,78.0,162.0,0.7946,no fracture
2,70.7789,73.0,170.5,0.9067,no fracture
3,78.247175,60.0,148.0,0.7112,no fracture
4,54.191877,55.0,161.0,0.7909,no fracture


In [2]:
# Display the count
num_rows = len(df)
print(f"Total number of customers: {num_rows}")
df.count()

Total number of customers: 169


Age          169
Weight_kg    169
Height_cm    169
BMD          169
Fracture     169
dtype: int64

In [3]:
# Display the column (categories)
num_column = df.shape[1]
print(f"Total number of categories: {num_column}")

Total number of categories: 5


In [4]:
# Check data types of each column
print(df.dtypes)

Age          float64
Weight_kg    float64
Height_cm    float64
BMD          float64
Fracture      object
dtype: object


In [5]:
missing_values = df.isnull().sum()
print(missing_values)

Age          0
Weight_kg    0
Height_cm    0
BMD          0
Fracture     0
dtype: int64


In [9]:
# Group by 'Category' and count the occurrences
grouped_count = df.groupby('Fracture').size().reset_index(name='Count')

print(grouped_count)


      Fracture  Count
0     fracture     50
1  no fracture    119


In [10]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [11]:
# Features and target
X = df[['Age', 'Weight_kg','Height_cm','BMD']]
y = df['Fracture']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Create a logistic regression model with L2 regularization (Ridge)
log_reg = LogisticRegression(penalty='l2', C=1.0, solver='liblinear')  # 'C' is the inverse of regularization strength

# Fit the model
log_reg.fit(X_train, y_train)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [12]:
# Predict on the test set
y_pred = log_reg.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_rep)


Accuracy: 0.803921568627451
Classification Report:
             precision    recall  f1-score   support

   fracture       0.71      0.62      0.67        16
no fracture       0.84      0.89      0.86        35

avg / total       0.80      0.80      0.80        51



In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Sample data
data = {
    'Feature1': [2.5, 3.6, 1.1, 4.4, 5.5, 6.1, 2.2, 7.4, 8.0, 9.1],
    'Feature2': [1.2, 0.9, 3.5, 2.1, 4.6, 3.9, 5.0, 6.1, 7.2, 8.4],
    'Target': [0, 0, 0, 1, 1, 1, 0, 1, 1, 1]
}

df = pd.DataFrame(data)

# Features and target
X = df[['Feature1', 'Feature2']]
y = df['Target']

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Create logistic regression model with L2 regularization (Ridge)
log_reg = LogisticRegression(penalty='l2', C=1.0, solver='liblinear', random_state=42)

# Fit the model
log_reg.fit(X_train, y_train)

# Get the coefficients
coefficients = log_reg.coef_[0]  # For binary classification, coef_ is a 2D array; select the first row

# Print feature importance
feature_importance = pd.DataFrame({
    'Feature': ['Feature1', 'Feature2'],
    'Coefficient': coefficients
})

print(feature_importance)
