In [34]:
# Cell 1: Imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")
np.random.seed(42)

In [35]:
# Cell 2: Simulate a dataset with 300 access requests
N = 300

# Define categorical options
relationship_types = ['friend', 'colleague', 'stranger']
post_sensitivities = ['low', 'medium', 'high']
request_times = ['day', 'night']
location_matches = ['yes', 'no']

# Generate features
data = {
    'relationship_type': np.random.choice(relationship_types, N, p=[0.5, 0.3, 0.2]),
    'post_sensitivity': np.random.choice(post_sensitivities, N, p=[0.4, 0.4, 0.2]),
    'request_time': np.random.choice(request_times, N),
    'location_match': np.random.choice(location_matches, N, p=[0.7, 0.3])
}

df = pd.DataFrame(data)

# Simulate a rule-based label with some noise (to mimic real behavior)
def simulate_access(row):
    if row['relationship_type'] == 'stranger' and row['post_sensitivity'] == 'high':
        return 0
    if row['relationship_type'] == 'friend' and row['location_match'] == 'yes':
        return 1
    if row['post_sensitivity'] == 'low':
        return 1
    return np.random.choice([0, 1], p=[0.3, 0.7])  # add uncertainty

df['access_granted'] = df.apply(simulate_access, axis=1)

df.head()

Unnamed: 0,relationship_type,post_sensitivity,request_time,location_match,access_granted
0,friend,low,day,yes,1
1,stranger,medium,night,no,1
2,colleague,medium,night,yes,1
3,colleague,medium,night,yes,1
4,friend,medium,day,yes,1


In [36]:
# Cell 3: Encode categorical variables into numeric form
le = LabelEncoder()
for column in df.columns[:-1]:  # Skip the target column
    df[column] = le.fit_transform(df[column])

df

Unnamed: 0,relationship_type,post_sensitivity,request_time,location_match,access_granted
0,1,1,0,1,1
1,2,2,1,0,1
2,0,2,1,1,1
3,0,2,1,1,1
4,1,2,0,1,1
...,...,...,...,...,...
295,0,2,0,1,0
296,0,2,1,1,1
297,1,1,1,1,1
298,0,1,0,0,1


In [37]:
# Cell 4: Split dataset into training and test sets
X = df.drop('access_granted', axis=1)
y = df['access_granted']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [38]:
# Cell 5: Train the Decision Tree model
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)
clf.fit(X_train, y_train)

DecisionTreeClassifier(criterion='entropy', random_state=42)

In [39]:
# Cell 6: Evaluate model performance
y_pred = clf.predict(X_test)
print("=== Classification Report ===")
print(classification_report(y_test, y_pred))

=== Classification Report ===
              precision    recall  f1-score   support

           0       0.50      0.20      0.29        15
           1       0.86      0.96      0.91        75

    accuracy                           0.83        90
   macro avg       0.68      0.58      0.60        90
weighted avg       0.80      0.83      0.80        90

