### Importing required libraries

In [1]:
import pandas as pd
import re
from urllib.parse import urlparse

### Feature extraction

In [2]:
def extract_url_features(df, url_column='URL',
                         suspicious_words=('login', 'secure', 'account', 'update', 'free')):
    """Return a copy of ``df`` with lexical features derived from its URL column.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is copied, never modified in place.
    url_column : str, default 'URL'
        Name of the column holding the URL strings.
    suspicious_words : iterable of str, optional
        Keywords counted (case-insensitively, as substrings) for the
        ``count_suspicious_words`` feature.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with these integer columns added:
        ``url_length``, ``num_dots``, ``num_hyphens``, ``has_at``,
        ``has_https``, ``has_ip``, ``count_suspicious_words``.

    Raises
    ------
    KeyError
        If ``url_column`` is not a column of ``df``.
    """
    df = df.copy()

    # Missing URLs (NaN/None) would make len()/.count() raise; treat them as
    # empty strings so every feature evaluates to 0 for those rows.
    urls = df[url_column].fillna('').astype(str)
    urls_lower = urls.str.lower()
    # Lower-case the keywords too, since they are matched against lowered URLs.
    keywords = tuple(word.lower() for word in suspicious_words)

    df['url_length'] = urls.str.len()
    df['num_dots'] = urls.str.count(r'\.')
    df['num_hyphens'] = urls.str.count('-')
    df['has_at'] = urls.str.contains('@', regex=False).astype(int)
    # Test the scheme only: a plain substring check would also flag URLs that
    # merely embed "https" in the host or path (e.g. http://https-login.example).
    df['has_https'] = urls_lower.str.startswith('https://').astype(int)
    # Host written as a dotted-decimal number right after an http(s) scheme.
    df['has_ip'] = urls.str.match(r'^http[s]?://\d+\.\d+\.\d+\.\d+').astype(int)
    df['count_suspicious_words'] = urls_lower.apply(
        lambda url: sum(word in url for word in keywords)
    )

    return df

### Load the raw datasets

In [3]:
# Read the raw train and test splits (paths are relative to the notebook's folder).
train, test = map(pd.read_csv, ("../data/train.csv", "../data/test.csv"))

### Apply feature extraction

In [4]:
# Build the URL-derived feature columns for both splits with the same settings,
# so train and test end up with an identical feature schema.
train_features, test_features = (
    extract_url_features(split, url_column='URL') for split in (train, test)
)

### Save the feature-engineered data

In [5]:
# Write each feature frame next to the raw data; index=False keeps the
# DataFrame index out of the CSV files.
for features, out_path in (
    (train_features, "../data/train_features.csv"),
    (test_features, "../data/test_features.csv"),
):
    features.to_csv(out_path, index=False)