In [53]:
import faker

fake = faker.Faker()

print(fake.name())
print(fake.last_name())
print(fake.email())
print(fake.city())
print(fake.address())

Meghan Patel
Hart
davidbeck@example.org
Smithside
PSC 0505, Box 5433
APO AP 15076


In [54]:
import json
import faker

fake = faker.Faker()

# Generate a user

users = []
for i in range(100):
    user = {
        "username": fake.user_name(),
        "name": fake.name(),
        "email": fake.email(),
        "address": fake.address(),
        "age": fake.random_int(min=18, max=90)
    }
    users.append(user)

# Save in JSON
with open("users.json", "w") as file:
    json.dump(users, file, indent=4)

In [55]:
# Generate ad campaign
import random
from datetime import date, timedelta

# Start and end of campaign
def get_start_end_dates():
    duration = random.randint(1, 2*365)
    offset = random.randint(-365, 365)
    start = date.today() - timedelta(days=offset)
    end = start + timedelta(days=duration)
    return start.strftime("%Y.%m.%d"), end.strftime("%Y.%m.%d")

print(get_start_end_dates())

# Generate target age
def get_age_range():
    age = random.randrange(20, 46, 5)
    diff = random.randrange(5, 26, 5)
    
    return f"{age}-{age + diff}"

print(get_age_range())

# Generate the currency
def get_currency():
    return random.choice(("GBR", "USD", "EUR"))

# Create campaign name
def get_campaign_name():
    date = get_start_end_dates()
    date_start = date[0]
    date_end = date[1]
    age_range = get_age_range()
    currency = get_currency()
    
    return f"{date_start}_{date_end}_{age_range}_{currency}"

print(get_campaign_name())

('2026.10.09', '2026.10.23')
25-40
2026.06.23_2026.09.09_35-55_GBR


In [56]:
# Generate the data for the campaign
def get_campaign_data():
    name = get_campaign_name()
    budget = random.randint(10**3, 10**6)
    spent = random.randint(10**2, budget)

    # return dict
    return { 
        "cmp_name": name,
        "cmp_bgt": budget,
        "cmp_spent": spent
    }

print(get_campaign_data())

def get_raw_data(users):
    raw_data = []
    for user in users:
        campaigns = []
        for i in range(random.randint(5, 10)):
            campaign_data = get_campaign_data()
            dates = campaign_data["cmp_name"].split("_")[2].split("-") # start - end
            min_age = int(dates[0])
            max_age = int(dates[1])
            if (user["age"] >= min_age and user["age"] <= max_age):
                campaigns.append(campaign_data)
        
        raw_data.append({"user": user, "campaigns": campaigns})
    return raw_data

raw_data = get_raw_data(users)
with open("raw_data.json", "w") as file:
    json.dump(raw_data, file, indent=4)

campaign_data = []
for data in raw_data:
    for campaign in data["campaigns"]:
        campaign["user"] = data["user"]
        campaign_data.append(campaign)

with open("campaigns_data.json", "w") as file:
    json.dump(campaign_data, file, indent=4)


{'cmp_name': '2025.09.30_2027.06.18_30-50_EUR', 'cmp_bgt': 331116, 'cmp_spent': 114117}


In [57]:
import pandas as pd

# DataFrame = Excel spreadsheet in Python
df = pd.read_json("campaigns_data.json")

df.head(10)

print(df.shape) # check the size of the DataFrame (rows, cols)
print(df.columns) # column names
print(df.dtypes) # types of data


(163, 4)
Index(['cmp_name', 'cmp_bgt', 'cmp_spent', 'user'], dtype='object')
cmp_name     object
cmp_bgt       int64
cmp_spent     int64
user         object
dtype: object


In [58]:
# Filter the data
print(df.query("cmp_bgt < 100000 and cmp_spent > 9000"))

                            cmp_name  cmp_bgt  cmp_spent  \
2    2025.11.12_2027.09.05_20-40_GBR    38174      16800   
15   2026.04.11_2026.12.22_35-55_GBR    36780      36096   
35   2025.08.20_2026.08.15_25-40_EUR    30358      23611   
56   2026.01.19_2026.02.09_25-40_EUR    70552      46236   
70   2025.07.24_2025.10.19_20-35_EUR    74290      16636   
77   2025.12.15_2027.03.02_20-45_EUR    23207      22318   
87   2026.03.15_2028.02.18_45-60_GBR    67602      62402   
120  2025.11.21_2025.12.23_45-55_EUR    55698      22637   
122  2024.12.14_2025.09.05_40-55_EUR    96497      80239   
127  2026.11.15_2028.05.16_20-25_USD    48679      41099   
140  2026.09.14_2028.01.05_45-50_USD    36237      22185   

                                                  user  
2    {'username': 'donaldsmith', 'name': 'Brittany ...  
15   {'username': 'mtaylor', 'name': 'Emily Aguilar...  
35   {'username': 'taylor79', 'name': 'Lauren Price...  
56   {'username': 'jennifer79', 'name': 'Cindy Fuen

In [62]:
# Get the campaign with maximum spent
idx = df["cmp_spent"].idxmax()
df.loc[idx]

cmp_name                       2026.02.22_2028.02.12_45-65_EUR
cmp_bgt                                                 874513
cmp_spent                                               839790
user         {'username': 'penny17', 'name': 'Christopher W...
Name: 144, dtype: object

In [65]:
df.sort_values(by=["cmp_spent"], ascending=False).head(1)

Unnamed: 0,cmp_name,cmp_bgt,cmp_spent,user
144,2026.02.22_2028.02.12_45-65_EUR,874513,839790,"{'username': 'penny17', 'name': 'Christopher W..."


In [70]:
# Add a new column
df["cmp_remaining_budget"] = df["cmp_bgt"] - df["cmp_spent"]

# Get the start date
start_date = df["cmp_name"].str.split("_").str[0]
end_date = df["cmp_name"].str.split("_").str[1]

df["cmp_start_date"] = pd.to_datetime(start_date)
df["cmp_end_date"] = pd.to_datetime(end_date)

df["days_running"] = df["cmp_end_date"] - df["cmp_start_date"]

df.head(10)

Unnamed: 0,cmp_name,cmp_bgt,cmp_spent,user,cmp_remaining_budget,cmp_start_date,cmp_end_date,days_running
0,2025.03.27_2025.10.18_25-50_USD,61442,5725,"{'username': 'donaldsmith', 'name': 'Brittany ...",55717,2025-03-27,2025-10-18,205 days
1,2026.08.16_2028.02.07_30-40_USD,771485,435488,"{'username': 'donaldsmith', 'name': 'Brittany ...",335997,2026-08-16,2028-02-07,540 days
2,2025.11.12_2027.09.05_20-40_GBR,38174,16800,"{'username': 'donaldsmith', 'name': 'Brittany ...",21374,2025-11-12,2027-09-05,662 days
3,2026.04.28_2026.06.19_20-40_EUR,745378,18452,"{'username': 'donaldsmith', 'name': 'Brittany ...",726926,2026-04-28,2026-06-19,52 days
4,2025.01.03_2026.03.18_45-70_EUR,494163,346120,"{'username': 'rogerhumphrey', 'name': 'Darrell...",148043,2025-01-03,2026-03-18,439 days
5,2025.10.12_2027.07.14_35-60_EUR,213822,202586,"{'username': 'bwebster', 'name': 'Jamie Steven...",11236,2025-10-12,2027-07-14,640 days
6,2026.07.24_2027.09.04_40-65_EUR,314297,232462,"{'username': 'bwebster', 'name': 'Jamie Steven...",81835,2026-07-24,2027-09-04,407 days
7,2025.03.03_2026.03.06_45-55_GBR,946981,306737,"{'username': 'melissa54', 'name': 'Miranda Cha...",640244,2025-03-03,2026-03-06,368 days
8,2026.05.16_2027.01.19_45-60_GBR,260461,237606,"{'username': 'melissa54', 'name': 'Miranda Cha...",22855,2026-05-16,2027-01-19,248 days
9,2026.09.06_2028.07.28_40-60_EUR,133418,76708,"{'username': 'melissa54', 'name': 'Miranda Cha...",56710,2026-09-06,2028-07-28,691 days


In [71]:
df.describe()

Unnamed: 0,cmp_bgt,cmp_spent,cmp_remaining_budget,cmp_start_date,cmp_end_date,days_running
count,163.0,163.0,163.0,163,163,163
mean,478834.122699,227918.779141,250915.343558,2025-11-08 23:42:19.877300736,2026-11-27 09:25:23.926380288,383 days 09:43:04.049079756
min,8946.0,1168.0,684.0,2024-11-29 00:00:00,2025-03-20 00:00:00,10 days 00:00:00
25%,227713.5,59295.5,71126.0,2025-05-06 12:00:00,2026-06-02 12:00:00,245 days 00:00:00
50%,473893.0,172489.0,181009.0,2025-11-12 00:00:00,2026-11-13 00:00:00,374 days 00:00:00
75%,729977.5,343501.0,339673.5,2026-05-06 12:00:00,2027-07-06 00:00:00,549 days 12:00:00
max,996832.0,839790.0,973292.0,2026-11-15 00:00:00,2028-07-28 00:00:00,720 days 00:00:00
std,290839.757573,222826.808519,228786.207917,,,196 days 07:26:36.861114264


In [74]:
df.to_csv("data.csv")
df.to_excel("data.xlsx")