In [17]:
import csv
import random
from faker import Faker
from datetime import datetime, timedelta
import pandas as pd
import os

# Initialize the Faker generator
fake = Faker()

# Define the categories, subcategories, and product names with prices
categories = ["Agriculture", "Energy", "Food and Beverages", "Pharmaceuticals", "Materials and Chemicals"]
subcategories = {
    "Agriculture": ["Biopesticides", "Biofertilizers"],
    "Energy": ["Biofuels", "Biomass"],
    "Food and Beverages": ["Organic Food", "Plant-Based Meat Alternatives"],
    "Pharmaceuticals": ["Biopharmaceuticals", "Herbal Medicines"],
    "Materials and Chemicals": ["Bioplastics", "Biodegradable Packaging"]
}
products = {
    "Biopesticides": {
        "Neem oil-based insecticides": "71$",
        "Bacillus thuringiensis (Bt) products": "44$"
    },
    "Biofertilizers": {
        "Rhizobium-based fertilizers": "80$",
        "Mycorrhizal fungi products": "75$"
    },
    "Biofuels": {
        "Biodiesel": "5$",
        "Ethanol": "9$",
        "Biogas": "11$"
    },
    "Biomass": {
        "Wood pellets": "11$",
        "Crop residues for energy generation": "12$"
    },
    "Organic Food": {
        "Organic fruits and vegetables": "36$",
        "organic dairy products": "61$"
    },
    "Plant-Based Meat Alternatives": {
        "Tofu": "19$",
        "Tempeh": "31$",
        "Seitan": "16$"
    },
    "Biopharmaceuticals": {
        "Monoclonal antibodies": "10$",
        "Insulin": "11$",
        "Vaccines": "50$"
    },
    "Herbal Medicines": {
        "Echinacea extracts": "46$",
        "Turmeric supplements": "29$"
    },
    "Bioplastics": {
        "PLA (Polylactic Acid)": "23$",
        "PHA (Polyhydroxyalkanoates)": "11$"
    },
    "Biodegradable Packaging": {
        "Biodegradable food containers": "30$",
        "Compostable bags": "34$"
    }
}

# Create a list to store the data
data = []

# Define the number of rows
num_rows = random.randint(100, 2000)

# Create a dictionary to track customer data
customer_data = {}

# Create a dictionary to track inventory data
inventory_data = {}
inventory_id_counter = 0

# Define the initial stock level and restock threshold
initial_stock_level = 4000
restock_threshold = random.choice([100, 150])

# Define the initial last restock date
last_restock_date = (datetime.now() - timedelta(days=random.randint(1, 365))).strftime('%Y-%m-%d')

fake = Faker(unique=True)

# Generate data for each row
for _ in range(num_rows):
    customer_name = fake.first_name() + " " + fake.last_name()
    email = f"{customer_name.replace(' ', '_')}@{fake.random_element(['gmail.com', 'outlook.fr', 'hotmail.com','youcode.ma', 'maroc.ma', 'yopmail.com'])}"
    phone = fake.phone_number()
    address = fake.unique.street_address()
    age = random.randint(-90, 90)
    if age == 0:
        age = random.choice([-90, 90])
    gender = random.choice(["Male", "Female"])
    location = fake.country()
    customer_segment = random.choice(["Bronze", "Gold", "Silver"])
    
    category = random.choice(categories)
    subcategory = random.choice(subcategories[category])
    product, price = random.choice(list(products[subcategory].items()))
    
    price = price.strip('$')
    quantity = random.randint(-10, 10)
    
    inventory_id = inventory_data.get(product, None)
    if inventory_id is None:
        inventory_id_counter += 1
        inventory_id = inventory_id_counter
        inventory_data[product] = inventory_id
    
    stock_level = initial_stock_level - quantity
    if stock_level <= restock_threshold:
        restock_threshold = random.choice([100, 150])
        last_restock_date = (datetime.now() - timedelta(days=random.randint(1, 365))).strftime('%Y-%m-%d')
    
    warehouse_name = random.choice(["China WH", "Japan WH", "Turkey WH"])
    location_warehouse = "China" if warehouse_name == "China WH" else "Japan" if warehouse_name == "Japan WH" else "Turkey"
    
    supplier_name = fake.company()
    supplier_phone = fake.unique.phone_number()
    location_supplier = random.choice(["USA", "France", "Colombia", "China", "Turkey"])
    quality_score = round(random.uniform(1, 5), 2)
    rating_customer = str(random.randint(1, 5)) if random.random() > 0.1 else ""
    #comments = fake.sentence() if random.random() > 0.1 else ""
    
    # Generate a random weight for each product
    weight_data = {
        "Organic fruits and vegetables": random.uniform(0.5, 2.0),
        "PHA (Polyhydroxyalkanoates)": random.uniform(0.1, 2.0),
        "Compostable bags": random.uniform(0.1, 2.0),
        "PLA (Polylactic Acid)": random.uniform(0.1, 2.0),
        "Ethanol": random.uniform(0.1, 2.0),
        "Bacillus thuringiensis (Bt) products": random.uniform(0.1, 2.0),
        "Insulin": random.uniform(0.1, 2.0),
        "Rhizobium-based fertilizers": random.uniform(0.1, 2.0),
        "Neem oil-based insecticides": random.uniform(0.1, 2.0),
        "Mycorrhizal fungi products": random.uniform(0.1, 2.0),
        "Tofu": random.uniform(0.1, 2.0),
        "Echinacea extracts": random.uniform(0.1, 2.0),
        "Seitan": random.uniform(0.1, 2.0),
        "Biogas": random.uniform(0.1, 2.0),
        "Monoclonal antibodies": random.uniform(0.1, 2.0),
        "Tempeh": random.uniform(0.1, 2.0),
        "Turmeric supplements": random.uniform(0.1, 2.0),
        "organic dairy products": random.uniform(0.5, 2.0),
        "Wood pellets": random.uniform(0.5, 2.0),
        "Crop residues for energy generation": random.uniform(0.1, 2.0),
        "Vaccines": random.uniform(0.1, 2.0),
        "Biodegradable food containers": random.uniform(0.5, 2.0),
        "Biodiesel": random.uniform(0.1, 2.0)
    }
    weight = weight_data.get(product, 0.0)
    
    end_date = datetime.now()
    start_date = end_date - timedelta(days=6)

    date_order = fake.date_between_dates(date_start=start_date, date_end=end_date).strftime('%Y-%m-%d')

    tax_5_percent = round(float(price) * 0.05, 2)
    payment = random.choice(["Credit Card", "Ewallet", "Cash"])
    shipper = random.choice(["FedEx", "United Parcel Service", "GeoPost", "Hermés Internation", "DHL"])
    shipping_method = random.choice(["Sea", "Ground", "Air"])
    shipping_costs = round(random.uniform(5, 50), 2)
    
    # Append the data to the list
    data.append([customer_name, email, phone, address, age, gender, location, customer_segment,
                 category, subcategory, product, price, quantity, weight, date_order, tax_5_percent,
                 payment, shipper, shipping_method, shipping_costs, inventory_id, stock_level, restock_threshold,
                 last_restock_date, warehouse_name, location_warehouse, supplier_name, supplier_phone,
                 location_supplier, quality_score, rating_customer])
    
    
import datetime

# Get the current date and time
date_now = datetime.datetime.now().strftime("%Y%m%d")

# Specify the CSV file name with the current date
csv_file = f'ecom_data_{date_now}.csv'

# Write the data to a CSV file
with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(['Customer_Name', 'Email', 'Phone', 'Adresse', 'Age', 'Gender', 'Location',
                     'Customer_Segment', 'Category', 'Subcategory', 'Product', 'Price', 'Quantity',
                     'Weight', 'Date_Order', 'Tax_5_percent', 'Payement', 'Shipper', 'Shipping_Method',
                     'Shipping Costs', 'Inventory_id', 'StockLevel', 'Restock_Threshold', 'Last_Restock_Date',
                     'Warehouse_Name', 'Location_warehouse', 'Supplier_Name', 'Supplier_Phone', 'Location_Supplier',
                     'Quality_Score', 'rating_customer'])
    # Write the data
    writer.writerows(data)

print(f"CSV file '{csv_file}' with {num_rows} rows of data has been generated.")

CSV file 'ecom_data_20240115.csv' with 1337 rows of data has been generated.


------------------------------------------------------------------------------------

In [9]:
from datetime import datetime, timedelta

end_date = datetime.now()
start_date = end_date - timedelta(days=100)

#     date_order = fake.date_between_dates(date_start=start_date, date_end=end_date).strftime('%Y-%m-%d')

In [12]:
import csv
import random
from faker import Faker
from datetime import datetime, timedelta
import pandas as pd
import os

# Initialize the Faker generator
fake = Faker()

# Define the categories, subcategories, and product names with prices
categories = ["Agriculture", "Energy", "Food and Beverages", "Pharmaceuticals", "Materials and Chemicals"]
subcategories = {
    "Agriculture": ["Biopesticides", "Biofertilizers"],
    "Energy": ["Biofuels", "Biomass"],
    "Food and Beverages": ["Organic Food", "Plant-Based Meat Alternatives"],
    "Pharmaceuticals": ["Biopharmaceuticals", "Herbal Medicines"],
    "Materials and Chemicals": ["Bioplastics", "Biodegradable Packaging"]
}
products = {
    "Biopesticides": {
        "Neem oil-based insecticides": "71$",
        "Bacillus thuringiensis (Bt) products": "44$"
    },
    "Biofertilizers": {
        "Rhizobium-based fertilizers": "80$",
        "Mycorrhizal fungi products": "75$"
    },
    "Biofuels": {
        "Biodiesel": "5$",
        "Ethanol": "9$",
        "Biogas": "11$"
    },
    "Biomass": {
        "Wood pellets": "11$",
        "Crop residues for energy generation": "12$"
    },
    "Organic Food": {
        "Organic fruits and vegetables": "36$",
        "organic dairy products": "61$"
    },
    "Plant-Based Meat Alternatives": {
        "Tofu": "19$",
        "Tempeh": "31$",
        "Seitan": "16$"
    },
    "Biopharmaceuticals": {
        "Monoclonal antibodies": "10$",
        "Insulin": "11$",
        "Vaccines": "50$"
    },
    "Herbal Medicines": {
        "Echinacea extracts": "46$",
        "Turmeric supplements": "29$"
    },
    "Bioplastics": {
        "PLA (Polylactic Acid)": "23$",
        "PHA (Polyhydroxyalkanoates)": "11$"
    },
    "Biodegradable Packaging": {
        "Biodegradable food containers": "30$",
        "Compostable bags": "34$"
    }
}

# Create a list to store the data
data = []

# Define the number of rows
num_rows = random.randint(3000, 8000)

# Create a dictionary to track customer data
customer_data = {}

# Create a dictionary to track inventory data
inventory_data = {}
inventory_id_counter = 0

# Define the initial stock level and restock threshold
initial_stock_level = 4000
restock_threshold = random.choice([100, 150])

# Define the initial last restock date
last_restock_date = (datetime.now() - timedelta(days=random.randint(1, 365))).strftime('%Y-%m-%d')

fake = Faker(unique=True)

# Generate data for each row
for _ in range(num_rows):
    customer_name = fake.first_name() + " " + fake.last_name()
    email = f"{customer_name.replace(' ', '_')}@{fake.random_element(['gmail.com', 'outlook.fr', 'hotmail.com','space.net', 'maroc.ma', 'yopmail.com'])}"
    phone = fake.phone_number()
    address = fake.unique.street_address()
    age = random.randint(-90, 90)
    if age == 0:
        age = random.choice([-90, 90])
    gender = random.choice(["Male", "Female"])
    location = fake.country()
    customer_segment = random.choice(["Bronze", "Gold", "Silver"])
    
    category = random.choice(categories)
    subcategory = random.choice(subcategories[category])
    product, price = random.choice(list(products[subcategory].items()))
    
    price = price.strip('$')
    quantity = random.randint(-10, 10)
    
    inventory_id = inventory_data.get(product, None)
    if inventory_id is None:
        inventory_id_counter += 1
        inventory_id = inventory_id_counter
        inventory_data[product] = inventory_id
    
    stock_level = initial_stock_level - quantity
    if stock_level <= restock_threshold:
        restock_threshold = random.choice([100, 150])
        last_restock_date = (datetime.now() - timedelta(days=random.randint(1, 365))).strftime('%Y-%m-%d')
    
    warehouse_name = random.choice(["China WH", "Japan WH", "Turkey WH"])
    location_warehouse = "China" if warehouse_name == "China WH" else "Japan" if warehouse_name == "Japan WH" else "Turkey"
    
    supplier_name = fake.company()
    supplier_phone = fake.unique.phone_number()
    location_supplier = random.choice(["USA", "France", "Colombia", "China", "Turkey"])
    quality_score = round(random.uniform(1, 5), 2)
    rating_customer = str(random.randint(1, 5)) if random.random() > 0.1 else ""
    #comments = fake.sentence() if random.random() > 0.1 else ""
    
    # Generate a random weight for each product
    weight_data = {
        "Organic fruits and vegetables": random.uniform(0.5, 2.0),
        "PHA (Polyhydroxyalkanoates)": random.uniform(0.1, 2.0),
        "Compostable bags": random.uniform(0.1, 2.0),
        "PLA (Polylactic Acid)": random.uniform(0.1, 2.0),
        "Ethanol": random.uniform(0.1, 2.0),
        "Bacillus thuringiensis (Bt) products": random.uniform(0.1, 2.0),
        "Insulin": random.uniform(0.1, 2.0),
        "Rhizobium-based fertilizers": random.uniform(0.1, 2.0),
        "Neem oil-based insecticides": random.uniform(0.1, 2.0),
        "Mycorrhizal fungi products": random.uniform(0.1, 2.0),
        "Tofu": random.uniform(0.1, 2.0),
        "Echinacea extracts": random.uniform(0.1, 2.0),
        "Seitan": random.uniform(0.1, 2.0),
        "Biogas": random.uniform(0.1, 2.0),
        "Monoclonal antibodies": random.uniform(0.1, 2.0),
        "Tempeh": random.uniform(0.1, 2.0),
        "Turmeric supplements": random.uniform(0.1, 2.0),
        "organic dairy products": random.uniform(0.5, 2.0),
        "Wood pellets": random.uniform(0.5, 2.0),
        "Crop residues for energy generation": random.uniform(0.1, 2.0),
        "Vaccines": random.uniform(0.1, 2.0),
        "Biodegradable food containers": random.uniform(0.5, 2.0),
        "Biodiesel": random.uniform(0.1, 2.0)
    }
    weight = weight_data.get(product, 0.0)
    
    end_date = datetime.now()
    start_date = end_date - timedelta(days=6)

    date_order = fake.date_between_dates(date_start=start_date, date_end=end_date).strftime('%Y-%m-%d')

    tax_5_percent = round(float(price) * 0.05, 2)
    payment = random.choice(["Credit Card", "Ewallet", "Cash"])
    shipper = random.choice(["FedEx", "United Parcel Service", "GeoPost", "Hermés Internation", "DHL"])
    shipping_method = random.choice(["Sea", "Ground", "Air"])
    shipping_costs = round(random.uniform(5, 50), 2)
    
    # Append the data to the list
    data.append([customer_name, email, phone, address, age, gender, location, customer_segment,
                 category, subcategory, product, price, quantity, weight, date_order, tax_5_percent,
                 payment, shipper, shipping_method, shipping_costs, inventory_id, stock_level, restock_threshold,
                 last_restock_date, warehouse_name, location_warehouse, supplier_name, supplier_phone,
                 location_supplier, quality_score, rating_customer])
    
    
import datetime

# Get the current date and time
date_now = datetime.datetime.now().strftime("%Y%m%d")

In [13]:
import json


# Assuming 'data' is your list

# Convert the list to a DataFrame
columns = ['Customer_Name', 'Email', 'Phone', 'Adresse', 'Age', 'Gender', 'Location',
           'Customer_Segment', 'Category', 'Subcategory', 'Product', 'Price', 'Quantity',
           'Weight', 'Date_Order', 'Tax_5_percent', 'Payment', 'Shipper', 'Shipping_Method',
           'Shipping Costs', 'Inventory_id', 'StockLevel', 'Restock_Threshold', 'Last_Restock_Date',
           'Warehouse_Name', 'Location_warehouse', 'Supplier_Name', 'Supplier_Phone', 'Location_Supplier',
           'Quality_Score', 'rating_customer']

data = pd.DataFrame(data, columns=columns)

# Calculate the index to split the data into two halves
split_index = len(data) // 2

# Split the data into two halves
csv_data = data.iloc[:split_index]
json_data = data.iloc[split_index:]

# Specify the CSV file name with the current date
csv_file = f'ecom_data_{date_now}.csv'

# Write the first half to a CSV file
with open(csv_file, 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(data.columns)  # Write the header
    csv_writer.writerows(csv_data.values)

print(f"CSV file '{csv_file}' with {len(csv_data)} rows of data has been generated.")

# Specify the JSON file name with the current date
json_file = f'ecom_data_{date_now}.json'

# Write the second half to a JSON file with column names
json_data.to_json(json_file, orient='records', lines=True)

print(f"JSON file '{json_file}' with {len(json_data)} rows of data has been generated.")

CSV file '<_io.TextIOWrapper name='ecom_data_20240123.csv' mode='w' encoding='cp1252'>' with 1585 rows of data has been generated.
JSON file 'ecom_data_20240123.json' with 1586 rows of data has been generated.


-------------------------------------------------------------------------------------------

In [1]:
import csv
import random
from faker import Faker
from datetime import datetime, timedelta
import pandas as pd
import os

In [2]:
# Initialize the Faker generator
fake = Faker()

In [3]:
# Define the categories, subcategories, and product names with prices
categories = ["Agriculture", "Energy", "Food and Beverages", "Pharmaceuticals", "Materials and Chemicals"]
subcategories = {
    "Agriculture": ["Biopesticides", "Biofertilizers"],
    "Energy": ["Biofuels", "Biomass"],
    "Food and Beverages": ["Organic Food", "Plant-Based Meat Alternatives"],
    "Pharmaceuticals": ["Biopharmaceuticals", "Herbal Medicines"],
    "Materials and Chemicals": ["Bioplastics", "Biodegradable Packaging"]
}
products = {
    "Biopesticides": {
        "Neem oil-based insecticides": "71$",
        "Bacillus thuringiensis (Bt) products": "44$"
    },
    "Biofertilizers": {
        "Rhizobium-based fertilizers": "80$",
        "Mycorrhizal fungi products": "75$"
    },
    "Biofuels": {
        "Biodiesel": "5$",
        "Ethanol": "9$",
        "Biogas": "11$"
    },
    "Biomass": {
        "Wood pellets": "11$",
        "Crop residues for energy generation": "12$"
    },
    "Organic Food": {
        "Organic fruits and vegetables": "36$",
        "organic dairy products": "61$"
    },
    "Plant-Based Meat Alternatives": {
        "Tofu": "19$",
        "Tempeh": "31$",
        "Seitan": "16$"
    },
    "Biopharmaceuticals": {
        "Monoclonal antibodies": "10$",
        "Insulin": "11$",
        "Vaccines": "50$"
    },
    "Herbal Medicines": {
        "Echinacea extracts": "46$",
        "Turmeric supplements": "29$"
    },
    "Bioplastics": {
        "PLA (Polylactic Acid)": "23$",
        "PHA (Polyhydroxyalkanoates)": "11$"
    },
    "Biodegradable Packaging": {
        "Biodegradable food containers": "30$",
        "Compostable bags": "34$"
    }
}

In [4]:
# Create a list to store the data
data = []

In [5]:
# Define the number of rows
num_rows = random.randint(1000, 4000)

In [6]:
# Create a dictionary to track customer data
customer_data = {}

In [7]:
# Create a dictionary to track inventory data
inventory_data = {}
inventory_id_counter = 0

In [8]:
# Define the initial stock level and restock threshold
initial_stock_level = 4000
restock_threshold = random.choice([100, 150])

In [9]:
# Define the initial last restock date
last_restock_date = (datetime.now() - timedelta(days=random.randint(1, 365))).strftime('%Y-%m-%d')

In [10]:
fake = Faker(unique=True)

In [11]:
# Generate data for each row
for _ in range(num_rows):
    customer_name = fake.first_name() + " " + fake.last_name()
    email = f"{customer_name.replace(' ', '_')}@{fake.random_element(['gmail.com', 'outlook.fr', 'hotmail.com','youcode.ma', 'maroc.ma', 'yopmail.com'])}"
    phone = fake.phone_number()
    address = fake.unique.street_address()
    age = random.randint(-90, 90)
    if age == 0:
        age = random.choice([-90, 90])
    gender = random.choice(["Male", "Female"])
    location = fake.country()
    customer_segment = random.choice(["Bronze", "Gold", "Silver"])
    
    category = random.choice(categories)
    subcategory = random.choice(subcategories[category])
    product, price = random.choice(list(products[subcategory].items()))
    
    price = price.strip('$')
    quantity = random.randint(-10, 10)
    
    inventory_id = inventory_data.get(product, None)
    if inventory_id is None:
        inventory_id_counter += 1
        inventory_id = inventory_id_counter
        inventory_data[product] = inventory_id
    
    stock_level = initial_stock_level - quantity
    if stock_level <= restock_threshold:
        restock_threshold = random.choice([100, 150])
        last_restock_date = (datetime.now() - timedelta(days=random.randint(1, 365))).strftime('%Y-%m-%d')
    
    warehouse_name = random.choice(["China WH", "Japan WH", "Turkey WH"])
    location_warehouse = "China" if warehouse_name == "China WH" else "Japan" if warehouse_name == "Japan WH" else "Turkey"
    
    supplier_name = fake.company()
    supplier_phone = fake.unique.phone_number()
    location_supplier = random.choice(["USA", "France", "Colombia", "China", "Turkey"])
    quality_score = round(random.uniform(1, 5), 2)
    rating_customer = str(random.randint(1, 5)) if random.random() > 0.1 else ""
    #comments = fake.sentence() if random.random() > 0.1 else ""
    
    # Generate a random weight for each product
    weight_data = {
        "Organic fruits and vegetables": random.uniform(0.5, 2.0),
        "PHA (Polyhydroxyalkanoates)": random.uniform(0.1, 2.0),
        "Compostable bags": random.uniform(0.1, 2.0),
        "PLA (Polylactic Acid)": random.uniform(0.1, 2.0),
        "Ethanol": random.uniform(0.1, 2.0),
        "Bacillus thuringiensis (Bt) products": random.uniform(0.1, 2.0),
        "Insulin": random.uniform(0.1, 2.0),
        "Rhizobium-based fertilizers": random.uniform(0.1, 2.0),
        "Neem oil-based insecticides": random.uniform(0.1, 2.0),
        "Mycorrhizal fungi products": random.uniform(0.1, 2.0),
        "Tofu": random.uniform(0.1, 2.0),
        "Echinacea extracts": random.uniform(0.1, 2.0),
        "Seitan": random.uniform(0.1, 2.0),
        "Biogas": random.uniform(0.1, 2.0),
        "Monoclonal antibodies": random.uniform(0.1, 2.0),
        "Tempeh": random.uniform(0.1, 2.0),
        "Turmeric supplements": random.uniform(0.1, 2.0),
        "organic dairy products": random.uniform(0.5, 2.0),
        "Wood pellets": random.uniform(0.5, 2.0),
        "Crop residues for energy generation": random.uniform(0.1, 2.0),
        "Vaccines": random.uniform(0.1, 2.0),
        "Biodegradable food containers": random.uniform(0.5, 2.0),
        "Biodiesel": random.uniform(0.1, 2.0)
    }
    weight = weight_data.get(product, 0.0)
    
    end_date = datetime.now()
    start_date = end_date - timedelta(days=6)

    date_order = fake.date_between_dates(date_start=start_date, date_end=end_date).strftime('%Y-%m-%d')

    tax_5_percent = round(float(price) * 0.05, 2)
    payment = random.choice(["Credit Card", "Ewallet", "Cash"])
    shipper = random.choice(["FedEx", "United Parcel Service", "GeoPost", "Hermés Internation", "DHL"])
    shipping_method = random.choice(["Sea", "Ground", "Air"])
    shipping_costs = round(random.uniform(5, 50), 2)
    
    # Append the data to the list
    data.append([customer_name, email, phone, address, age, gender, location, customer_segment,
                 category, subcategory, product, price, quantity, weight, date_order, tax_5_percent,
                 payment, shipper, shipping_method, shipping_costs, inventory_id, stock_level, restock_threshold,
                 last_restock_date, warehouse_name, location_warehouse, supplier_name, supplier_phone,
                 location_supplier, quality_score, rating_customer])

In [12]:
import datetime

In [13]:
# Get the current date and time
date_now = datetime.datetime.now().strftime("%Y%m%d")

In [14]:
# Specify the CSV file name with the current date
csv_file = f'ecom_data_{date_now}.csv'

In [15]:
# Write the data to a CSV file
with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(['Customer_Name', 'Email', 'Phone', 'Adresse', 'Age', 'Gender', 'Location',
                     'Customer_Segment', 'Category', 'Subcategory', 'Product', 'Price', 'Quantity',
                     'Weight', 'Date_Order', 'Tax_5_percent', 'Payement', 'Shipper', 'Shipping_Method',
                     'Shipping Costs', 'Inventory_id', 'StockLevel', 'Restock_Threshold', 'Last_Restock_Date',
                     'Warehouse_Name', 'Location_warehouse', 'Supplier_Name', 'Supplier_Phone', 'Location_Supplier',
                     'Quality_Score', 'rating_customer'])
    # Write the data
    writer.writerows(data)

print(f"CSV file '{csv_file}' with {num_rows} rows of data has been generated.")

CSV file 'ecom_data_20240115.csv' with 3339 rows of data has been generated.


In [16]:
import pandas as pd
df= pd.read_csv("ecom_data_20240112.csv" , encoding='ISO-8859-1')

In [24]:
import pandas as pd
from datetime import datetime

In [30]:
# Get the current date in the format you specified
date_now = datetime.now().strftime("%Y%m%d")
file_name = f"ecom_data_{date_now}.csv"

# Read the CSV file using pandas
df = pd.read_csv(file_name , encoding='ISO-8859-1')

In [31]:
df.head()

Unnamed: 0,Customer_Name,Email,Phone,Adresse,Age,Gender,Location,Customer_Segment,Category,Subcategory,...,StockLevel,Restock_Threshold,Last_Restock_Date,Warehouse_Name,Location_warehouse,Supplier_Name,Supplier_Phone,Location_Supplier,Quality_Score,rating_customer
0,Dawn Kramer,Dawn_Kramer@yopmail.com,239-352-5971x3832,3900 Chen Mountains,-11,Female,Lithuania,Bronze,Energy,Biomass,...,4004,150,2023-02-12,Turkey WH,Turkey,"Delgado, Crosby and Bush",9806188333,USA,2.55,5.0
1,Jillian Reed,Jillian_Reed@yopmail.com,794.248.8393x350,44647 Ward Lane Suite 694,18,Female,Guernsey,Bronze,Pharmaceuticals,Herbal Medicines,...,4009,150,2023-02-12,Japan WH,Japan,Ryan PLC,251-646-4407x9579,China,3.39,4.0
2,Victoria Torres,Victoria_Torres@hotmail.com,9433241027,73841 Ayala Drive Apt. 871,-22,Male,Argentina,Silver,Pharmaceuticals,Herbal Medicines,...,4004,150,2023-02-12,Japan WH,Japan,King-Woodward,001-845-954-6559x95814,Colombia,1.59,3.0
3,Vicki Johnson,Vicki_Johnson@maroc.ma,246.848.3805x4621,137 Jodi Knoll Apt. 885,-52,Male,Mauritania,Bronze,Materials and Chemicals,Bioplastics,...,3993,150,2023-02-12,China WH,China,"Hanna, Sanchez and Bryant",001-919-295-5861x61911,USA,4.49,2.0
4,Dominique Brown,Dominique_Brown@outlook.fr,270.742.0999x220,73812 Stephen Lakes Apt. 464,-55,Male,Cuba,Gold,Energy,Biomass,...,4002,150,2023-02-12,Japan WH,Japan,"Harmon, Harris and Shaffer",826-714-7407x07696,France,4.44,1.0


In [38]:
dfjson.head()

NameError: name 'dfjson' is not defined

---------------------------------