# End-to-end injection for Shopify (development) store

In [None]:
# Credentials

import os
from dotenv import load_dotenv

load_dotenv()

# Shopify credentials come from the environment (populated from .env by load_dotenv above);
# each value is None when the corresponding variable is not set.
store_api_key = os.environ.get("SHOPIFY_API_KEY")
store_secret_key = os.environ.get("SHOPIFY_SECRET_KEY")
admin_api_access_token = os.environ.get("SHOPIFY_ADMIN_API_ACCESS_TOKEN")

# Target store and the Admin API version used for every request in this notebook.
api_version = '2025-07'
shop_url = "urban-thread-test-store.myshopify.com"

In [26]:
import shopify
# Register the app's API key/secret with the ShopifyAPI library.
shopify.Session.setup(api_key=store_api_key, secret=store_secret_key)

# Build a session for the target store, API version and Admin API access token.
session = shopify.Session(shop_url, api_version, admin_api_access_token)

In [27]:
# Make `session` the active session for subsequent shopify.* resource calls.
shopify.ShopifyResource.activate_session(session)

# Inject customers data

In [3]:
import pandas as pd
import time

# Load the customers to inject; the path is relative to the notebook's working directory.
df_sorted = pd.read_csv('./data/shopify_customers_sorted.csv')
# Uncomment to dry-run the injection on the first five customers only:
# df_sorted_first_five = df_sorted.head(5).copy()

In [5]:
# Create one Shopify customer per row of df_sorted and store the id Shopify assigns
# in a new "id" column (as a string). Rows whose creation is rejected get an empty id
# and a printed warning, instead of the literal string "None".

ids = []
for _, row in df_sorted.iterrows():
    new_customer = shopify.Customer()

    # Split the full name into first name and the remainder as last name.
    full_name = row.get("name", "")
    if not isinstance(full_name, str):
        # A missing name is read from the CSV as NaN (a float), which has no .strip().
        full_name = ""
    name_parts = full_name.strip().split(" ", 1)
    first_name = name_parts[0] if len(name_parts) > 0 else ""
    last_name = name_parts[1] if len(name_parts) > 1 else ""

    new_customer.first_name = first_name
    new_customer.last_name = last_name
    new_customer.email = row.get("email", "")

    # NOTE(review): the API may ignore a client-supplied creation date — confirm that
    # `createdAt` has any effect on the created customer.
    new_customer.createdAt = row["created_at"]

    # save() returns False (without raising) when Shopify rejects the record.
    if new_customer.save():
        ids.append(new_customer.id)
    else:
        print(
            f"Failed to create customer {row.get('email', '')!r}: "
            f"{new_customer.errors.full_messages()}"
        )
        ids.append(None)

    # Add a delay to avoid hitting Shopify API rate limits
    time.sleep(0.2)

# An empty string marks a customer that could not be created.
df_sorted["id"] = ["" if i is None else str(i) for i in ids]

In [6]:
# Show the customers together with the Shopify ids assigned by the injection loop.
display(df_sorted)

Unnamed: 0,name,email,gender,age,created_at,segment,id
0,Christopher Bass,christopher.bass@dummydomain.com,male,58,2024-01-01,prospective,9059439771976
1,Daniel Wagner,daniel.wagner@dummydomain.com,male,50,2024-01-04,returning,9059439804744
2,Stephen Mckee,stephen.mckee@dummydomain.com,male,30,2024-01-05,churned,9059439837512
3,Patty Perez,patty.perez@dummydomain.com,female,35,2024-01-05,churned,9059439870280
4,Dominic Jacobs,dominic.jacobs@dummydomain.com,male,40,2024-01-06,prospective,9059439935816
...,...,...,...,...,...,...,...
388,Stephanie Williams,stephanie.williams@dummydomain.com,female,64,2025-03-26,prospective,9059457007944
389,Carolyn Miller,carolyn.miller@dummydomain.com,female,51,2025-03-26,prospective,9059457040712
390,Shannon James,shannon.james@dummydomain.com,female,18,2025-03-27,prospective,9059457139016
391,Briana Murray,briana.murray@dummydomain.com,female,65,2025-03-27,churned,9059457171784


## Save the customer df with unique ids to new csv

In [7]:
# Persist the customers with their Shopify ids; the order-injection section reads this file back.
df_sorted.to_csv('./data/shopify_customers_sorted_with_id.csv', index=False)

# Inject products data

In [15]:
import numpy as np
import re

# Load the product catalogue; the code below groups rows by `title`, treating each row as one variant.
product_df = pd.read_csv('./data/shopify_products_cleaned.csv')


# Helper: Check if a value is NaN
def is_nan(x):
    """Return a truthy value when the scalar ``x`` is missing (None, NaN, NaT, pd.NA).

    ``pd.isna`` is consulted first; a float NaN check via ``np.isnan`` is kept as
    a fallback for float inputs.
    """
    missing = pd.isna(x)
    if missing:
        return missing
    return isinstance(x, float) and np.isnan(x)

# Helper: Extract numeric id from Shopify GID
def extract_numeric_id(gid):
    """Return the trailing numeric id of a Shopify GID as a string.

    For example ``"gid://shopify/ProductVariant/123"`` yields ``"123"``.
    An empty string is returned when ``gid`` is not a string or does not end
    in ``/<digits>``.
    """
    if isinstance(gid, str):
        found = re.search(r'/(\d+)$', gid)
        if found:
            return found.group(1)
    return ""

# Create every product (one per unique title) with the productSet mutation and
# write the numeric id of each created variant back to product_df['variant_id'].
import json

# Add a new column for variant_id, initialized as empty string
product_df['variant_id'] = ""

# The mutation text is the same for every product, so build it once.
mutation = """
        mutation createProductAsynchronous($productSet: ProductSetInput!, $synchronous: Boolean!) {
          productSet(synchronous: $synchronous, input: $productSet) {
            product {
              id
              variants(first: 100) {
                edges {
                  node {
                    id
                    title
                  }
                }
              }
            }
            productSetOperation {
              id
              status
              userErrors {
                code
                field
                message
              }
            }
            userErrors {
              code
              field
              message
            }
          }
        }
    """

# 1. Every unique product title is injected (use .head(n) here for a dry run).
unique_titles = product_df['title'].drop_duplicates().tolist()

for title in unique_titles:
    product_rows = product_df[product_df['title'] == title]

    # Use the first row for product-level fields
    first_row = product_rows.iloc[0]
    description = first_row['description']
    product_type = first_row['product_type']
    tags = first_row['tags']

    # 2. Find product options (Size, Color): an option exists when at least one
    #    variant row has a value in the corresponding column.
    option_columns = {'Size': 'variant_size', 'Color': 'variant_color'}
    option_names = [
        name for name, column in option_columns.items()
        if column in product_rows.columns and product_rows[column].notna().any()
    ]

    # Build productOptions
    product_options = [
        {
            "name": opt,
            "position": position,
            "values": [
                {"name": v}
                for v in sorted(product_rows[option_columns[opt]].dropna().unique().tolist())
            ],
        }
        for position, opt in enumerate(option_names, start=1)
    ]

    # 3. Build variants: one per DataFrame row. Inventory quantities are not sent.
    variants = []
    variant_row_indices = []  # DataFrame index of the row behind each submitted variant
    for idx, row in product_rows.iterrows():
        option_values = []
        if 'Color' in option_names:
            option_values.append({"optionName": "Color", "name": row['variant_color']})
        if 'Size' in option_names:
            option_values.append({"optionName": "Size", "name": row['variant_size']})
        variants.append({
            "optionValues": option_values,
            "price": float(row['price']),
            "inventoryPolicy": "CONTINUE",
        })
        variant_row_indices.append(idx)

    # 4. Build the productSet input
    product_input = {
        "title": title,
        "descriptionHtml": description,
        "productType": product_type,
        "tags": tags,
        "productOptions": product_options,
        "variants": variants
    }

    # 5. Prepare variables
    variables = {
        "synchronous": True,
        "productSet": product_input
    }

    # 6. Execute the mutation
    result = shopify.GraphQL().execute(mutation, variables=variables)
    print(f"Result for product '{title}':")
    print(result)

    # Extract and save variant IDs to the DataFrame
    try:
        data = json.loads(result) if isinstance(result, str) else result

        # Any level may be JSON null (e.g. "product": null on failure), so fall back
        # to an empty container at each step instead of relying on .get defaults.
        product_set = (data.get("data") or {}).get("productSet") or {}

        user_errors = product_set.get("userErrors") or []
        if user_errors:
            print(f"Shopify reported errors for product '{title}':", user_errors)

        product = product_set.get("product") or {}
        variants_data = (product.get("variants") or {}).get("edges") or []
        variant_ids = [edge["node"]["id"] for edge in variants_data]
        print("Variant IDs:", variant_ids)

        if len(variant_ids) != len(variant_row_indices):
            print(
                f"Warning: submitted {len(variant_row_indices)} variants for '{title}' "
                f"but received {len(variant_ids)} variant ids"
            )

        # NOTE(review): ids are matched to rows by position, which assumes Shopify
        # returns variants in the order they were submitted — confirm against the
        # returned variant titles.
        # Save only the numeric id to the corresponding rows in the DataFrame
        for idx, variant_id in zip(variant_row_indices, variant_ids):
            product_df.at[idx, 'variant_id'] = extract_numeric_id(variant_id)
    except Exception as e:
        print("Could not extract variant IDs:", e)


Result for product 'Classic Cotton Tee':
{"data":{"productSet":{"product":{"id":"gid://shopify/Product/9587678642504","variants":{"edges":[{"node":{"id":"gid://shopify/ProductVariant/50944337215816","title":"XS / White"}},{"node":{"id":"gid://shopify/ProductVariant/50944337248584","title":"S / White"}},{"node":{"id":"gid://shopify/ProductVariant/50944337281352","title":"M / White"}},{"node":{"id":"gid://shopify/ProductVariant/50944337314120","title":"L / White"}},{"node":{"id":"gid://shopify/ProductVariant/50944337346888","title":"XL / White"}},{"node":{"id":"gid://shopify/ProductVariant/50944337379656","title":"XS / Black"}},{"node":{"id":"gid://shopify/ProductVariant/50944337412424","title":"S / Black"}},{"node":{"id":"gid://shopify/ProductVariant/50944337445192","title":"M / Black"}},{"node":{"id":"gid://shopify/ProductVariant/50944337477960","title":"L / Black"}},{"node":{"id":"gid://shopify/ProductVariant/50944337510728","title":"XL / Black"}},{"node":{"id":"gid://shopify/ProductV

In [16]:
# Save the updated DataFrame to a new CSV file
# Persist the products with their Shopify variant ids; the order-injection section reads this file back.
product_df.to_csv('./data/shopify_products_cleaned_with_variant_ids.csv', index=False)

# Inject orders

In [28]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

import os
import json

# Load variables from a local .env file into the process environment.
load_dotenv()

# None when OPENAI_API_KEY is not set.
openai_api_key = os.getenv("OPENAI_API_KEY")

In [29]:
def open_customer_df(customer_csv_path, seed=None):
    """Load the customer CSV and derive the columns used by the order simulation.

    Parameters
    ----------
    customer_csv_path : str
        CSV with at least the columns ``id``, ``created_at`` and ``segment``.
    seed : int, optional
        Seed for the random churn delay. With the default ``None`` the global
        NumPy random state is used (the original behaviour), so churn dates
        differ between calls; pass an integer to get the same churn dates on
        every call and in every kernel session.

    Returns
    -------
    pandas.DataFrame
        The CSV content with
        * ``id`` converted to a digit string (``''`` when missing),
        * ``last_buy_month``: the ``YYYY-MM`` month of ``created_at``,
        * ``churn_date``: ``created_at`` + 20..40 days for the ``churned``
          segment, the day after ``created_at`` for the ``one-timer`` segment,
          and ``NaT`` for everyone else.
    """
    # Local import keeps the function usable even when the cell importing numpy was not run.
    import numpy as np

    df = pd.read_csv(customer_csv_path)

    # Ids may be read back as floats (e.g. when some are missing); normalise to digit strings.
    df['id'] = df['id'].apply(lambda x: str(int(float(x))) if pd.notnull(x) else '')

    created_at = pd.to_datetime(df['created_at'])

    # "last_buy_month": for now, the month of "created_at"
    df['last_buy_month'] = created_at.dt.strftime('%Y-%m')

    df['churn_date'] = pd.NaT

    # Churned customers churn a random 20-40 days (inclusive) after created_at.
    mask_churned = df['segment'] == 'churned'
    rng = np.random if seed is None else np.random.RandomState(seed)
    random_days = rng.randint(20, 41, size=mask_churned.sum())
    df.loc[mask_churned, 'churn_date'] = created_at[mask_churned] + pd.to_timedelta(random_days, unit='D')

    # One-timers churn on the day after created_at (midnight).
    mask_one_timer = df['segment'] == 'one-timer'
    df.loc[mask_one_timer, 'churn_date'] = (created_at[mask_one_timer] + pd.Timedelta(days=1)).dt.normalize()

    return df

In [30]:
def open_product_df(product_csv_path):
    """Load the product CSV, normalising ``variant_id`` to digit strings.

    When the CSV has a ``variant_id`` column, each value is converted to the
    string form of its integer value; missing values become ``''``. A CSV
    without that column is returned as read.
    """
    def _as_id_string(raw):
        if pd.notnull(raw):
            return str(int(float(raw)))
        return ''

    products = pd.read_csv(product_csv_path)
    if 'variant_id' not in products.columns:
        return products

    products['variant_id'] = products['variant_id'].apply(_as_id_string)
    return products

In [7]:
# Reload the customers and products saved by the injection sections above
# (now carrying Shopify customer ids and variant ids).
# NOTE(review): open_customer_df draws random churn dates, so re-running this line changes them.
customer_df = open_customer_df('./data/shopify_customers_sorted_with_id.csv')
product_df = open_product_df('./data/shopify_products_cleaned_with_variant_ids.csv')


# Simulation window as "YYYY-MM" strings: January 2024 through March 2025, in order.
months = [f"2024-{m:02d}" for m in range(1, 13)] + [f"2025-{m:02d}" for m in range(1, 4)]

def get_valid_customers_for_month(customer_df, month):
    """Return the customers that may place an order in ``month``.

    A customer qualifies when it was created on or before the last moment of
    the month and, if a ``churn_date`` column exists, has not churned yet
    (``churn_date`` is missing or later than the first day of the month).

    Parameters
    ----------
    customer_df : pandas.DataFrame
        Must contain ``created_at``; ``churn_date`` is optional. The frame is
        not modified.
    month : str
        Month in ``YYYY-MM`` form.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the qualifying rows, with ``created_at`` (and
        ``churn_date`` when present) converted to datetimes.
    """
    month_start = pd.to_datetime(f"{month}-01")
    # Comparing against the start of the next month keeps timestamps on the last
    # day of the month (e.g. 2024-01-31 15:00), which "<= month end at midnight" drops.
    next_month_start = month_start + pd.offsets.MonthBegin(1)

    # Work on a copy so the caller's frame keeps its original dtypes.
    customers = customer_df.copy()
    customers['created_at'] = pd.to_datetime(customers['created_at'])
    has_churn_date = 'churn_date' in customers.columns
    if has_churn_date:
        customers['churn_date'] = pd.to_datetime(customers['churn_date'], errors='coerce')

    # Customers created before or in the given month
    valid_customers = customers[customers['created_at'] < next_month_start]

    # Further filter: churn_date must be missing (not churned) or after the first day of the month
    if has_churn_date:
        not_churned = valid_customers['churn_date'].isna() | (valid_customers['churn_date'] > month_start)
        valid_customers = valid_customers[not_churned]

    return valid_customers.copy()

def get_valid_products_for_month(product_df, month):
    """Return the product rows that are on sale in ``month``.

    A row qualifies when its ``release_date`` is on or before the last moment
    of the month and, if a ``discontinued_date`` column exists, the product was
    not discontinued before the first day of the month.

    Parameters
    ----------
    product_df : pandas.DataFrame
        Must contain ``release_date``; ``discontinued_date`` is optional. The
        frame is not modified.
    month : str
        Month in ``YYYY-MM`` form.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the qualifying rows, with the date columns
        converted to datetimes.
    """
    month_start = pd.to_datetime(f"{month}-01")
    # Comparing against the start of the next month keeps release timestamps on the
    # last day of the month, which "<= month end at midnight" would drop.
    next_month_start = month_start + pd.offsets.MonthBegin(1)

    # Work on a copy so the caller's frame keeps its original dtypes.
    products = product_df.copy()
    products['release_date'] = pd.to_datetime(products['release_date'])
    has_discontinued_date = 'discontinued_date' in products.columns
    if has_discontinued_date:
        products['discontinued_date'] = pd.to_datetime(products['discontinued_date'], errors='coerce')

    # Keep products released any time before or in the current month
    filtered_df = products[products['release_date'] < next_month_start]

    # Filter out products discontinued before the start of this month
    if has_discontinued_date:
        still_sold = filtered_df['discontinued_date'].isna() | (filtered_df['discontinued_date'] >= month_start)
        filtered_df = filtered_df[still_sold]

    return filtered_df.copy()

# Ask the LLM, month by month, how many units of each available product were sold.
# The monthly total is 1.5 items per valid customer; the answers are collected in
# all_months_prod_list as {"month": ..., "product_list": [...]} entries.

previous_month_prod_str = ''

all_months_prod_list = []

# The client does not depend on the month, so it is created once.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2, api_key=openai_api_key)


def _parse_llm_json(text):
    """Parse the model reply as JSON, tolerating a surrounding Markdown code fence."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        # Drop the opening fence line (``` or ```json) ...
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
        # ... and the closing fence, when present.
        cleaned = cleaned.rstrip()
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return json.loads(cleaned)


for month in months:

    valid_cust = get_valid_customers_for_month(customer_df, month)
    valid_prod = get_valid_products_for_month(product_df, month)

    total_item_sold_in_month = int(round(1.5 * len(valid_cust)))

    valid_unique_prod = valid_prod['title'].drop_duplicates()

    valid_unique_prod_str = ', '.join(valid_unique_prod.tolist())

    prompt = f"""
    You are a helpful assistant that generates order items for a Shopify store.
    You will be given a list of unique product titles for a given month.
    You need to assign a number of items sold for each product title.
    The total number of items sold should be equal to the total item sold in the month.

    You must assign the number to simulate a realistic distribution of items sold, mainly to simulate seasonality and new item releases.

    The current month is: {month}

    The list of unique product titles is:
    {valid_unique_prod_str}

    The total number of items sold in the month is:
    {total_item_sold_in_month}

    For info on what might be new products this month, you can use the following info of previous month products:
    {previous_month_prod_str}

    You must assign the number of items sold for each product title FOR THE CURRENT MONTH.
    You must assign the number of items sold for each product title FOR THE CURRENT MONTH.
    You must assign the number of items sold for each product title FOR THE CURRENT MONTH.

    You must return the result in the following JSON format:
    {{
      "thought": "Your reasoning about the distribution and any assumptions.",
      "product_list": [
        {{
          "product_title": "Product Title 1",
          "quantity_sold": <integer>
        }},
        {{
          "product_title": "Product Title 2",
          "quantity_sold": <integer>
        }}
        // ... one entry per product title
      ]
    }}

    Your answer:
    """

    response = llm.invoke(prompt)

    response_json = _parse_llm_json(response.content)
    product_list = response_json['product_list']

    # The model is only asked to hit the total; report when it does not.
    assigned_total = sum(int(item["quantity_sold"]) for item in product_list)
    if assigned_total != total_item_sold_in_month:
        print(
            f"Warning: {month}: model assigned {assigned_total} items, "
            f"expected {total_item_sold_in_month}"
        )

    all_months_prod_list.append({
        "month": month,
        "product_list": product_list
    })

    # Next month's prompt uses this list to hint at newly released products.
    previous_month_prod_str = valid_unique_prod_str

    

In [9]:
# Show the generated per-month quantities, then cache them on disk so the
# order-injection step can reload them from the file.
print(all_months_prod_list)
import json
import os

# Ensure the 'data' directory exists
os.makedirs('data', exist_ok=True)

# Written as indented JSON: a list of {"month", "product_list"} entries.
with open('data/all_months_prod_list.json', 'w') as f:
    json.dump(all_months_prod_list, f, indent=2)


[{'month': '2024-01', 'product_list': [{'product_title': 'Classic Cotton Tee', 'quantity_sold': 5}, {'product_title': 'Slim Fit Crew Tee', 'quantity_sold': 4}, {'product_title': 'Heavyweight Oversized Tee', 'quantity_sold': 3}, {'product_title': 'Oversized Pullover Hoodie', 'quantity_sold': 15}, {'product_title': 'Denim Jacket', 'quantity_sold': 6}, {'product_title': 'Relaxed Fit Chinos', 'quantity_sold': 4}, {'product_title': 'Canvas Tote Bag', 'quantity_sold': 3}, {'product_title': 'Ankle Socks 3-Pack', 'quantity_sold': 4}, {'product_title': 'Wool Beanie', 'quantity_sold': 8}]}, {'month': '2024-02', 'product_list': [{'product_title': 'Classic Cotton Tee', 'quantity_sold': 8}, {'product_title': 'Slim Fit Crew Tee', 'quantity_sold': 7}, {'product_title': 'Heavyweight Oversized Tee', 'quantity_sold': 6}, {'product_title': 'Oversized Pullover Hoodie', 'quantity_sold': 20}, {'product_title': 'Denim Jacket', 'quantity_sold': 10}, {'product_title': 'Relaxed Fit Chinos', 'quantity_sold': 9},

## Build orders and assign quantities to valid customers


For each order we only need: variant_id, quantity, and customer.

In [31]:
# Reload the cached per-month quantities saved earlier in this notebook.
with open('data/all_months_prod_list.json', 'r') as f:
    all_months_prod_list = json.load(f)


In [32]:
import random
import datetime
import time

In [33]:
import numpy as np

# Inject orders for the selected months.
#
# Inputs taken from earlier cells: `months`, `customer_df`, `product_df`,
# `all_months_prod_list` (one {"month", "product_list"} entry per month, each
# product having "product_title" and "quantity_sold"), and an active Shopify session.
#
# For each selected month:
#   1. every valid one-timer customer gets 1 item,
#   2. every valid customer created in that month gets 1 item,
#   3. the remaining items are spread at random over the other valid customers
#      (or on top of the new customers when there are no others),
#   4. a random variant is drawn for every unit sold and the shuffled units are
#      handed out to the customers,
#   5. one paid, fulfilled order per customer is created in Shopify.

# Indices into `months` of the months to inject in this run.
months_idx_to_inject = [11]


def random_datetime_in_month(month_str, before_date=None):
    """Return a random timestamp string inside a month.

    Parameters
    ----------
    month_str : str
        Month in ``YYYY-MM`` form, e.g. ``"2024-03"``.
    before_date : str or datetime-like, optional
        When given and it falls inside the month, the result is at least one
        second earlier than it. When it is at or before the start of the
        month, the start of the month is returned.

    Returns
    -------
    str
        Timestamp formatted as ``%Y-%m-%dT%H:%M:%S-00:00``.
    """
    year, month_number = map(int, month_str.split('-'))
    if month_number == 12:
        next_month = datetime.datetime(year + 1, 1, 1)
    else:
        next_month = datetime.datetime(year, month_number + 1, 1)
    start = datetime.datetime(year, month_number, 1)
    end = next_month - datetime.timedelta(seconds=1)
    if before_date is not None:
        if isinstance(before_date, str):
            before_date = pd.to_datetime(before_date)
        if before_date <= start:
            # "<=" (not "<"): a bound exactly at the month start would otherwise give
            # end = start - 1s and an empty range for random.randint.
            end = start
        elif before_date < end:
            end = before_date - pd.Timedelta(seconds=1)
    # Pick a random second between start and end (inclusive)
    delta = end - start
    random_second = random.randint(0, int(delta.total_seconds()))
    dt = start + datetime.timedelta(seconds=random_second)
    return dt.strftime("%Y-%m-%dT%H:%M:%S-00:00")


for month_idx, month in enumerate(months):

    if month_idx not in months_idx_to_inject:
        continue

    print("injecting for month: ", month)

    valid_cust = get_valid_customers_for_month(customer_df, month)
    month_prod_list = [x for x in all_months_prod_list if x["month"] == month][0]["product_list"]

    total_item_sold_in_month = sum(prod["quantity_sold"] for prod in month_prod_list)
    remainder_quantity = total_item_sold_in_month

    # 1. One-timers get exactly one item each. A month without one-timers is still
    #    processed: only this step is a no-op (previously the whole month was skipped).
    one_timer_cust = valid_cust[valid_cust["segment"] == "one-timer"].copy()
    one_timer_cust["item_quantity"] = 1
    remainder_quantity -= len(one_timer_cust)

    # 2. Everyone else
    assigned_one_timer_ids = set(one_timer_cust["id"])
    remainder_valid_cust = valid_cust[~valid_cust["id"].isin(assigned_one_timer_ids)]

    # New customers: created_at falls in this month (string prefix match on YYYY-MM).
    new_cust = remainder_valid_cust[
        remainder_valid_cust["created_at"].astype(str).str.startswith(str(month))
    ].copy()
    new_cust["item_quantity"] = 1
    remainder_quantity -= len(new_cust)

    # Existing customers: valid, not one-timers and not new this month.
    other_valid_cust = remainder_valid_cust[
        ~remainder_valid_cust["id"].isin(new_cust["id"])
    ].copy()

    if len(other_valid_cust) > 0 and remainder_quantity > 0:
        # Each existing customer can get 0 or more items; the counts sum to remainder_quantity.
        other_valid_cust["item_quantity"] = np.random.multinomial(
            remainder_quantity, [1 / len(other_valid_cust)] * len(other_valid_cust)
        )
    else:
        other_valid_cust["item_quantity"] = 0

    if len(other_valid_cust) == 0 and len(new_cust) > 0 and remainder_quantity > 0:
        # No existing customers: spread the remainder on top of the 1 item each new customer has.
        new_cust["item_quantity"] = 1 + np.random.multinomial(
            remainder_quantity, [1 / len(new_cust)] * len(new_cust)
        )

    # Combine one_timer_cust, new_cust, and other_valid_cust into a single DataFrame
    combined_assigned_cust = pd.concat([one_timer_cust, new_cust, other_valid_cust], ignore_index=True)

    print("all_possible_cust: ", len(combined_assigned_cust))

    # Expand the per-product quantities into one row per unit sold, each with a random variant.
    month_prod_df = (
        pd.DataFrame(month_prod_list)
        .sort_values(by="quantity_sold", ascending=False)
        .reset_index(drop=True)
    )

    flat_variant_rows = []
    for _, prod_row in month_prod_df.iterrows():
        product_title = prod_row["product_title"]
        quantity = int(prod_row["quantity_sold"])

        # All variants of this product that actually have a Shopify id
        # (open_product_df stores '' for variants without one).
        variants = [
            v for v in product_df[product_df["title"] == product_title]["variant_id"].tolist() if v
        ]
        if not variants or quantity <= 0:
            continue

        # Randomly select a variant for each unit sold
        chosen_variants = np.random.choice(variants, size=quantity, replace=True)
        for variant_id in chosen_variants:
            flat_variant_rows.append({"product_title": product_title, "variant_id": variant_id})

    if not flat_variant_rows:
        print("no sellable variants for month: ", month)
        continue

    flat_variant_id_df = pd.DataFrame(flat_variant_rows)
    flat_variant_id_df = flat_variant_id_df.sample(frac=1, random_state=42).reset_index(drop=True)
    variant_ids_to_assign = flat_variant_id_df["variant_id"].tolist()

    # Hand out consecutive slices of the shuffled units according to each customer's
    # item_quantity. Customers reached after the units run out get an empty list and
    # are skipped below.
    variants_id_col = []
    cursor = 0
    for qty in combined_assigned_cust["item_quantity"]:
        qty = int(qty)
        variants_id_col.append(variant_ids_to_assign[cursor:cursor + qty])
        cursor += qty
    combined_assigned_cust["variants_id"] = variants_id_col

    # Inject to Shopify
    injecting_idx = 0
    this_month_valid_cust = 0

    for _, cust_row in combined_assigned_cust.iterrows():

        customer_id = cust_row.get("id", None)

        if not customer_id or not cust_row["variants_id"]:
            continue  # skip if no customer or no variants

        this_month_valid_cust += 1

        print("injecting", customer_id)

        # Prepare line_items: each variant_id, quantity 1
        line_items = [
            {"variant_id": int(variant_id), "quantity": 1}
            for variant_id in cust_row["variants_id"]
        ]

        # Determine order date logic
        created_at = cust_row.get("created_at", None)
        # The churn column produced by open_customer_df is named "churn_date".
        churned_at = cust_row.get("churn_date", None)
        month_str = cust_row.get("order_month", None)
        # If not present, fallback to month variable
        if not month_str:
            month_str = month

        if pd.notnull(created_at) and str(created_at)[:7] == month_str:
            # Created this month: order on the creation date
            use_date = pd.to_datetime(created_at)
        elif pd.notnull(churned_at) and str(churned_at)[:7] == month_str:
            # Churned this month: pick a random date before the churn date
            use_date = random_datetime_in_month(month_str, before_date=churned_at)
        else:
            # Random date in this month
            use_date = random_datetime_in_month(month_str)

        if isinstance(use_date, pd.Timestamp):
            date_str = use_date.strftime("%Y-%m-%dT%H:%M:%S-00:00")
        else:
            date_str = use_date  # already string

        order_data = {
            "order": {
                "line_items": line_items,
                # processed_at carries the simulated order date
                "processed_at": date_str,
                "financial_status": "paid",
                "fulfillment_status": "fulfilled",
                "customer": {
                    "id": int(customer_id)
                },
            }
        }

        order = shopify.Order()
        order.attributes = order_data["order"]
        try:
            # save() returns False (without raising) when Shopify rejects the order.
            if not order.save():
                print(f"Failed to create order for customer {customer_id}: {order.errors.full_messages()}")
        except Exception as e:
            print(f"Failed to create order for customer {customer_id}: {e}")

        # sleep for rate limiting (4 orders per minute, from shopify)
        injecting_idx += 1
        if injecting_idx == 4:
            injecting_idx = 0
            time.sleep(61)

    print("ordering cust: ", this_month_valid_cust)

injecting for month:  2024-12
all_possible_cust:  213
injecting 9059452518728
injecting 9059452813640
injecting 9059452420424
injecting 9059452485960
injecting 9059452584264
injecting 9059452617032
injecting 9059452649800
injecting 9059452682568
injecting 9059452715336
injecting 9059452780872
injecting 9059452879176
injecting 9059452911944
injecting 9059452944712
injecting 9059452977480
injecting 9059453010248
injecting 9059439771976
injecting 9059439804744
injecting 9059439968584
injecting 9059440034120
injecting 9059440099656
injecting 9059440165192
injecting 9059440197960
injecting 9059440230728
injecting 9059440361800
injecting 9059440722248
injecting 9059440787784
injecting 9059440820552
injecting 9059441148232
injecting 9059441181000
injecting 9059441246536
injecting 9059441279304
injecting 9059441377608
injecting 9059441410376
injecting 9059441606984
injecting 9059441639752
injecting 9059441705288
injecting 9059441836360
injecting 9059442032968
injecting 9059442098504
injecting 