In [1]:
import random
import pandas as pd

products = [f'Product_{i}' for i in range(1, 21)]
weather_options = ['sunny', 'rainy', 'snowy', 'cloudy']
sentiment_options = ['positive', 'neutral', 'negative']

data = []

for _ in range(300):
    product = random.choice(products)
    sales_history = [random.randint(10, 100) for _ in range(4)]
    weather = random.choice(weather_options)
    sentiment = random.choice(sentiment_options)
    inventory_level = random.randint(20, 200)

    # Target depends slightly on history and external factors
    base_sales = int(sum(sales_history) / 4)
    sentiment_factor = {'positive': 1.2, 'neutral': 1.0, 'negative': 0.8}[sentiment]
    weather_factor = {'sunny': 1.1, 'cloudy': 1.0, 'rainy': 0.9, 'snowy': 0.8}[weather]
    sales_t5 = int(base_sales * sentiment_factor * weather_factor + random.randint(-5, 5))

    data.append({
        'product': product,
        'sales_t1': sales_history[0],
        'sales_t2': sales_history[1],
        'sales_t3': sales_history[2],
        'sales_t4': sales_history[3],
        'weather': weather,
        'social_sentiment': sentiment,
        'inventory_level': inventory_level,
        'sales_t5': sales_t5
    })

df = pd.DataFrame(data)
df.to_csv("retail_demand_dataset.csv", index=False)
print(df.head())

      product  sales_t1  sales_t2  sales_t3  sales_t4 weather  \
0  Product_16        44        45        41        77   sunny   
1  Product_15        76        59        88        11  cloudy   
2  Product_16        72        80        76        35   sunny   
3   Product_9        60        59        65        95  cloudy   
4  Product_18        49        66        47        34   sunny   

  social_sentiment  inventory_level  sales_t5  
0         positive              187        66  
1         negative              141        51  
2          neutral               94        76  
3          neutral              122        67  
4         positive              154        63  


In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load dataset
df = pd.read_csv("retail_demand_dataset.csv")

# Features and target
X = df.drop(columns=['sales_t5', 'product'])
y = df['sales_t5']

# Categorical columns
categorical_cols = ['weather', 'social_sentiment']

# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_cols)
    ],
    remainder='passthrough'  # Keep numeric columns
)

# ML pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2 = r2_score(y_test, y_pred)

print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ R² Score: {r2:.2f}")


✅ RMSE: 7.71
✅ R² Score: 0.79




In [3]:
def inventory_decision(predicted_demand, inventory_level):
    if inventory_level < predicted_demand:
        return "🔁 Reorder needed"
    elif inventory_level > 2 * predicted_demand:
        return "⚠️ Overstock - consider reducing future orders"
    else:
        return "✅ Inventory level is optimal"

# Apply to test set
results = X_test.copy()
results['predicted_sales_t5'] = y_pred
results['actual_sales_t5'] = y_test.values
results['inventory_action'] = results.apply(
    lambda row: inventory_decision(row['predicted_sales_t5'], row['inventory_level']), axis=1
)

# Show sample output
print(results[['inventory_level', 'predicted_sales_t5', 'inventory_action']].head(10))


     inventory_level  predicted_sales_t5  \
203              123               67.66   
266               79               51.16   
152              117               71.21   
9                137               58.84   
233               84               44.58   
226              131               44.45   
196               85               44.92   
109              122               40.92   
5                149               44.02   
175               40               26.75   

                                   inventory_action  
203                    ✅ Inventory level is optimal  
266                    ✅ Inventory level is optimal  
152                    ✅ Inventory level is optimal  
9    ⚠️ Overstock - consider reducing future orders  
233                    ✅ Inventory level is optimal  
226  ⚠️ Overstock - consider reducing future orders  
196                    ✅ Inventory level is optimal  
109  ⚠️ Overstock - consider reducing future orders  
5    ⚠️ Overstock - consider 

In [5]:
!pip install langchain openai faiss-cpu tiktoken


Collecting langchain


[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip



  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/d4/49/6e933837da1931c9db745967282ff8bfff51bc3faec0eade846b12203b75/langchain-0.3.23-py3-none-any.whl.metadata
  Downloading langchain-0.3.23-py3-none-any.whl.metadata (7.8 kB)
Collecting openai
  Obtaining dependency information for openai from https://files.pythonhosted.org/packages/80/9a/f34f163294345f123673ed03e77c33dee2534f3ac1f9d18120384457304d/openai-1.75.0-py3-none-any.whl.metadata
  Downloading openai-1.75.0-py3-none-any.whl.metadata (25 kB)
Collecting faiss-cpu
  Obtaining dependency information for faiss-cpu from https://files.pythonhosted.org/packages/2c/2d/d2a4171a9cca9a7c04cd9d6f9441a37f1e0558724b90bf7fc7db08553601/faiss_cpu-1.10.0-cp311-cp311-win_amd64.whl.metadata
  Downloading faiss_cpu-1.10.0-cp311-cp311-win_amd64.whl.metadata (4.5 kB)
Collecting tiktoken
  Obtaining dependency information for tiktoken from https://files.pythonhosted.org/packages/6f/07/c67ad1724b8e14e2b4c8cca

In [7]:

import os
os.environ["OPENAI_API_KEY"] = "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" 

In [9]:
!pip install langchain-community


Collecting langchain-community
  Obtaining dependency information for langchain-community from https://files.pythonhosted.org/packages/bb/72/4046a132a180b569265bc8aa7ecd6f958f6c11085bdf68c7e1bbe52f1907/langchain_community-0.3.21-py3-none-any.whl.metadata
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain-community)
  Obtaining dependency information for aiohttp<4.0.0,>=3.8.3 from https://files.pythonhosted.org/packages/e7/dd/01f6fe028e054ef4f909c9d63e3a2399e77021bb2e1bb51d56ca8b543989/aiohttp-3.11.16-cp311-cp311-win_amd64.whl.metadata
  Downloading aiohttp-3.11.16-cp311-cp311-win_amd64.whl.metadata (8.0 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Obtaining dependency information for dataclasses-json<0.7,>=0.5.7 from https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl.metadata
  Downloading d

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\HP\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\~umpy.libs\\libopenblas64__v0.3.23-293-gc2f4bdbb-gcc_10_3_0-2bde3a66a51006b2b53eb373ff767a3f.dll'
Consider using the `--user` option or check the permissions.


[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
!pip install faiss-cpu openai numpy



[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip




In [17]:
pip install openai==0.28.0

Collecting openai==0.28.0
  Obtaining dependency information for openai==0.28.0 from https://files.pythonhosted.org/packages/ae/59/911d6e5f1d7514d79c527067643376cddcf4cb8d1728e599b3b03ab51c69/openai-0.28.0-py3-none-any.whl.metadata
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Collecting aiohttp (from openai==0.28.0)
  Obtaining dependency information for aiohttp from https://files.pythonhosted.org/packages/e7/dd/01f6fe028e054ef4f909c9d63e3a2399e77021bb2e1bb51d56ca8b543989/aiohttp-3.11.16-cp311-cp311-win_amd64.whl.metadata
  Using cached aiohttp-3.11.16-cp311-cp311-win_amd64.whl.metadata (8.0 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->openai==0.28.0)
  Obtaining dependency information for aiohappyeyeballs>=2.3.0 from https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata
  Using cached aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosig


[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [20]:
pip install sentence-transformers faiss-cpu


Collecting sentence-transformers
  Obtaining dependency information for sentence-transformers from https://files.pythonhosted.org/packages/45/2d/1151b371f28caae565ad384fdc38198f1165571870217aedda230b9d7497/sentence_transformers-4.1.0-py3-none-any.whl.metadata
  Downloading sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Obtaining dependency information for transformers<5.0.0,>=4.41.0 from https://files.pythonhosted.org/packages/a9/b6/5257d04ae327b44db31f15cce39e6020cc986333c715660b1315a9724d82/transformers-4.51.3-py3-none-any.whl.metadata
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Obtaining dependency information for torch>=1.11.0 from https://files.pythonhosted.org/packages/11/c5/2370d96b31eb1841c3a0883a492c15278a6718ccad61bb6a649c80d1d9eb/torch-2.6.0-cp311-cp311-win_amd64.whl.metadata
  Downloading torch-2.6.0-cp311-cp311


[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [21]:
# Define domain-specific knowledge (you can add more)
knowledge = [
    "Sunny weather boosts demand for cold drinks and snacks.",
    "Rainy days lead to fewer walk-in customers but more online orders.",
    "Snowy weather lowers demand for outdoor goods.",
    "Positive social sentiment increases overall demand.",
    "Negative sentiment can suppress sales even during holidays.",
    "Festivals and holidays can increase demand by up to 30%.",
    "Overstocking leads to higher storage costs and waste.",
    "Understocking can result in lost sales and poor customer satisfaction."
]


In [22]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Initialize the model (use a pre-trained model like 'paraphrase-MiniLM-L6-v2')
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Generate embeddings for each knowledge base entry
embeddings = model.encode(knowledge)

# Convert embeddings to numpy array (this can be useful for further processing)
embedding_matrix = np.array(embeddings).astype("float32")

# Print out the embeddings (optional for verification)
print(embedding_matrix)


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.51k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[[-0.2872388   0.1109681   0.43667957 ... -0.22614448 -0.76329666
   0.6413564 ]
 [ 0.45359594 -0.28804916  0.13332823 ... -0.22632363 -0.29720053
   0.5952655 ]
 [-0.23923644  0.5392298   0.60066974 ... -0.93435496 -1.0243886
   0.5243329 ]
 ...
 [ 0.5339718  -0.46942976 -0.08922337 ... -0.11905345  0.43722382
   0.05457694]
 [-0.00634818  0.03923307  0.00802894 ... -0.72019315  0.23659705
   0.31507254]
 [-0.10105354 -0.00832186 -0.00761243 ... -0.69189584 -0.07913829
   0.12440749]]


In [23]:
import faiss

# Create a FAISS index
index = faiss.IndexFlatL2(embedding_matrix.shape[1])  # L2 distance-based index

# Add the embeddings to the FAISS index
index.add(embedding_matrix)

# Example query (can be any new input that you'd want to match with the knowledge base)
query = ["What impacts demand during rainy days?"]

# Generate embedding for the query
query_embedding = model.encode(query)
query_embedding = np.array(query_embedding).astype("float32")

# Search for the nearest neighbor
distances, indices = index.search(query_embedding, k=1)  # Find the closest 1 match
print(f"Most similar knowledge: {knowledge[indices[0][0]]}")


Most similar knowledge: Rainy days lead to fewer walk-in customers but more online orders.


In [24]:
pip install transformers


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [25]:
from transformers import pipeline

# Load a pre-trained model (e.g., GPT-2 or GPT-3, depending on your use case)
generator = pipeline("text-generation", model="gpt2")

# RAG function: augment a query with relevant knowledge
def rag_response(query, knowledge_base, embeddings, index):
    # Encode the query and find the most similar knowledge
    query_embedding = model.encode([query]).astype("float32")
    distances, indices = index.search(query_embedding, k=1)  # Find top match

    # Fetch the most relevant knowledge
    relevant_knowledge = knowledge_base[indices[0][0]]

    # Combine query and relevant knowledge to create context for the model
    augmented_input = f"Query: {query}\nRelevant Knowledge: {relevant_knowledge}"

    # Use a pre-trained language model to generate a response
    response = generator(augmented_input, max_length=100)
    return response[0]['generated_text']

# Test the RAG function with a query
query = "How do holidays affect demand?"
response = rag_response(query, knowledge, embedding_matrix, index)
print(response)


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: How do holidays affect demand?
Relevant Knowledge: Festivals and holidays can increase demand by up to 30%. However, it does not have to be. For example, Christmas (a day that makes people celebrate, particularly holidays) is a good event to increase demand by up to 30%.
The more you believe you can, the more important is the holiday. For example, in the United States, most people know that Christmas Day is only used for a certain group of holidays.


In [26]:
# Apply the inventory decision logic along with RAG
def enhanced_inventory_decision(predicted_demand, inventory_level, query):
    # Get RAG-enhanced response for the query
    rag_answer = rag_response(query, knowledge, embedding_matrix, index)
    
    # Now, combine this information with inventory decisions
    if inventory_level < predicted_demand:
        return f"🔁 Reorder needed\nAdditional Info: {rag_answer}"
    elif inventory_level > 2 * predicted_demand:
        return f"⚠️ Overstock - consider reducing future orders\nAdditional Info: {rag_answer}"
    else:
        return f"✅ Inventory level is optimal\nAdditional Info: {rag_answer}"

# Apply to test set with RAG
results['inventory_action'] = results.apply(
    lambda row: enhanced_inventory_decision(
        row['predicted_sales_t5'], 
        row['inventory_level'], 
        "How can we optimize inventory for this product?"
    ), axis=1
)

# Show sample output with RAG explanations
print(results[['inventory_level', 'predicted_sales_t5', 'inventory_action']].head(10))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

     inventory_level  predicted_sales_t5  \
203              123               67.66   
266               79               51.16   
152              117               71.21   
9                137               58.84   
233               84               44.58   
226              131               44.45   
196               85               44.92   
109              122               40.92   
5                149               44.02   
175               40               26.75   

                                      inventory_action  
203  ✅ Inventory level is optimal\nAdditional Info:...  
266  ✅ Inventory level is optimal\nAdditional Info:...  
152  ✅ Inventory level is optimal\nAdditional Info:...  
9    ⚠️ Overstock - consider reducing future orders...  
233  ✅ Inventory level is optimal\nAdditional Info:...  
226  ⚠️ Overstock - consider reducing future orders...  
196  ✅ Inventory level is optimal\nAdditional Info:...  
109  ⚠️ Overstock - consider reducing future orders...  
5 