In [1]:
# Install required libraries
!pip install -U langchain langchain-experimental langchain-community langchain-huggingface pandas torch accelerate transformers huggingface_hub


Collecting langchain
  Downloading langchain-0.3.18-py3-none-any.whl.metadata (7.8 kB)
Collecting langchain-experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.17-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Collecting torch
  Downloading torch-2.6.0-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)
Collecting accelerate
  Downloading accelerate-1.3.0-py3-none-any.whl.metadata (19 kB)
Collecting transformers
  Downloading transformers-4.48.3-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<1.0.0,>=0.3.34 (from langchain)
  Downloading langchain_core-0.3.34-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain-text-splitters<1.0.0,>=0.3.6 (from langch

In [2]:

# Import Libraries
import os
import pandas as pd
import torch
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain.chains import LLMChain
from huggingface_hub import InferenceClient  # Correct way to use HF API

# Set Hugging Face API Token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"

# Check GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print("✅ Using device:", device)




✅ Using device: cuda


In [11]:
# Load LLaMA using Hugging Face's Inference API
hf_client = InferenceClient("https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct", token=os.environ["HUGGINGFACEHUB_API_TOKEN"])
# https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1
# https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct
# https://api-inference.huggingface.co/models/google/gemma-7b-it

# Function to send a request to Hugging Face
def llama_generate(prompt):
    """Convert input to a string and send request to Hugging Face API"""
    if not isinstance(prompt, str):
        prompt = str(prompt)  # Ensure the prompt is a string
    response = hf_client.text_generation(prompt, max_new_tokens=256)
    return response

print("✅ LLaMA Model Connected to Hugging Face API")

# Create Sample CSV Data
data = {
    "Customer": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Product": ["Laptop", "Phone", "Tablet", "Monitor", "Keyboard"],
    "Quantity": [1, 2, 1, 3, 2],
    "Price": [1200, 800, 500, 300, 100],
    "Total": [1200, 1600, 500, 900, 200]
}

# Create and Save CSV File
csv_path = "sales_data.csv"
df = pd.DataFrame(data)
df.to_csv(csv_path, index=False)

# Load CSV File
df = pd.read_csv(csv_path)
print("✅ CSV File Loaded\n", df.head())

# Create a prompt template for querying the CSV data
prompt_template = PromptTemplate(
    input_variables=["query", "data"],
    template="You are an AI assistant analyzing sales data. Answer the query: {query}\nData:\n{data}"
)

# Use LangChain's new method for invoking LLaMA
llm_chain = prompt_template | RunnableLambda(llama_generate)

# Function to run AI queries on CSV data
def ask_csv(query):
    """Processes a natural language query on the CSV file."""
    data_str = df.to_string(index=False)  # Convert DataFrame to string
    response = llm_chain.invoke({"query": query, "data": data_str})  # Use invoke() instead of run()
    return response



✅ LLaMA Model Connected to Hugging Face API
✅ CSV File Loaded
   Customer   Product  Quantity  Price  Total
0    Alice    Laptop         1   1200   1200
1      Bob     Phone         2    800   1600
2  Charlie    Tablet         1    500    500
3    David   Monitor         3    300    900
4      Eve  Keyboard         2    100    200


In [12]:
# Run AI queries
query_1 = "What is the total revenue in the dataset?"
query_2 = "Which product generated the highest sales?"
query_3 = "Who is the best customer based on spending?"

print("📝 Query 1 Response:", ask_csv(query_1))
print("📝 Query 2 Response:", ask_csv(query_2))
print("📝 Query 3 Response:", ask_csv(query_3))

📝 Query 1 Response: 
The total revenue in the dataset is $1,600.
📝 Query 2 Response: 
Alice
📝 Query 3 Response: 
Alice


In [13]:
!wget https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_M.gguf


--2025-02-10 00:04:46--  https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_M.gguf
Resolving huggingface.co (huggingface.co)... 18.172.134.4, 18.172.134.24, 18.172.134.88, ...
Connecting to huggingface.co (huggingface.co)|18.172.134.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cdn-lfs.hf.co/repos/90/07/90076ae9a201487aedadb49bde2070797e223829cae7492b17e60c2fd791b379/4567208c2221da5a9f2ded6cc26ce58dd47d0410902c3f57a4a3ed104ce51b0b?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27llama-2-7b.Q4_K_M.gguf%3B+filename%3D%22llama-2-7b.Q4_K_M.gguf%22%3B&Expires=1739149486&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTczOTE0OTQ4Nn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy85MC8wNy85MDA3NmFlOWEyMDE0ODdhZWRhZGI0OWJkZTIwNzA3OTdlMjIzODI5Y2FlNzQ5MmIxN2U2MGMyZmQ3OTFiMzc5LzQ1NjcyMDhjMjIyMWRhNWE5ZjJkZWQ2Y2MyNmNlNThkZDQ3ZDA0MTA5MDJjM2Y1N2E0YTNlZDEwNGNlNTFiMGI%7EcmVzc

In [14]:
!pip install llama-cpp-python


Collecting llama-cpp-python
  Downloading llama_cpp_python-0.3.7.tar.gz (66.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.7/66.7 MB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.3.7-cp310-cp310-linux_x86_64.whl size=4601126 sha256=8

In [15]:
from llama_cpp import Llama

# Load the quantized model (set GPU layers for acceleration)
llm = Llama(model_path="llama-2-7b.Q4_K_M.gguf", n_gpu_layers=40)

# Test the model
response = llm("What is the capital of France?")
print(response["choices"][0]["text"])


llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32
ll

 Paris
 What is the population of France? 65,695


In [16]:
import os
import pandas as pd
import torch
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain.chains import LLMChain
from llama_cpp import Llama  # Use llama.cpp for local execution

# Load LLaMA Locally (Ensure you have GPU enabled)
llm = Llama(model_path="llama-2-7b.Q4_K_M.gguf", n_gpu_layers=40)

# Function to generate text using local LLaMA
def llama_generate(prompt):
    """Send prompt to locally running LLaMA"""
    response = llm(prompt, max_tokens=256)
    return response["choices"][0]["text"]

print("✅ LLaMA Model Loaded Locally")

# Create Sample CSV Data
data = {
    "Customer": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Product": ["Laptop", "Phone", "Tablet", "Monitor", "Keyboard"],
    "Quantity": [1, 2, 1, 3, 2],
    "Price": [1200, 800, 500, 300, 100],
    "Total": [1200, 1600, 500, 900, 200]
}

# Create and Save CSV File
csv_path = "sales_data.csv"
df = pd.DataFrame(data)
df.to_csv(csv_path, index=False)

# Load CSV File
df = pd.read_csv(csv_path)
print("✅ CSV File Loaded\n", df.head())

# Create a prompt template for querying the CSV data
prompt_template = PromptTemplate(
    input_variables=["query", "data"],
    template="You are an AI assistant analyzing sales data. Answer the query: {query}\nData:\n{data}"
)

# Use LangChain with Local LLaMA
llm_chain = prompt_template | RunnableLambda(llama_generate)

# Function to run AI queries on CSV data
def ask_csv(query):
    """Processes a natural language query on the CSV file."""
    data_str = df.to_string(index=False)  # Convert DataFrame to string
    response = llm_chain.invoke({"query": query, "data": data_str})  # Use invoke() instead of run()
    return response

# Run AI queries
query_1 = "What is the total revenue in the dataset?"
query_2 = "Which product generated the highest sales?"
query_3 = "Who is the best customer based on spending?"

print("📝 Query 1 Response:", ask_csv(query_1))
print("📝 Query 2 Response:", ask_csv(query_2))
print("📝 Query 3 Response:", ask_csv(query_3))


llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32
ll

✅ LLaMA Model Loaded Locally
✅ CSV File Loaded
   Customer   Product  Quantity  Price  Total
0    Alice    Laptop         1   1200   1200
1      Bob     Phone         2    800   1600
2  Charlie    Tablet         1    500    500
3    David   Monitor         3    300    900
4      Eve  Keyboard         2    100    200


CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | AVX512 = 1 | LLAMAFILE = 1 | OPENMP = 1 | AARCH64_REPACK = 1 | 
Model metadata: {'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.context_length': '4096', 'general.name': 'LLaMA v2', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '11008', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.rope.dimension_count': '128', 'llama.attention.head_count': '32', 'tokenizer.ggml.bos_token_id': '1', 'llama.block_count': '32', 'llama.attention.head_count_kv': '32', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '15'}
Using fallback chat format: llama-2


TypeError: object of type 'StringPromptValue' has no len()

In [18]:
import os
import pandas as pd
import torch
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain.chains import LLMChain
from llama_cpp import Llama  # Use llama.cpp for local execution

# Load LLaMA Locally (Ensure you have GPU enabled)
# Load LLaMA with GPU acceleration
llm = Llama(
    model_path="llama-2-7b.Q4_K_M.gguf",  # Path to quantized model
    n_gpu_layers=40,  # Allocate more layers to GPU
    n_ctx=2048,  # Increase context window for better responses
    verbose=True  # Print logs to verify GPU usage
)

# Function to generate text using local LLaMA
def llama_generate(input_data):
    """Convert input data to string and send it to the locally running LLaMA."""
    if isinstance(input_data, dict):  
        # Extract the query text
        prompt = input_data.get("query", "") + "\n" + input_data.get("data", "")
    else:
        prompt = str(input_data)  # Ensure it's a string
    
    # Send prompt to LLaMA
    response = llm(prompt, max_tokens=256)
    
    return response["choices"][0]["text"]

print("✅ LLaMA Model Loaded Locally")

# Create Sample CSV Data
data = {
    "Customer": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Product": ["Laptop", "Phone", "Tablet", "Monitor", "Keyboard"],
    "Quantity": [1, 2, 1, 3, 2],
    "Price": [1200, 800, 500, 300, 100],
    "Total": [1200, 1600, 500, 900, 200]
}

# Create and Save CSV File
csv_path = "sales_data.csv"
df = pd.DataFrame(data)
df.to_csv(csv_path, index=False)

# Load CSV File
df = pd.read_csv(csv_path)
print("✅ CSV File Loaded\n", df.head())

# Create a prompt template for querying the CSV data
prompt_template = PromptTemplate(
    input_variables=["query", "data"],
    template="You are an AI assistant analyzing sales data. Answer the query: {query}\nData:\n{data}"
)

# Use LangChain with Local LLaMA
llm_chain = prompt_template | RunnableLambda(llama_generate)

# Function to run AI queries on CSV data
def ask_csv(query):
    """Processes a natural language query on the CSV file."""
    data_str = df.to_string(index=False)  # Convert DataFrame to string
    response = llm_chain.invoke({"query": query, "data": data_str})  # Use invoke() instead of run()
    return response

# Run AI queries
query_1 = "What is the total revenue in the dataset?"
query_2 = "Which product generated the highest sales?"
query_3 = "Who is the best customer based on spending?"

print("📝 Query 1 Response:", ask_csv(query_1))
print("📝 Query 2 Response:", ask_csv(query_2))
print("📝 Query 3 Response:", ask_csv(query_3))


llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32
ll

✅ LLaMA Model Loaded Locally
✅ CSV File Loaded
   Customer   Product  Quantity  Price  Total
0    Alice    Laptop         1   1200   1200
1      Bob     Phone         2    800   1600
2  Charlie    Tablet         1    500    500
3    David   Monitor         3    300    900
4      Eve  Keyboard         2    100    200


llama_perf_context_print:        load time =   21772.62 ms
llama_perf_context_print: prompt eval time =   21772.31 ms /   130 tokens (  167.48 ms per token,     5.97 tokens per second)
llama_perf_context_print:        eval time =   78244.73 ms /   255 runs   (  306.84 ms per token,     3.26 tokens per second)
llama_perf_context_print:       total time =  100184.17 ms /   385 tokens
Llama.generate: 18 prefix-match hit, remaining 109 prompt tokens to eval


📝 Query 1 Response: 
data = pd.read_csv(text)

# data.shape
print(data.shape)

# data.head()
print(data.head())

# data.describe()
print(data.describe())

# data.info()
print(data.info())

# data.dtypes
print(data.dtypes)

# data.head()
print(data.head())

# data.describe()
print(data.describe())

# data.info()
print(data.info())

# data.dtypes
print(data.dtypes)

# data.head()
print(data.head())

# data.describe()
print(data.describe())

# data.info()
print(data.info())

# data.dtypes
print(data.dtypes)

# data.head()
print(data.head())

# data.describe()
print(data.describe())

# data.info()
print(data.info())

# data.dtypes
print(data


llama_perf_context_print:        load time =   21772.62 ms
llama_perf_context_print: prompt eval time =   18306.46 ms /   109 tokens (  167.95 ms per token,     5.95 tokens per second)
llama_perf_context_print:        eval time =   77641.89 ms /   255 runs   (  304.48 ms per token,     3.28 tokens per second)
llama_perf_context_print:       total time =   96116.54 ms /   364 tokens
Llama.generate: 18 prefix-match hit, remaining 112 prompt tokens to eval


📝 Query 2 Response: 
# The first line is the header (you must add it)
# The next line is the data
# The next lines are the questions (you must add them)

from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC

def get_pca(data):
    pca = PCA(n_components=2)
    data_pca = pca.fit_transform(data.reshape(1, -1))
    return data_pca

def get_linear_svc(data_pca):
    clf = LinearSVC()
    return clf.fit(data_pca, labels=None)

data = pd.read_csv("data.csv", header=None, skip_blank_lines=True)
data = pd.DataFrame(data)
data = data[0].reshape(1, -1)
data = get_pca(data)
clf = get_linear_svc(data)
print(clf.coef_)
print(clf.coef_.argmax(1)[0])
\end


llama_perf_context_print:        load time =   21772.62 ms
llama_perf_context_print: prompt eval time =   18749.01 ms /   112 tokens (  167.40 ms per token,     5.97 tokens per second)
llama_perf_context_print:        eval time =   30282.74 ms /    99 runs   (  305.89 ms per token,     3.27 tokens per second)
llama_perf_context_print:       total time =   49080.69 ms /   211 tokens


📝 Query 3 Response: 

import re

pattern = re.compile(r'\d+')


def is_number(x):
    return pattern.match(x) is not None


def count_numbers(text):
    numbers = []
    for x in text.split():
        if is_number(x):
            numbers.append(x)
    return len(numbers)


print(count_numbers(text))



In [None]:
print('hi')