In [138]:
import ast
import random

import pandas as pd

from langchain import hub
from langchain.chat_models import init_chat_model
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain_core.documents import Document
from langchain_ollama import ChatOllama
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict, Optional


In [139]:
# Read the Fireworks AI API key from a local text file (surrounding
# whitespace/newline stripped) so the secret is not hard-coded in the notebook.
with open('fireworksai_api_key.txt', 'r') as key_file:
    API_KEY = key_file.read().strip()

In [140]:
# Centimetres per foot. Multiplied into the height/spread columns during
# cleaning (presumably the CSV stores those columns in feet — TODO confirm).
FEET_TO_CM = 30.48

In [141]:
# Load the raw plant list from CSV into the working DataFrame.
data = pd.read_csv('data/texas_plant_list.csv')

In [142]:
# Attach mock webshop attributes (price, delivery promise, label) to every
# plant. The values are synthetic: they are drawn at random, not read from the
# CSV. A dedicated, seeded generator makes them identical on every
# "Restart & Run All" without touching the global `random` state.
MOCK_DATA_SEED = 42
PRICE_FRACTIONS = [0, 0.50, 0.99]
DELIVERY_OPTIONS = ['Delivered within 2 weeks', 'Delivered within 1 week', 'Buy today, delivered tomorrow']
LABEL_OPTIONS = ['ONLY ONLINE', 'BULK DISCOUNT', 'PROMOTION', 'NEW', 'SOLD OUT']

mock_rng = random.Random(MOCK_DATA_SEED)

mock_prices, mock_deliveries, mock_labels = [], [], []
for _ in range(len(data)):
    # Whole amount between 1 and 100 plus one of the fixed fractional endings.
    mock_prices.append(round(mock_rng.uniform(1, 100)) + mock_rng.choice(PRICE_FRACTIONS))
    mock_deliveries.append(mock_rng.choice(DELIVERY_OPTIONS))
    mock_labels.append(mock_rng.choice(LABEL_OPTIONS))

# Whole-column assignment is positional, so it does not depend on the frame
# having a default 0..n-1 index the way per-row `.at[i, ...]` writes do.
data['Price'] = mock_prices
data['Delivery'] = mock_deliveries
data['Labels'] = mock_labels

In [143]:
# NOTE: this cell is not idempotent — running it twice converts the size
# columns twice and fails on the already-joined list columns. Re-run from the
# `pd.read_csv` cell before re-executing it.


def _join_list_literal(value):
    """Turn a stringified Python list (e.g. "['Tree', 'Shrub']") into "Tree, Shrub".

    Non-string values (such as NaN for missing cells) are returned unchanged.
    ``ast.literal_eval`` only parses literals, so text coming from the CSV
    cannot execute code the way ``eval`` could.
    """
    if isinstance(value, str):
        return ', '.join(ast.literal_eval(value))
    return value


# Flatten the list-valued text columns into comma-separated strings.
for list_column in ('Growth Form', 'Native Habitat'):
    data[list_column] = data[list_column].apply(_join_list_literal)

# Scale the size columns by FEET_TO_CM so they are expressed in centimetres.
for size_column in ('Min Height', 'Max Height', 'Min Spread', 'Max Spread'):
    data[size_column] = data[size_column] * FEET_TO_CM

# Keep the first row per scientific name and renumber rows 0..n-1, which the
# later `data.at[i, ...]` lookups rely on.
data = data.drop_duplicates(subset='Scientific Name', keep='first').reset_index(drop=True)

# Replace every remaining missing value with the placeholder text 'Unknown'.
data = data.fillna('Unknown')

  data = data.fillna('Unknown')


In [None]:
# data.to_csv('data/texas_plant_list_cleaned.csv', index=False)

In [87]:
# (label, column) pairs rendered into each plant document, in output order.
# 'Ecoregion III', 'Leaf Retention', 'Price', 'Delivery' and 'Labels' are
# currently left out of the embedded text.
DOC_FIELDS = [
    ('Latin Name', 'Scientific Name'),
    ('Common Name', 'Common Name'),
    ('Native Habitat', 'Native Habitat'),
    ('Growth Form', 'Growth Form'),
    ('Bloom Season', 'Bloom Season'),
    ('Bloom Color', 'Bloom Color'),
    ('Lifespan', 'Lifespan'),
    ('Wildlife Benefit', 'Wildlife Benefit'),
    ('Soil', 'Soil'),
    ('Light', 'Light'),
    ('Water', 'Water'),
    ('Min Height (cm)', 'Min Height'),
    ('Max Height (cm)', 'Max Height'),
    ('Min Spread (cm)', 'Min Spread'),
    ('Max Spread (cm)', 'Max Spread'),
    ('Maintenence', 'Maintenence'),
    ('Comments', 'Comments'),
]

# One text document per plant: "ID: <n> | <label>: <value> | ...", with IDs
# starting at 1. Row labels 0..n-1 are assumed (the frame's index was reset
# during cleaning).
docs = []
for doc_id, row in enumerate(range(len(data)), start=1):
    parts = [f"ID: {doc_id}"]
    parts.extend(f"{label}: {data.at[row, column]}" for label, column in DOC_FIELDS)
    docs.append(" | ".join(parts))

# Spot-check one document and its length in characters.
print(docs[5])
print(len(docs[5]))

ID: 6 | Latin Name: Acer rubrum var. drummondii | Common Name: Drummond Red Maple | Native Habitat: Woodland, Wetland or Riparian | Growth Form: Tree | Bloom Season: Spring | Bloom Color: Red | Lifespan: Perennial | Wildlife Benefit: Beetles, Butterflies, Birds, Small Mammals, Deer, Moths, Bees | Soil: Sand, Loam, Clay, Calcareous, Poor Drainage, Moist | Light: Sun, Part Shade | Water: Medium, High | Min Height (cm): 1524.0 | Max Height (cm): 2743.2 | Min Spread (cm): 914.4 | Max Spread (cm): 1828.8 | Maintenence: Fast growth rate. Tolerates a wide range of soil conditions. A showy shade tree with attractive fall foliage. The fallen leaves decompose over time, releasing essential nutrients back into the soil. This process enriches the surrounding environment, supporting the growth of other plant species and wildlife. Prefers moist, swampy sites and is not as cold hardy as other maples. Propagation: seed. | Comments: Blooms February-April. Forms a dense crown with sturdy branches. Leave

In [None]:
# Embedding model wrapper; passed as `embedding` when the Qdrant collection is
# opened further down.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# qdrant = Qdrant.from_texts(
#     docs,
#     embeddings,
#     path="vector_stores/plantkiezer1", 
#     collection_name="planten",
# )


______

In [90]:
# Open the existing on-disk Qdrant collection "planten" using the embedding
# model defined above (presumably the collection was built once with the
# commented-out `Qdrant.from_texts` call — TODO confirm).
vectorstore = Qdrant.from_existing_collection(
    collection_name="planten",
    embedding=embeddings,
    path="vector_stores/plantkiezer1",
)

In [120]:
# Example user question; used both for query expansion and as the graph's
# "question" input.
query = "Which plants are nice in a humid environment?"

# Query Expansion

In [121]:
# System prompt for the query-expansion call (sent as the "system" message).
# NOTE: `instruction` is rebound to a different prompt in a later cell.
instruction = "You enrich a user query for dense vector search. Return ONE line: the original query first, then up to 5 short synonym/keyword variants separated by ' | '. Preserve intent; prefer domain-specific terms likely found in the corpus. Each variant 2-6 words. No quotes, no explanations, no boolean operators, nothing else."

In [None]:
# Ollama chat model that `llm.invoke(messages)` calls for query expansion.
# NOTE(review): `llm` is rebound to a Fireworks model in a later cell, so which
# model performs the expansion depends on cell execution order.
llm = ChatOllama(
    model="gemma3:4b", 
    keep_alive="30m",
    num_ctx=2048,
    num_predict=256,
    temperature=0.5
)

In [103]:
# Chat transcript for the query-expansion call: the system instruction
# followed by the user's query.
system_turn = ("system", instruction)
human_turn = ("human", query)
messages = [system_turn, human_turn]

In [131]:
# Text content of the LLM's reply to the expansion prompt; used as the search
# text for the vector-store lookups below.
query_updated = llm.invoke(messages).content

# Retrieve and Chat Test

In [None]:

# Page contents of the max-marginal-relevance hits (k=5, no filter) for the
# expanded query.
mmr_hits = vectorstore.max_marginal_relevance_search(query_updated, k=5, filter=None)
unique_documents = [hit.page_content for hit in mmr_hits]

In [127]:
# Rebind `llm` to a Fireworks-hosted chat model; `generate` calls this `llm`.
# The commented-out identifiers are alternative models to switch to.
llm = init_chat_model(
    "accounts/fireworks/models/deepseek-v3",
    # "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", 
    # "accounts/fireworks/models/gpt-oss-20b",
    # "accounts/fireworks/models/gpt-oss-120b",
    model_provider="fireworks", 
    api_key=API_KEY
)

# Short label for the selected model. NOTE(review): not referenced in the
# visible cells and must be kept in sync with the model id above by hand.
# MODEL = "gpt-oss-20b"
# MODEL = "gpt-oss-120b"
# MODEL = "qwen3-30b"
MODEL = "deepseek-v3"


In [None]:
# Pull the "rlm/rag-prompt" prompt from the LangChain hub; `generate` fills it
# with the question and the retrieved context.
prompt = hub.pull("rlm/rag-prompt")

class State(TypedDict):
    """State passed through the retrieve -> generate graph.

    ``retrieve`` writes ``context`` and ``generate`` writes ``answer``. The
    generation/retrieval control fields are declared here but are not read by
    ``retrieve`` or ``generate`` as written in this notebook.
    """
    # User question; inserted into the prompt by ``generate``.
    question: str
    # Documents returned by ``retrieve``.
    context: List[Document]
    # Text content of the model response produced by ``generate``.
    answer: str
    
    # generation & retrieval controls
    # NOTE(review): currently unused by the graph nodes — TODO confirm intent.
    max_tokens: Optional[int]
    top_p: Optional[float]
    top_k: Optional[int]
    presence_penalty: Optional[float]
    frequency_penalty: Optional[float]
    temperature: Optional[float]

def retrieve(state: State):
    """Fetch context documents for the graph.

    Runs a max-marginal-relevance search (k=5, no filter) on the module-level
    ``vectorstore``.

    NOTE(review): the search text is the module-level ``query_updated``, not
    ``state["question"]`` — ``state`` is not read here, so the query-expansion
    cell must have run before the graph is invoked.

    Returns:
        dict with key ``"context"`` holding the retrieved documents.
    """
    return {
        "context": vectorstore.max_marginal_relevance_search(
            query_updated, k=5, filter=None
        )
    }

def generate(state: State):
    """Answer the question from the retrieved context.

    Joins the page contents of ``state["context"]`` with blank lines, fills the
    module-level ``prompt`` with that text and ``state["question"]``, and sends
    the result to the module-level ``llm``.

    Returns:
        dict with key ``"answer"`` holding the content of the model response.
    """
    context_text = "\n\n".join(document.page_content for document in state["context"])
    filled_prompt = prompt.invoke({"question": state["question"], "context": context_text})
    return {"answer": llm.invoke(filled_prompt).content}

# Wire the two steps into a linear graph: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()



In [129]:
# Prompt describing the intended answering behaviour.
# NOTE(review): this rebinds `instruction`, but nothing in the visible cells
# after this point reads it — `generate` builds its prompt from the hub
# `prompt` only. TODO confirm whether it was meant to reach the answering model.
instruction = "You are an expert botanical assistant. You will be provided with five retrieved plant entries. Choose three of them that you think answers the user query the best and recommend it. Use the descriptions of the retrieved data to also provide more information about the plants."

In [134]:
# Run the graph on the original user question. The sampling settings are
# passed along as part of the graph input.
graph_input = {"question": query}
graph_input.update(
    max_tokens=1024,
    top_p=1,
    top_k=40,
    presence_penalty=0,
    frequency_penalty=0,
    temperature=0.6,
)
response = graph.invoke(graph_input)

In [135]:
response['answer']

'The **Louisiana Spiderlily (Hymenocallis liriosme)** is well-suited for humid environments, as it thrives in moist to wet soils and part shade. Its perennial nature and fragrant white blooms make it an excellent choice for humid, wetland areas.'

In [137]:
data

Unnamed: 0,Scientific Name,Common Name,Other Common Names,Growth Form,Ecoregion III,Ecoregion IV,Min Height,Max Height,Min Spread,Max Spread,...,Bloom Season,Bloom Color,Seasonal Interest,Wildlife Benefit,Maintenence,Comments,References,Price,Delivery,Labels
0,Abronia ameliae,Heart's Delight,Amelia's Sand-verbena,Herbaceous,"Gulf Coast Prairies and Marshes, Southern Texa...",", , , , , Coastal Sand Plain, , Rio Grande Flo...",30.48,45.72,15.24,30.48,...,Spring,"Pink, Purple","Nectar, Pollen","Butterflies, Nectar Insects, Moths",A short-lived perennial. Abronias do not trans...,Blooms January-June. Stems are coarse and hair...,"1) Griffith, Bryce, Omernick & Rodgers (2007)....",64.50,"Buy today, delivered tomorrow",PROMOTION
1,Acacia angustissima,Prairie Acacia,"White Ball Acacia, Fern Acacia, Whiteball Acac...","Herbaceous, Shrub","Central Great Plains, Chihuahuan Deserts, Cros...","Broken Red Plains,Limestone Plains,Red Prairie...",30.48,91.44,45.72,60.96,...,"Summer, Fall",White,"Seeds, Nectar, Larval Host","Butterflies, Birds, Small Mammals, Nectar Inse...","Drought Tolerant. Colonizes by rhizomes, and i...","Blooms June-September. A mounding, thornless a...","1) Griffith, Bryce, Omernick & Rodgers (2007)....",62.00,Delivered within 2 weeks,SOLD OUT
2,Acer floridanum,Southern Sugar Maple,"Caddo Maple, Florida Maple, Rock Maple",Tree,Western Gulf Coastal Plain,", , , , , , , , , , Flatwoods,Floodplains and ...",609.6,3048.0,365.76,1828.8,...,Spring,"Yellow, Green","Fall Color, Seeds","Birds, Small Mammals","Moderate to fast growth rate. This small, spre...",A smaller southern version of the Sugar Maple....,"1) Griffith, Bryce, Omernick & Rodgers (2007)....",82.99,"Buy today, delivered tomorrow",SOLD OUT
3,Acer grandidentatum,Bigtooth Maple,"Big-toothed Maple, Uvalde Bigtooth Maple, Sout...",Tree,"Chihuahuan Deserts, Cross Timbers, Edwards Pla...",", Chihuahuan Desert Grasslands,Chihuahuan Mont...",457.2,1219.2,304.8,457.2,...,Spring,"Yellow, Green","Fall Color, Seeds, Forage, Nectar, Pollen, Flo...","Browsers, Butterflies, Birds, Small Mammals","Slow growth rate, especially in the first few ...",Blooms March-April. The western relative of Su...,"1) Griffith, Bryce, Omernick & Rodgers (2007)....",14.00,Delivered within 1 week,BULK DISCOUNT
4,Acer negundo,Box Elder,Unknown,Tree,"Cross Timbers, East Central Texas Plains, Edwa...",", , Eastern Cross Timbers,Grand Prairie, Bastr...",1066.8,1828.8,914.4,1524.0,...,Spring,"Yellow, Green, Brown","Fall Color, Seeds, Nectar, Pollen, Larval Host","Birds, Moths, Bees","Low maintenance, adaptable. Fast growth rate. ...",Usually a small to medium-sized tree. It doesn...,"1) Griffith, Bryce, Omernick & Rodgers (2007)....",41.99,Delivered within 1 week,ONLY ONLINE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
690,Zephyranthes drummondii,Prairie Lily,"Hill Country Rain Lily, Rain Lily, Flor De May...",Herbaceous,"East Central Texas Plains, Edwards Plateau, Gu...",", , , Southern Post Oak Savanna, Balcones Cany...",15.24,30.48,3.048,7.62,...,"Spring, Summer","White, Pink","Nectar, Pollen","Butterflies, Bees",Very low maintenance. Tends to have a main spr...,Blooms March-August. Bulbs produce blue-green ...,1) https://www.itis.gov/servlet/SingleRpt/Sing...,96.00,"Buy today, delivered tomorrow",PROMOTION
691,Zinnia acerosa,Desert Zinnia,"Dwarf Zinnia, Dwarf White Zinnia, Shrubby Zinn...","Groundcover, Herbaceous",Chihuahuan Deserts,", Chihuahuan Basins and Playas,Chihuahuan Dese...",15.24,30.48,30.48,45.72,...,Summer,"White, Yellow","Nectar, Pollen","Butterflies, Bees",Low maintenance. Makes a good groundcover. Wor...,"Blooms June-August. A low, mounding plant with...",1) http://bonap.net/TDC/Image/Map?taxonType=Sp...,89.50,Delivered within 2 weeks,SOLD OUT
692,Zinnia grandiflora,Plains Zinnia,"Rocky Mountain Zinnia, Prairie Zinnia, Little ...",Shrub,"Chihuahuan Deserts, Edwards Plateau, High Plai...",", Chihuahuan Basins and Playas,Chihuahuan Dese...",15.24,22.86,15.24,30.48,...,"Spring, Summer, Fall",Yellow,"Nectar, Pollen","Butterflies, Moths, Bees","Prefers dry, gravelly rock gardens, and well-d...",Blooms April-November. A mound-shaped plant wi...,"1) Griffith, Bryce, Omernick & Rodgers (2007)...",31.50,Delivered within 2 weeks,BULK DISCOUNT
693,Zizia aurea,Golden Alexanders,Golden Zizia,Herbaceous,"East Central Texas Plains, Western Gulf Coasta...",", , , Northern Post Oak Savanna,Northern Prair...",30.48,91.44,30.48,60.96,...,"Spring, Summer",Yellow,"Nectar, Pollen, Larval Host","Caterpillars, Butterflies, Nectar Insects, Mot...",A short-lived perennial that also spreads easi...,"Blooms April-August. Erect, branching, form wi...",1) https://portal.torcherbaria.org/portal/taxa...,74.50,Delivered within 1 week,SOLD OUT
