# Vision to Manufacture: A Fully Local Manufacturing Deep Search Engine

Vinay Lanka | Apoorv Thapliyal 

## Imports

In [1]:
import torch

# Langchain setup
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_ollama import OllamaLLM
import json, re, textwrap, time
from typing import Dict, List, Tuple

#DDG
from duckduckgo_search import DDGS

# Fetching HTML
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_transformers import Html2TextTransformer
from typing import List
from langchain.schema import BaseRetriever, Document

# Web scraping
import asyncio

# Database
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

import os, glob
from langchain.document_loaders import PyPDFLoader
from langchain.schema import Document
from langchain.vectorstores import Chroma, FAISS
import shutil

from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage
import base64, pathlib

import os, glob, shutil
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pypandoc

USER_AGENT environment variable not set, consider setting it to identify your requests.


## Important Definitons
Add the image path of your choice here

In [2]:
# Image path
img_path = "imgs/demo_pen.jpeg"

# Object name: 
object_name = img_path.split("/")[-1].split(".")[0]

# Model definition
model_name = "gemma3:4b"

##### Static DB relevant chunks #####
static_db_relevant_chunks = 10 # Number of static DB relevant chunks to get from retriever

##### Dynamic DB relevant chunks #####
webscraping_queries = 10 # Number of webscraping queries
number_of_webscraping_results = 5 # Number of webscraping results
dynamic_db_relevant_chunks = 10 # Number of dynamic DB relevant chunks to get from retriever

## Captioning


In [3]:
# Set the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Print device information
print(f"Using device: {device}")

Using device: cuda


### Using LLM to generate captions

In [4]:
img_b64 = base64.b64encode(pathlib.Path(img_path).read_bytes()).decode()

content = [
    {  # image part
        "type": "image_url",
        "image_url": f"data:image/jpeg;base64,{img_b64}",
    },
    {  # text prompt part
        "type": "text",
        "text": "Identify the primary object in this image and enumerate all observable material characteristics—such as base material, surface finish, color, texture, gloss level, or coating.  Ignore background elements and give the answer in one clear English sentence.",
    },
]

llm = ChatOllama(model=model_name, temperature=0.3)


object_desc = llm.invoke([HumanMessage(content=content)]).content

print(object_desc)

The primary object in the image is a clear plastic pen with a swirling, marbled pattern, a matte surface finish, a glossy barrel, and a light blue color.


## Static DB

### Setup static db 

Chroma DB with Persistence
Embedding LLM - all-MiniLM-L12-v2

Converts documents under `/data` to vector embeddings under `/db`.
Place any documents to be stored under `/data`.
To be run once.

In [5]:
static_dir = "db/sdb"
embedder   = HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2")

if os.path.exists(static_dir) and os.listdir(static_dir):
    print(f"Loading existing vector store from {static_dir}")
    vectordb_static = Chroma(
        persist_directory=static_dir,
        embedding_function=embedder,
    )

else:
    if os.path.exists(static_dir):
        print(f"Clearing existing directory {static_dir}")
        shutil.rmtree(static_dir) # Remove without ignoring errors this time

    print(f"Building new vector store in {static_dir}")
    os.makedirs(static_dir, exist_ok=True) # Create the directory

    # load PDFs
    pdf_docs = []
    for path in glob.glob("data/*.pdf"):
        loader = PyPDFLoader(path)
        pdf_docs.extend(loader.load())

    # split into chunks
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=200,
        length_function=len
    )
    static_chunks = splitter.split_documents(pdf_docs)

    vectordb_static = Chroma.from_documents(
        documents=static_chunks,
        embedding=embedder,
        persist_directory=static_dir,
    )
    vectordb_static.persist()

  embedder   = HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2")
  from .autonotebook import tqdm as notebook_tqdm


Loading existing vector store from db/sdb


  vectordb_static = Chroma(


### Create a retreiver

Pulling $k$ relevant chunks

In [6]:
static_ret  = vectordb_static.as_retriever(
    search_kwargs={"k": static_db_relevant_chunks}
)

## Generate queries

The LLM Model: Ollama gemma3:4b model will be used for query generation as well as final manufacturing document generation

In [7]:
# Set the query prompt
query_prompt = PromptTemplate.from_template(textwrap.dedent("""
You are a manufacturing-research assistant with subject-matter-expertise in product realization.

- Given the object description below, generate *{k}* diverse Search Engine queries that a seasoned manufacturing SME would use to uncover *every aspect* of how this product is made—from raw material selection through process steps, tooling, automation, quality control, sustainability, and cost analysis.
- Return *only* a JSON array of query strings—no extra text.

OBJECT DESCRIPTION
{caption}
"""))


def generate_queries(caption: str, k: int = 6) -> list[str]:
    chain = LLMChain(llm=llm, prompt=query_prompt.partial(k=k))
    out_dict = chain.invoke({"caption": caption})
    raw      = out_dict["text"].strip()      # ← extract the string
    match    = re.search(r"\[.*\]", raw, re.DOTALL)
    if not match:
        raise ValueError(f"Could not parse JSON from model:\n{raw}")
    return [q.strip() for q in json.loads(match.group(0))]


In [8]:
queries = generate_queries(object_desc, k=webscraping_queries)
print("Generated queries:")
for q in queries:
    print(f"  • {q}")

  chain = LLMChain(llm=llm, prompt=query_prompt.partial(k=k))


Generated queries:
  • marbled plastic pen manufacturing processes
  • injection molding of translucent colored plastics
  • surface finish techniques for matte plastic pens
  • color matching and dispersion in plastic pen production
  • tooling design for swirl pattern injection molding
  • automated pen assembly line robotics
  • plastic pen quality control inspection methods
  • sustainable material sourcing for pen production
  • cost analysis of marbled plastic pen manufacturing
  • optical brighteners for translucent plastic pen coloration


## Dynamic DB

### Duck-Duck-Go scraping for top N URLs

In [9]:
def ddg_search(query: str, max_results: int = 5) -> list[str]:
    """
    Returns the top‑N DuckDuckGo result URLs for a query.
    """
    with DDGS() as ddgs:
        return [hit["href"] for hit in ddgs.text(query, max_results=max_results)]

# Example usage:
# urls = [u for q in queries for u in ddg_search(q, max_results=5)]
# urls = list(dict.fromkeys(urls))  # dedupe while preserving order

In [10]:
async def fetch_documents(urls: list[str]):
    """
    Asynchronously downloads each page and converts HTML→plain text.
    Returns a list of LangChain Document objects.
    """
    loader    = AsyncHtmlLoader(urls)
    html_docs = await loader.load()                       # list[Document] with HTML in .page_content
    transformer = Html2TextTransformer()                  # strips tags, yields markdown‑style text :contentReference[oaicite:0]{index=0}
    text_docs = transformer.transform_documents(html_docs)
    return text_docs


In [11]:
all_urls = []
for q in queries:
    urls = ddg_search(q, max_results=number_of_webscraping_results)
    print(f"\nQuery: {q}\n  URLs:")
    for u in urls:
        print("    •", u)
    all_urls.extend(urls)
    time.sleep(1)  # be nice to DDG

unique_urls = list(dict.fromkeys(all_urls))
print(f"\nTotal unique URLs: {len(unique_urls)}")



Query: marbled plastic pen manufacturing processes
  URLs:
    • https://www.madehow.com/Volume-3/Ballpoint-Pen.html
    • https://www.pensonly.com.au/blog/pen-factories-an-inside-look-at-how-different-pens-are-manufactured.htm
    • https://www.pencoamerica.com/the-write-way-to-manufacture/
    • https://www.scribd.com/document/183228481/The-Manufacturing-pdf
    • https://u.osu.edu/bicpens/03-manufacturing/

Query: injection molding of translucent colored plastics
  URLs:
    • https://www.protolabs.com/resources/blog/translucent-and-clear-plastic-injection-molded-parts/
    • https://gems-mfg.com/clear-plastic-injection-molding-techniques-challenges-and-solutions-for-transparent-and-optical-grade-plastics/
    • https://www.ecomolding.com/transparent-plastic-materials/
    • https://hitopindustrial.com/translucent-and-clear-plastic-injection-molded-parts/
    • https://www.protolabs.com/en-gb/resources/blog/translucent-and-clear-plastic-injection-moulded-parts/

Query: surface fini

### Fetch HTML Docs as plain text

In [12]:
N = len(unique_urls)
print(f"\nFetching and parsing first {N} available pages:")

html_docs = []
success_count = 0
failure_count = 0

for idx, url in enumerate(unique_urls[:N], start=1):
    single_loader = AsyncHtmlLoader([url])
    try:
        docs_for_url = await asyncio.wait_for(
            asyncio.to_thread(single_loader.load),
            timeout=10
        )
        html_docs.extend(docs_for_url)
        print(f"[{idx}/{N}] ✓ Fetched {url}")
        success_count += 1

    except asyncio.TimeoutError:
        failure_count += 1
        print(f"[{idx}/{N}] ✗ Timeout {url}")
    except Exception as e:
        failure_count += 1
        print(f"[{idx}/{N}] ✗ Error {url}: {e}")

print(f"\nCompleted: {success_count} succeeded, {failure_count} failed out of {N} URLs.\n")

# Transform and inspect as before
transformer = Html2TextTransformer()
docs = transformer.transform_documents(html_docs)

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
    length_function=len
)
ddb = splitter.split_documents(docs)


Fetching and parsing first 50 available pages:


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.17it/s]


[1/50] ✓ Fetched https://www.madehow.com/Volume-3/Ballpoint-Pen.html


Fetching pages: 100%|##########| 1/1 [00:06<00:00,  6.60s/it]


[2/50] ✓ Fetched https://www.pensonly.com.au/blog/pen-factories-an-inside-look-at-how-different-pens-are-manufactured.htm


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.94s/it]


[3/50] ✓ Fetched https://www.pencoamerica.com/the-write-way-to-manufacture/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.18it/s]


[4/50] ✓ Fetched https://www.scribd.com/document/183228481/The-Manufacturing-pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.29it/s]


[5/50] ✓ Fetched https://u.osu.edu/bicpens/03-manufacturing/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.42it/s]


[6/50] ✓ Fetched https://www.protolabs.com/resources/blog/translucent-and-clear-plastic-injection-molded-parts/


Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.05s/it]


[7/50] ✓ Fetched https://gems-mfg.com/clear-plastic-injection-molding-techniques-challenges-and-solutions-for-transparent-and-optical-grade-plastics/


Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.03s/it]


[8/50] ✓ Fetched https://www.ecomolding.com/transparent-plastic-materials/


Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.47s/it]


[9/50] ✓ Fetched https://hitopindustrial.com/translucent-and-clear-plastic-injection-molded-parts/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.70it/s]


[10/50] ✓ Fetched https://www.protolabs.com/en-gb/resources/blog/translucent-and-clear-plastic-injection-moulded-parts/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.21s/it]


[11/50] ✓ Fetched https://polymer-additives.specialchem.com/tech-library/article/matte-finish


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.14it/s]


[12/50] ✓ Fetched https://etcnmachining.com/blog/plastic-finishes/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.22s/it]


[13/50] ✓ Fetched https://www.uplandfab.com/blog/improve-precision-surface-finish-in-plastic-parts-with-post-machining-processes


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.90s/it]


[14/50] ✓ Fetched https://www.richconn-cnc.com/surface-finish-101-an-overview-of-plastic-surface-finish-chart.html


Fetching pages: 100%|##########| 1/1 [00:04<00:00,  4.77s/it]


[15/50] ✓ Fetched https://blog.mppcorp.net/the-types-of-surface-finishes-for-plastic-parts


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://chromacolors.com/wp-content/uploads/2024/05/Chroma-Color-Matching-101.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.46it/s]


[16/50] ✓ Fetched https://chromacolors.com/wp-content/uploads/2024/05/Chroma-Color-Matching-101.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.74it/s]


[17/50] ✓ Fetched https://www.marvalindustries.com/color-compounding-blogs/plastic-resin-suppliers/long-lasting-colors-plastics.html


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://em8p26t79wg.exactdn.com/wp-content/uploads/Foster-Corporation-Intoduction-to-Coloring-Plastics-Aug-2020.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.52it/s]


[18/50] ✓ Fetched https://em8p26t79wg.exactdn.com/wp-content/uploads/Foster-Corporation-Intoduction-to-Coloring-Plastics-Aug-2020.pdf


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.59s/it]


[19/50] ✓ Fetched https://www.team-mfg.com/blog/plastic-colorants.html


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.56s/it]


[20/50] ✓ Fetched https://otechcompounds.com/the-science-of-color-matching-flexible-plastic-compounding/


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.08s/it]


[21/50] ✓ Fetched https://www.fictiv.com/articles/injection-molding-tooling


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.05s/it]


[22/50] ✓ Fetched https://www.acomold.com/injection-mold-tooling-guide.html


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.00s/it]


[23/50] ✓ Fetched https://hitopindustrial.com/injection-molding-tooling/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.53it/s]


[24/50] ✓ Fetched https://www.spectrumplastics.com/about/technical-resources/injection-mold-tooling/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.32it/s]


[25/50] ✓ Fetched https://advancedplastiform.com/a-guide-to-injection-molding-tooling/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  2.00s/it]


[26/50] ✓ Fetched https://www.apmprinter.com/a-news-pen-assembly-line-efficiency-automating-writing-instrument-production


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.07s/it]


[27/50] ✓ Fetched https://www.stevanatogroup.com/en/technologies-equipment/assembly-equipment/case-studies/injection-pen-assembly-line/


Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.17s/it]


[28/50] ✓ Fetched https://parcrobotics.in/understanding-automated-assembly-lines/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.39it/s]


[29/50] ✓ Fetched https://www.wevolver.com/article/what-are-robotic-assembly-lines-history-components-advantages-limitations-applications-and-future


Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.01s/it]


[30/50] ✓ Fetched https://jewettautomation.com/how-robotics-is-revolutionizing-assembly-lines/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.16it/s]


[31/50] ✓ Fetched https://www.phas.io/post/plastic-part-inspection


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.66s/it]


[32/50] ✓ Fetched https://www.nbnqc.com/comprehensive-guide-to-inspecting-plastic-parts-techniques-standards-and-best-practices/


Fetching pages: 100%|##########| 1/1 [00:05<00:00,  5.61s/it]


[33/50] ✓ Fetched https://www.deskera.com/blog/plastic-manufacturing-quality-assurance-and-inspection/


Fetching pages: 100%|##########| 1/1 [00:04<00:00,  4.26s/it]


[34/50] ✓ Fetched https://www.deskera.com/blog/quality-control-in-plastic-manufacturing/


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.43s/it]


[35/50] ✓ Fetched https://www.deskera.com/blog/best-practices-for-plastic-manufacturing-quality-control/


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.03s/it]


[36/50] ✓ Fetched https://www.theenvironmentalblog.org/2025/03/bamboo-to-recycled-plastic-materials-green-pens/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.69s/it]


[37/50] ✓ Fetched https://environfriend.com/biodegradable-pens/


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.75s/it]


[38/50] ✓ Fetched https://www.whygoeco.com/the-eco-friendly-pens-writing-towards-a-greener-future/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.20s/it]


[39/50] ✓ Fetched https://www.cubicpromote.com.au/blog/eco-friendly-writing-sustainable-practices-in-pen-production


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.66it/s]


[40/50] ✓ Fetched https://www.consumerenergycenter.org/eco-friendly-pens/


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.75s/it]


[41/50] ✓ Fetched https://www.ttxpens.com/pen-manufacturing-cost/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.28it/s]


[42/50] ✓ Fetched https://datacalculus.com/en/blog/plastics-manufacturing/cost-estimator/material-cost-analysis-in-plastics-manufacturing-a-cost-estimators-guide


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.16s/it]


[43/50] ✓ Fetched https://www.fountainpennetwork.com/forum/topic/359228-some-special-materials-cost-in-pen-making/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.22s/it]


[44/50] ✓ Fetched https://hingtungtech.com/abs-vs-pp-strategic-material-selection-for-pen-manufacturing/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.76it/s]


[45/50] ✓ Fetched https://www.goldengatemolders.com/post/breaking-down-the-cost-factors-in-plastic-manufacturing


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.90it/s]


[46/50] ✓ Fetched https://www.avient.com/products/polymer-additives/visual-enhancement-effects/cesa-bright-optical-brighteners


Fetching pages: 100%|##########| 1/1 [00:04<00:00,  4.50s/it]


[47/50] ✓ Fetched https://daiaplastic.com/optical-brightener-masterbatch-the-solution-to-increase-the-aesthetics-of-plastic/


Fetching pages: 100%|##########| 1/1 [00:09<00:00,  9.81s/it]


[48/50] ✓ Fetched https://europlas.com.vn/en-US/blog-1/optical-brightener-additive-what-is-it


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.06it/s]


[49/50] ✓ Fetched https://welltchemicals.com/blog/the-ultimate-guide-to-optical-brightener-in-2024/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.14it/s]


[50/50] ✓ Fetched https://polymer-additives.specialchem.com/product-categories/additives-optical-brighteners-whitening-agents

Completed: 50 succeeded, 0 failed out of 50 URLs.



### Convert fetched HTML into LangChain Documents

In [13]:
embedder = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"}
)

vectordb_dynamic = FAISS.from_documents(
    documents=ddb,    # your list of Document chunks
    embedding=embedder
)

## Create Combined Retrievers

In [14]:
class CombinedRetriever(BaseRetriever):
    retrievers: List[BaseRetriever]
    k: int = 5

    def get_relevant_documents(self, query: str) -> List[Document]:
        docs: List[Document] = []
        for r in self.retrievers:
            docs.extend(r.get_relevant_documents(query))
        return docs[: self.k]


dynamic_ret = vectordb_dynamic.as_retriever(
    search_kwargs={"k": dynamic_db_relevant_chunks}
)

combined_retriever = CombinedRetriever(
    retrievers=[static_ret, dynamic_ret],
    k=12
)

  class CombinedRetriever(BaseRetriever):


## Ask the LLM

Finally, prompt the LLM with the final query

### Final Prompt

In [15]:
def run_section(
    prompt_tpl: PromptTemplate,
    object_desc: str,
    retriever,
    model_name: str = "gemma3:4b",
    temperature: float = 0.3,
    k_ctx: int = 4
) -> str:
    """
    Generates a single report section using a provided PromptTemplate.

    Args:
      - prompt_tpl: a PromptTemplate expecting keys "object_desc" and "context"
      - object_desc: description of the object being manufactured
      - retriever: a LangChain retriever (e.g., your static vector-store retriever)
      - model_name: the Ollama model to use
      - temperature: sampling temperature for the LLM
      - k_ctx: number of context chunks to fetch

    Returns:
      - The generated section text.
    """
    # 1) Fetch top-k relevant text chunks
    docs: List[Document] = retriever.get_relevant_documents(object_desc)[:k_ctx]
    context = "\n\n".join(d.page_content for d in docs)

    # 2) Fill in the prompt and invoke the LLM
    chain = LLMChain(
        llm=OllamaLLM(model=model_name, temperature=temperature),
        prompt=prompt_tpl
    )
    return chain.run({"object_desc": object_desc, "context": context})

generated_content = []

### Prompts

In [16]:
# Introduction

intro_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web snippets) as your sole source.  
Shape any web-sourced details **only** to illustrate those frameworks and metrics.  
Do **not** introduce anything outside your static DB and retrieved chunks.

Refer to the object being manufactured when writing this section, generate around ≈500 words of content. 

OBJECT DESCRIPTION  
{object_desc}

Introduction: Manufacturing Overview 

Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points where possible  
• No made-up data or sources

---
CONTEXT
{context}
---
"""))

introduction = run_section(
    prompt_tpl=intro_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)
generated_content.append(("Introduction: Manufacturing Overview ", introduction))

  docs: List[Document] = retriever.get_relevant_documents(object_desc)[:k_ctx]
  return chain.run({"object_desc": object_desc, "context": context})


In [17]:
process_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web snippets) as your sole source.  
Shape any web-sourced details **only** to illustrate those frameworks and metrics.  
Do **not** introduce anything outside your static DB and retrieved chunks.

Refer to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Process Selection & Workflow Design** 

• Compare candidate processes (e.g., injection molding vs. CNC vs. blow molding) against selection criteria: cycle time, per-unit cost, material yield, and quality tolerance.  
• Include a text-based flowchart outline (e.g., “Raw resin → drying → melt → injection → cooling → ejection → inspection”).  
• Highlight how the object’s geometry, material, and target volume (e.g. 50,000 units/month) drive process and tooling choices.

Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points wherever possible.  
• No made-up data or sources.

---
CONTEXT
{context}
---
"""))

process = run_section(
    prompt_tpl=process_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append(("Process Selection & Workflow Design", process))

In [18]:
# Material Strategy & Eco-Alternatives

material_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web snippets) as your sole source.  
Shape any web-sourced details **only** to illustrate those frameworks and metrics.  
Do **not** introduce anything outside your static DB and retrieved chunks.

Refer to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Material Strategy & Eco-Alternatives**

• Identify the key material properties required (e.g., strength, rigidity, chemical resistance) for the object.  
• Compare candidate materials in a table format: embodied energy, cost per kg, recyclability rate, and toxicity.  
• Propose at least two bio-based or recycled alternatives (e.g., PLA, rPET, bio-nylon) and outline substitution strategies, including any trade-offs in performance or cost.  
• Call out any supply-chain or certification considerations (e.g., FDA-grade, ISO 14001 compliance).

Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points wherever possible.  
• No made-up data or sources.

---
CONTEXT
{context}
---
"""))

material_section = run_section(
    prompt_tpl=material_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append(("Material Strategy & Eco-Alternatives", material_section))
# print(material_section)

In [19]:
# Sustainability, Life-Cycle & Performance Metrics

sustainability_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web snippets) as your sole source.  
Shape any web-sourced details **only** to illustrate those frameworks and metrics.  
Do **not** introduce anything outside your static DB and retrieved chunks.

Refer to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Sustainability, Life-Cycle & Performance Metrics**
                                                                  
• **Target KPIs:** Define object-specific sustainability targets (e.g., carbon footprint <0.5 kg CO₂e/unit, energy use <2 kWh/unit).  
• **LCA Flow Diagram (placeholder):** Raw material → Manufacturing → Distribution → Use (e.g., 2 year lifespan) → End-of-Life (recycle/compost).  
• **Life-Cycle Costing:** Break down costs by phase (material, energy, end-of-life) and set reduction goals.  
• **Benchmarking:** Compare recyclability rate and energy intensity against industry norms (e.g., PET bottles rPET ≥50%).  
• **Design Levers:** Identify design or material changes (wall-thickness reduction, rPET substitution) to meet KPIs.  
• **Monitoring Plan:** Recommend data sources (LCA software outputs, energy meters) and review cadence.

Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points wherever possible.  
• No made-up data or sources.

---
CONTEXT
{context}
---
"""))

sustainability_section = run_section(
    prompt_tpl=sustainability_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append((
    "Sustainability, Life-Cycle & Performance Metrics",
    sustainability_section
))
# print(sustainability_section)

In [20]:
# Quality Assurance & Validation
quality_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web snippets) as your sole source.  
Shape any web-sourced details **only** to illustrate those frameworks and metrics.  
Do **not** introduce anything outside your static DB and retrieved chunks.

Refer to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Quality Assurance & Validation** 
                                                           
• **Target tolerances:** List the object’s critical dimensions or performance specs and their acceptable tolerance bands.  
• **Inspection methods:** Recommend 2–3 validation techniques suited to this object (e.g., gauge R&R for feature X, pressure/leak test for seal, vision inspection for surface defects).  
• **Sampling plan:** Define lot size and inspection frequency (e.g., inspect 5 units every production hour) to balance quality risk and throughput.  
• **Data analysis & roles:** Identify who reviews QC data (operator, quality engineer) and how out-of-tolerance findings trigger corrective actions.  
• **Validation schedule:** Suggest periodic gauge R&R studies or calibration intervals for key measurement tools.  
• **Documentation & traceability:** Outline required QC records (checklists, SPC charts) linked back to individual object serial numbers.

Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points wherever possible.  
• No made-up data or sources.

---
CONTEXT
{context}
---
"""))

quality_section = run_section(
    prompt_tpl=quality_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append((
    "Quality Assurance & Validation",
    quality_section
))
# print(quality_section)

In [21]:
# Digitalization & Smart-Manufacturing Enablers

digitalization_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web snippets) as your sole source.  
Shape any web-sourced details **only** to illustrate those frameworks and metrics.  
Do **not** introduce anything outside your static DB and retrieved chunks.

Refer to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Digitalization & Smart-Manufacturing Enablers**
                                                                  
• **Sensor selection:** Recommend 2–3 IoT sensor types suited to the object’s material/process (e.g., thermal, vibration, force).  
• **Data flow & analytics:** Outline how raw signals become actionable insights—edge vs. cloud, dashboard cadence.  
• **Connectivity & scale:** Describe network topology and compute needs for ~N units/day in a standard production cell.  
• **Integration:** Map out which existing control layers (PLC/MES/SCADA) will consume the data.  
• **Security & governance:** Highlight key practices for this object’s data integrity and access control.  
• **Operator interaction:** Define operator-facing interfaces or alerts specific to this object’s process risks.


Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points wherever possible.  
• No made-up data or sources.
                                                                  
---
CONTEXT
{context}
---
"""))

digitalization_section = run_section(
    prompt_tpl=digitalization_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append((
    "Digitalization & Smart-Manufacturing Enablers",
    digitalization_section
))
# print(digitalization_section)

In [22]:
info_modeling_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web-retrieved snippets) as your sole source.  
Shape any web-sourced details **only** to exemplify those pre-defined frameworks, methods, and metrics.  
Do **not** introduce topics, terms, or data outside your static DB and retrieved chunks.

Refer explicitly to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Information Modeling & Integration**  
• **Standards & Frameworks:** Summarize relevant models (ISA-95, RAMI 4.0, IIRA) and how they apply to this object’s data flows.  
• **Data Schema Outline:** Provide a placeholder entity-relationship diagram or table mapping part attributes (e.g., dimensions, material grade, batch ID) to MES/ERP fields.  
• **Integration Points:** **Highlight** where in the process (e.g., post-inspection, real-time sensor feeds) data should be captured and synchronized.  
• **Digital Thread Implementation:** Describe how a unique object ID (e.g., QR code or RFID) links CAD → production parameters → quality records → maintenance logs.  
• **Interoperability KPIs:** **Set targets** for data latency (<1s), accuracy (>99%), and system uptime (≥99.9%).  
• **Validation Plan:** Recommend tests or mock API calls to verify schema compliance and end-to-end data flow.  
• **Governance & Security:** Outline ownership of data models, change-management process, and access controls.

Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points wherever possible.  
• No made-up data or sources.

---
CONTEXT
{context}
---
"""))

info_modeling_section = run_section(
    prompt_tpl=info_modeling_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append((
    "Information Modeling & Integration",
    info_modeling_section
))
# print(info_modeling_section)

In [23]:
simulation_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web-retrieved snippets) as your sole source.  
Shape any web-sourced details **only** to exemplify those pre-defined frameworks, methods, and metrics.  
Do **not** introduce topics, terms, or data outside your static DB and retrieved chunks.

Refer explicitly to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Simulation & Virtual Commissioning**  
• **Rationale for Discrete-Event Simulation:** Explain why DES is the most suitable approach for modeling this object’s production flow (e.g., event-driven operations, batch variability).  
• **Model Structure Sketch:** Provide a placeholder text swim-lane or block diagram (e.g., “Raw material → pre-processing → machine A → buffer → machine B → inspection → packaging”).  
• **Key Simulation KPIs:** **Set targets** for throughput (units/hour), work-in-progress (≤X units), resource utilization (≥Y %), and mean time between failures (MTBF ≥Z hours).  
• **Virtual Commissioning Steps:** Outline how to validate control logic off-line using the digital twin—include PLC code test cases, sensor input emulation, and HMI verification.  
• **Risks & Benefits:**  
  - **Risks:** model inaccuracy, input data gaps, overly optimistic performance estimates  
  - **Benefits:** reduced physical trial runs, faster ramp-up, early detection of bottlenecks  
• **Validation Plan:** Recommend a pilot run comparing simulation outputs to real-world metrics (e.g., cycle time ±5% deviation, error rate ≤1 %).  
• **Continuous Improvement Loop:** Describe how to integrate live production data back into the model for ongoing calibration.

Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points wherever possible.  
• No made-up data or sources.

---
CONTEXT
{context}
---
"""))

simulation_section = run_section(
    prompt_tpl=simulation_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append((
    "Simulation & Virtual Commissioning",
    simulation_section
))
# print(simulation_section)

In [24]:
network_centric_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web-retrieved snippets) as your sole source.  
Shape any web-sourced details **only** to exemplify those pre-defined frameworks, methods, and metrics.  
Do **not** introduce topics, terms, or data outside your static DB and retrieved chunks.

Refer explicitly to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Network-Centric & Collaborative Manufacturing**  
• **Definition & Rationale:** Explain the concept of network-centric manufacturing and why it applies to this object’s value chain (e.g., supplier integration, built-to-order flexibility).  
• **Collaboration Topology:** Sketch a text-based network diagram (e.g., “Design hub ↔ Production cells ↔ Distribution partners ↔ After-sales service”) showing data and material flows.  
• **Information Exchange Standards:** Cite relevant protocols (e.g., OPC UA, MQTT) and how they ensure interoperability among MES, ERP, and shop-floor devices.  
• **Key Collaboration KPIs:**  
  - Order-fulfillment lead time (target ≤ X days)  
  - Supplier on-time delivery rate (≥ Y %)  
  - Production cell cycle synchronization (takt variance ≤ Z %)  
• **Digital Thread Implementation:** Describe how the digital thread links CAD models, process parameters, and quality data across the network for traceability.  
• **Cross-Enterprise Workflows:** Outline steps for co-engineering with suppliers (e.g., shared BOM revisions, joint simulation reviews) and dynamic capacity sharing.  
• **Security & Governance:** Highlight data-security considerations (authentication, encryption) and roles/responsibilities matrix for network participants.  
• **Benefits & Risks:**  
  - **Benefits:** increased responsiveness, reduced inventory buffers, real-time visibility  
  - **Risks:** cybersecurity threats, integration complexity, data ownership disputes  

Formatting rules  
• Generate just the content, no need for any introduction or section headings.
• Bulleted points wherever possible.  
• No made-up data or sources.

---
CONTEXT
{context}
---
"""))

network_centric_section = run_section(
    prompt_tpl=network_centric_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append((
    "Network-Centric & Collaborative Manufacturing",
    network_centric_section
))
# print(network_centric_section)

In [25]:
roadmap_tpl = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the context block (manufacturing knowledge from your static vector store plus any web-retrieved snippets) as your sole source.  
Shape any web-sourced details **only** to exemplify those pre-defined frameworks, methods, and metrics.  
Do **not** introduce topics, terms, or data outside your static DB and retrieved chunks.

Refer explicitly to the object being manufactured when writing this section.

OBJECT DESCRIPTION  
{object_desc}

**Implementation Roadmap & Governance**  
• **Phased Timeline:** Lay out 4–6 major phases (e.g., “Phase 1: Pilot cell deployment,” “Phase 2: Digital thread integration,” “Phase 3: Full-scale automation”), each with target start/end quarters.  
• **Milestone Deliverables:** For each phase, list key deliverables (e.g., functional PV run, KPI baseline achieved, supplier onboarding).  
• **Stakeholder Matrix:** Tabulate roles and responsibilities (Engineering, Operations, IT, Quality, Finance) against each phase.  
• **Governance Model:** Describe decision authority and escalation paths (e.g., change-control board, steering committee).  
• **Resource & Budget Outline:** Provide high-level resource needs (headcount, CAPEX, OPEX) per phase.  
• **Risk Mitigation Plan:** Identify top 3 risks (e.g., integration delays, training gaps) and corresponding mitigation actions.  
• **Go/No-Go Criteria:** Specify success metrics for phase transitions (e.g., OEE > 85%, defect rate < 1%, on-time supplier rate ≥ 95%).  
• **Continuous Improvement Loop:** Define feedback cadence (e.g., monthly steering review, quarterly process audits) to refine roadmap.

Formatting rules  
• Generate just the content—no extra headings or narrative framing.  
• Bulleted points and simple markdown tables where appropriate.  
• No made-up data or sources.

---
CONTEXT
{context}
---
"""))

roadmap_section = run_section(
    prompt_tpl=roadmap_tpl,
    object_desc=object_desc,
    retriever=combined_retriever
)

generated_content.append((
    "Implementation Roadmap & Governance",
    roadmap_section
))
# print(roadmap_section)

### Create MD Report

In [26]:
def write_markdown_report(generated_content, output_path="manufacturing_report.md"):
    with open(output_path, "w", encoding="utf-8") as md:
        for title, content in generated_content:
            # write an H2 for the section title
            md.write(f"## {title}\n\n")
            # then write the markdown content
            md.write(content.strip() + "\n\n")
    print(f"✅ Report written to {output_path}")

In [27]:
write_markdown_report(generated_content)

print("✅ Done writing manufacturing_report.md")

✅ Report written to manufacturing_report.md
✅ Done writing manufacturing_report.md


### Convert MD to PDF

In [30]:
# Ensure the output directory exists
output_dir = "sample_outputs"
os.makedirs(output_dir, exist_ok=True)

# Paths
md_file = "manufacturing_report.md"
pdf_file = os.path.join(output_dir, f"{object_name}.pdf")

# Optional pandoc arguments
extra_args = [
    "--pdf-engine=xelatex",
    "-V", "geometry:margin=1in",
]

# Convert Markdown to PDF
output = pypandoc.convert_file(md_file, "pdf", outputfile=pdf_file, extra_args=extra_args)

# Check success
if os.path.exists(pdf_file):
    print(f"✅ Successfully wrote PDF to {pdf_file}")
else:
    print("❌ PDF conversion failed.")





✅ Successfully wrote PDF to sample_outputs/demo_pen.pdf
