# Fully local webscraper for manufacturing

### ENPM692 Final Project
Vinay Lanka | Apoorv Thapliyal | Harsh Senjaliya

## Imports

In [1]:
from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import torch
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Langchain setup
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_ollama import OllamaLLM
import json, re, textwrap, time
from typing import Dict, List, Tuple

#DDG
from duckduckgo_search import DDGS

# Fetching HTML
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_transformers import Html2TextTransformer
from typing import List
from langchain.schema import BaseRetriever, Document

# Web scraping
import asyncio

# Database
from bs4 import BeautifulSoup
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms.base import BaseLLM
from langchain.prompts import PromptTemplate

import chromadb
from chromadb.config import Settings
from langchain_community.embeddings import HuggingFaceEmbeddings
from chromadb.config import Settings, DEFAULT_TENANT, DEFAULT_DATABASE
from langchain_community.vectorstores import Chroma

import os, glob
from langchain.document_loaders import PyPDFLoader
from langchain.schema import Document
from langchain.vectorstores import Chroma, FAISS
import shutil
from chromadb.config import Settings

from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage
import base64, pathlib

  from .autonotebook import tqdm as notebook_tqdm
USER_AGENT environment variable not set, consider setting it to identify your requests.


## Important Definitons

In [2]:
# Image path
img_path = "imgs/pencil.jpeg"

# Model definition
model_name = "gemma3:27b-it-qat"

## Captioning


In [3]:
# Hugging Face Image Captioning Model Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# # Print device information
print(f"Using device: {device}")

Using device: cuda


### Using Gemma to generate captions

In [4]:
img_b64 = base64.b64encode(pathlib.Path(img_path).read_bytes()).decode()

content = [
    {  # image part
        "type": "image_url",
        "image_url": f"data:image/jpeg;base64,{img_b64}",
    },
    {  # text prompt part
        "type": "text",
        "text": "Identify the primary object in this image and enumerate all observable material characteristics—such as base material, surface finish, color, texture, gloss level, or coating.  Ignore background elements and give the answer in one clear English sentence.",
    },
]

# llm = ChatOllama(model="gemma3:4b", temperature=0.3)
llm = ChatOllama(model=model_name, temperature=0.3)


object_desc = llm.invoke([HumanMessage(content=content)]).content

print(object_desc)

The primary object is a graphite pencil with a dark, matte black hexagonal wooden casing, a smooth, slightly glossy graphite core visible at one end, and white text printed along its length indicating “CPSQTO Platinum 6B” and “for good handwriting.”


## Static DB

### Setup static db 

Chroma DB with Persistence
Embedding LLM - all-MiniLM-L12-v2

Converts documents under `/data` to vector embeddings under `/db`.
Place any documents to be stored under `/data`.
To be run once.

In [5]:
static_dir = "db/sdb"
embedder   = HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2")

client = chromadb.PersistentClient(
    path=static_dir,
    settings=Settings(),          # you can pass custom Settings here if needed
    tenant=DEFAULT_TENANT,
    database=DEFAULT_DATABASE,
)

if os.path.exists(static_dir) and os.listdir(static_dir):
    print(f"Loading existing vector store from {static_dir}")
    vectordb_static = Chroma(
        client=client,
        embedding_function=embedder,
        persist_directory=static_dir
    )

else:
    print(f"Building new vector store in {static_dir}")
    if os.path.exists(static_dir):
        shutil.rmtree(static_dir)
    os.makedirs(static_dir, exist_ok=True)

    # load PDFs
    pdf_docs = []
    for path in glob.glob("data/*.pdf"):
        loader = PyPDFLoader(path)
        pdf_docs.extend(loader.load())

    # split into chunks
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=200,
        length_function=len
    )
    static_chunks = splitter.split_documents(pdf_docs)

    # build & persist
    vectordb_static = Chroma.from_documents(
        documents=static_chunks,
        embedding=embedder,
        client=client,
        persist_directory=static_dir
    )
    client.persist() 

  embedder   = HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2")


Loading existing vector store from db/sdb


  vectordb_static = Chroma(


### Create a retreiver

Pulling k = 6 relevant chunks

In [6]:
static_ret  = vectordb_static.as_retriever(
    search_kwargs={"k": 10}
)

## Generate queries

The LLM Model: Ollama gemma3:4b model will be used for query generation as well as final manufacturing document generation

In [7]:
# Initialize your LLM
llm = OllamaLLM(model=model_name, temperature=0.3)

# query_prompt = PromptTemplate.from_template(textwrap.dedent("""
# You are a manufacturing‐research assistant.

# Goal ▸ Draft **{k}** DuckDuckGo search queries that, taken together,
# will surface authoritative information for *all* of the report areas
# listed under **SECTIONS** below.

# Guidelines
# • Draw wording **only** from the context slide excerpts, the object
#   caption, and the section descriptions.  
# • Mix high-level and specific terms so the result set spans every
#   section (sustainability, process flow, tooling, simulation, etc.).  
# • Avoid near-duplicate phrasing; each query should probe a different
#   angle (process, material, KPI, cost, digital thread, …).  
# • Do **not** introduce themes absent from the context.  
# • Return **only** a valid JSON array of strings—no commentary.

# ---
# CONTEXT
# {context}
# ---

# OBJECT DESCRIPTION
# \"\"\"{caption}\"\"\"

# SECTIONS (for your internal guidance; do NOT echo them)
# 1  Executive summary / Object overview
# 2  Sustainability & life-cycle considerations
# 3  Material selection & eco-alternatives
# 4  Manufacturing-process flow
# 5  Tooling, automation & industrial robotics
# 6  Digital & smart-manufacturing enablers
# 7  Simulation & virtual validation
# 8  Quality & performance metrics
# 9  Environmental & cost impact
# 10 Implementation roadmap / Smart-mfg priorities
# ---
# """))

query_prompt = PromptTemplate.from_template(textwrap.dedent("""
You are a manufacturing‐research assistant.

Goal ▸ Draft **{k}** DuckDuckGo search queries that, taken together,
will surface authoritative information for *all* of the report areas
listed under **SECTIONS** below, *plus* the additional manufacturing themes.
Your queries should collectively cover every numbered theme.

Guidelines
• Draw wording **only** from the context slide excerpts, the object
  caption, and the section headings below.  
• Mix high-level and specific terms so the result set spans every
  theme (process, material, KPI, cost, digital thread, …).  
• Avoid near-duplicate phrasing; each query should probe a unique angle.  
• Do **not** introduce themes absent from the context or the lists.  
• Return **only** a valid JSON array of strings—no extra text.

---
CONTEXT
{context}
---

OBJECT DESCRIPTION
\"\"\"{caption}\"\"\"

SECTIONS (for your internal guidance; do **NOT** echo)
1.  Executive summary / Object overview  
2.  Sustainability & life-cycle considerations  
3.  Material selection & eco-alternatives  
4.  Manufacturing-process flow  
5.  Tooling, automation & industrial robotics  
6.  Digital & smart-manufacturing enablers  
7.  Simulation & virtual validation  
8.  Quality & performance metrics  
9.  Environmental & cost impact  
10. Implementation roadmap / Smart-mfg priorities  

ADDITIONAL THEMES (do **NOT** echo)
11.  Manufacturing Automation  
12.  Industrial Robotics  
13.  Additive Manufacturing (AM)  
14.  Subtractive vs. Layer-by-Layer Processes  
15.  Sustainable Manufacturing  
16.  Life-Cycle Thinking & LCA  
17.  6-R Philosophy (Re-think, Reduce, Re-place, Re-cycle, Re-use, Re-pair)  
18.  Unit Manufacturing Processes (UMPs)  
19.  Process-Level KPIs (energy use, CO₂ per unit, cycle time, takt time)  
20.  Material Selection & Eco-Alternatives  
21.  Digital/Smart Manufacturing Enablers  
22.  Information Models (ISA-95, RAMI 4.0)  
23.  Network-Centric Manufacturing  
24.  Discrete-Event Simulation  
25.  Gauging & Validation (gauge R&R, virtual validation swim-lanes)  
26.  Quality & Performance Metrics  
27.  Environmental & Cost Impact (life-cycle costing, cost-breakdown structure)  
28.  Tooling & Automation Migration  
29.  Digital Thread & IoT Sensors  
30.  Implementation Roadmap (phased Gantt, stakeholder matrix)  
---
"""))



def generate_queries(caption: str,
                     static_ret,
                     k_queries: int = 6,
                     k_ctx: int = 4) -> list[str]:


    docs = static_ret.get_relevant_documents(caption)[:k_ctx]
    ctx  = "\n\n".join(d.page_content[:800] for d in docs)


    chain  = LLMChain(llm=llm,
                      prompt=query_prompt.partial(k=k_queries))

    result_dict = chain.invoke({"caption": caption, "context": ctx})
    raw = result_dict["text"] if isinstance(result_dict, dict) else result_dict

    match = re.search(r"\[.*\]", raw, re.DOTALL)
    if not match:
        raise ValueError(f"JSON not found:\n{raw}")

    return [q.strip() for q in json.loads(match.group(0))]


In [8]:
search_queries = generate_queries(object_desc, static_ret, k_queries=30, k_ctx=6)
print("Generated queries:")
for q in search_queries:
    print(f"  • {q}")

  docs = static_ret.get_relevant_documents(caption)[:k_ctx]
  chain  = LLMChain(llm=llm,


Generated queries:
  • CPSQTO Platinum 6B manufacturing process
  • graphite pencil life-cycle considerations
  • sustainable materials for pencils
  • manufacturing process flow graphite pencil
  • automation in pencil manufacturing
  • industrial robotics for pencil production
  • digital manufacturing enablers pencil industry
  • simulation and virtual validation pencil manufacturing
  • quality metrics graphite pencil
  • environmental impact of pencil production
  • implementation roadmap smart manufacturing pencils
  • manufacturing automation graphite pencil
  • industrial robotics pencil assembly
  • additive manufacturing pencil components
  • subtractive vs layer-by-layer pencil manufacturing
  • sustainable manufacturing practices pencils
  • life-cycle thinking pencil production
  • 6-R philosophy pencil manufacturing
  • unit manufacturing processes graphite pencil
  • process-level KPIs pencil manufacturing
  • material selection eco-alternatives pencils
  • digital smart

## Dynamic DB

### Duck-Duck-Go scraping for top N URLs

In [9]:
def ddg_search(query: str, max_results: int = 5) -> list[str]:
    """
    Returns the top‑N DuckDuckGo result URLs for a query.
    """
    with DDGS() as ddgs:
        return [hit["href"] for hit in ddgs.text(query, max_results=max_results)]

# Example usage:
# urls = [u for q in queries for u in ddg_search(q, max_results=5)]
# urls = list(dict.fromkeys(urls))  # dedupe while preserving order

In [10]:
async def fetch_documents(urls: list[str]):
    """
    Asynchronously downloads each page and converts HTML→plain text.
    Returns a list of LangChain Document objects.
    """
    loader    = AsyncHtmlLoader(urls)
    html_docs = await loader.load()                       # list[Document] with HTML in .page_content
    transformer = Html2TextTransformer()                  # strips tags, yields markdown‑style text :contentReference[oaicite:0]{index=0}
    text_docs = transformer.transform_documents(html_docs)
    return text_docs


In [11]:
all_urls = []
for q in search_queries:
    urls = ddg_search(q, max_results=5)
    print(f"\nQuery: {q}\n  URLs:")
    for u in urls:
        print("    •", u)
    all_urls.extend(urls)
    time.sleep(1)  # be nice to DDG

unique_urls = list(dict.fromkeys(all_urls))
print(f"\nTotal unique URLs: {len(unique_urls)}")



Query: CPSQTO Platinum 6B manufacturing process
  URLs:
    • https://ipa-news.com/assets/pdfs/guidance/chapter-3-pgm-guide.pdf
    • https://www.metal-am.com/articles/the-advantages-of-metal-3d-printing-for-the-processing-of-platinum-group-metals/
    • https://technology.matthey.com/content/journals/10.1595/003214063X74136143
    • https://www.sciencedirect.com/science/article/pii/S1044580323008707
    • https://www.researchgate.net/publication/323949041_Refining_Approaches_in_the_Platinum_Group_Metal_Processing_Value_Chain-A_Review

Query: graphite pencil life-cycle considerations
  URLs:
    • https://www.researchgate.net/publication/362707264_Life_Cycle_Assessment_of_Pencils
    • https://graphography.org/index.php/2022/12/02/mechanical-pencil-month-day-2-the-life-cycle-of-the-pencil/
    • https://studylib.net/doc/9193036/life-cycle-of-a-pencil
    • https://www.scribd.com/document/573071950/LIFE-CYCLE-ASSESSMENT-OF-LEAD-PENCIL
    • https://sharnbrook.academy/wp-content/uploads

### Fetch HTML Docs as plain text

In [12]:
N = len(unique_urls)
print(f"\nFetching and parsing first {N} available pages:")

html_docs = []
success_count = 0
failure_count = 0

for idx, url in enumerate(unique_urls[:N], start=1):
    single_loader = AsyncHtmlLoader([url])
    try:
        docs_for_url = await asyncio.wait_for(
            asyncio.to_thread(single_loader.load),
            timeout=10
        )
        html_docs.extend(docs_for_url)
        print(f"[{idx}/{N}] ✓ Fetched {url}")
        success_count += 1

    except asyncio.TimeoutError:
        failure_count += 1
        print(f"[{idx}/{N}] ✗ Timeout {url}")
    except Exception as e:
        failure_count += 1
        print(f"[{idx}/{N}] ✗ Error {url}: {e}")

print(f"\nCompleted: {success_count} succeeded, {failure_count} failed out of {N} URLs.\n")

# Transform and inspect as before
transformer = Html2TextTransformer()
docs = transformer.transform_documents(html_docs)

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
    length_function=len
)
ddb = splitter.split_documents(docs)


Fetching and parsing first 127 available pages:


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://ipa-news.com/assets/pdfs/guidance/chapter-3-pgm-guide.pdf
Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.72s/it]


[1/127] ✓ Fetched https://ipa-news.com/assets/pdfs/guidance/chapter-3-pgm-guide.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  8.11it/s]


[2/127] ✓ Fetched https://www.metal-am.com/articles/the-advantages-of-metal-3d-printing-for-the-processing-of-platinum-group-metals/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.38it/s]


[3/127] ✓ Fetched https://technology.matthey.com/content/journals/10.1595/003214063X74136143


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.32s/it]


[4/127] ✓ Fetched https://www.sciencedirect.com/science/article/pii/S1044580323008707


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 14.42it/s]


[5/127] ✓ Fetched https://www.researchgate.net/publication/323949041_Refining_Approaches_in_the_Platinum_Group_Metal_Processing_Value_Chain-A_Review


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 14.59it/s]


[6/127] ✓ Fetched https://www.researchgate.net/publication/362707264_Life_Cycle_Assessment_of_Pencils


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.99it/s]


[7/127] ✓ Fetched https://graphography.org/index.php/2022/12/02/mechanical-pencil-month-day-2-the-life-cycle-of-the-pencil/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.99it/s]


[8/127] ✓ Fetched https://studylib.net/doc/9193036/life-cycle-of-a-pencil


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 15.89it/s]


[9/127] ✓ Fetched https://www.scribd.com/document/573071950/LIFE-CYCLE-ASSESSMENT-OF-LEAD-PENCIL


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://sharnbrook.academy/wp-content/uploads/2023/03/Science-Work-Years-9-10-Article.pdf
Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.02s/it]


[10/127] ✓ Fetched https://sharnbrook.academy/wp-content/uploads/2023/03/Science-Work-Years-9-10-Article.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  7.46it/s]


[11/127] ✓ Fetched https://citizensustainable.com/pencils-eco-friendly/


Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.63s/it]


[12/127] ✓ Fetched https://agreeableco.com/25-best-eco-friendly-pens-pencils-recycled-sustainable-materials-best-of-collection/


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.45s/it]


[13/127] ✓ Fetched https://www.trvst.world/sustainable-living/eco-friendly/pens-and-pencils/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 11.83it/s]


[14/127] ✓ Fetched https://www.sustainablejungle.com/eco-friendly-school-supplies/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.16it/s]


[15/127] ✓ Fetched https://allianzmedia.com/pencil-materials-to-replace-wood/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.08s/it]


[16/127] ✓ Fetched https://khatabook.com/blog/pencil-manufacturing-process/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  4.50it/s]


[17/127] ✓ Fetched https://www.madehow.com/Volume-1/Pencil.html


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.85it/s]


[18/127] ✓ Fetched https://www.youtube.com/watch?v=PMjaanfE21M


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.02it/s]


[19/127] ✓ Fetched https://www.artisticaly.com/how-pencils-are-made-raw-material-manufacturing-process-and-types/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.37it/s]


[20/127] ✓ Fetched https://mechforged.com/manufacturing-process-of-a-pencil/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.16it/s]


[21/127] ✓ Fetched https://blog.osum.com/pencil-making-machine/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.51s/it]


[22/127] ✓ Fetched https://www.pftautomation.com/product/showproduct.php?id=88


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.11s/it]


[23/127] ✓ Fetched https://dspace.mit.edu/handle/1721.1/83707


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  5.22it/s]


[24/127] ✓ Fetched https://www.cantechletter.com/2025/02/how-might-tariffs-affect-the-worlds-production-of-pencils/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.66it/s]


[25/127] ✓ Fetched https://www.sciencedirect.com/science/article/pii/S0921889021001974


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.15it/s]


[26/127] ✓ Fetched https://sharonandinge.com/drawing-robot


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 20.32it/s]


[27/127] ✓ Fetched https://journals.sagepub.com/doi/10.1177/09544062221096946?icid=int.sj-abstract.similar-articles.1


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.01s/it]


[28/127] ✓ Fetched https://newatlas.com/industrial-robot-pencil-sketches/21547/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 22.53it/s]


[29/127] ✓ Fetched https://www.mdpi.com/2227-9717/13/3/832


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.15s/it]


[30/127] ✓ Fetched https://www.sciencedirect.com/science/article/pii/S2214785323012427


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.66it/s]


[31/127] ✓ Fetched https://www.mckinsey.com/~/media/McKinsey/Business+Functions/Operations/Our+Insights/Industry+40+How+to+navigate+digitization+of+the+manufacturing+sector/Industry-40-How-to-navigate-digitization-of-the-manufacturing-sector.ashx


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.04s/it]


[32/127] ✓ Fetched https://newji.ai/japan-industry/unlocking-the-art-of-precision-exploring-japanese-mastery-in-colored-pencil-oem-manufacturing/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.55it/s]


[33/127] ✓ Fetched https://www.cbh.com/insights/articles/key-steps-to-digital-transformation-for-manufacturers/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.31s/it]


[34/127] ✓ Fetched https://www.6sigma.us/manufacturing/manufacturing-process-simulation/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 18.03it/s]


[35/127] ✓ Fetched https://dl.acm.org/doi/10.1145/3587889.3587904


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://static.sw.cdn.siemens.com/siemens-disw-assets/public/5btOA1Vuic3O7MPUaT4Tza/en-US/White+paper+CIMdata+-+Product+Verification+and+Validation+-+Accelerated+Product+Development.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.31it/s]


[36/127] ✓ Fetched https://static.sw.cdn.siemens.com/siemens-disw-assets/public/5btOA1Vuic3O7MPUaT4Tza/en-US/White+paper+CIMdata+-+Product+Verification+and+Validation+-+Accelerated+Product+Development.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 20.94it/s]


[37/127] ✓ Fetched https://resources.sw.siemens.com/en-US/fact-sheet-process-simulate-manufacturing-process-verification-in-a-powerful-3d/


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://tsapps.nist.gov/publication/get_pdf.cfm?pub_id=822090
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  5.96it/s]


[38/127] ✓ Fetched https://tsapps.nist.gov/publication/get_pdf.cfm?pub_id=822090


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.96it/s]


[39/127] ✓ Fetched https://chinastationery.com/knowledge/graphite-pencil-scale/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  7.00it/s]


[40/127] ✓ Fetched https://pencils.com/pages/hb-graphite-grading-scale


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.62it/s]


[41/127] ✓ Fetched https://penvibe.com/the-graphite-pencil-scale-ultimate-guide/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  4.45it/s]


[42/127] ✓ Fetched https://cecelyv.com/pencil-grading-scale/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 11.82it/s]


[43/127] ✓ Fetched https://artincontext.org/hardness-of-pencils/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  4.02it/s]


[44/127] ✓ Fetched https://calcedar.com/pencils-environmental-profile/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 11.56it/s]


[45/127] ✓ Fetched https://www.researchgate.net/publication/343469139_Eco-Efficiency_of_Pencil_Preduction_Using_Life_Cycle_Assessment_to_Increase_the_Manufacture_Sustainability


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.11it/s]


[46/127] ✓ Fetched https://greenchicafe.com/are-pencils-environmentally-friendly/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.75it/s]


[47/127] ✓ Fetched https://prezi.com/jfrplxrhxt_s/how-a-pencil-is-made-and-its-environmental-impacts/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.70it/s]


[48/127] ✓ Fetched https://www.img4you.com/knowledge/10004780


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.85it/s]


[49/127] ✓ Fetched https://www.cesmii.org/education/roadmap-tools/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 13.83it/s]


[50/127] ✓ Fetched https://visualdecisions.com/cesmii-smart-mfg-roadmap


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://ijcrt.org/papers/IJCRT24A4648.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.36it/s]


[51/127] ✓ Fetched https://ijcrt.org/papers/IJCRT24A4648.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 15.32it/s]


[52/127] ✓ Fetched https://www.gartner.com/smarterwithgartner/6-key-actions-for-a-successful-smart-manufacturing-strategy


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://www.verizon.com/business/resources/articles/creating-your-roadmap-to-smarter-manufacturing.pdf
Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.07s/it]


[53/127] ✓ Fetched https://www.verizon.com/business/resources/articles/creating-your-roadmap-to-smarter-manufacturing.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.20it/s]


[54/127] ✓ Fetched https://mechforged.com/manufacturing-process-of-a-mechanical-pencil/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 21.47it/s]


[55/127] ✓ Fetched https://www.researchgate.net/publication/334643030_A_Robot_That_Draws_and_Shades_with_Tactile_Force_Feedback_Sensed_Through_a_Pencil


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.84s/it]


[56/127] ✓ Fetched https://kawasakirobotics.com/applications/assembly/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.01it/s]


[57/127] ✓ Fetched https://www.emerald.com/insight/content/doi/10.1108/ir-09-2018-0189/full/html


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://xactmetal.com/wp-content/uploads/2020/04/Xact-Metal-Design-Guide.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.13it/s]


[58/127] ✓ Fetched https://xactmetal.com/wp-content/uploads/2020/04/Xact-Metal-Design-Guide.pdf


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.19s/it]


[59/127] ✓ Fetched https://www.sciencedirect.com/science/article/pii/S2214785319322072


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.32s/it]


[60/127] ✓ Fetched https://www.unionfab.com/blog/2024/09/additive-vs-subtractive-manufacturing


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.74it/s]


[61/127] ✓ Fetched https://www.rapiddirect.com/blog/additive-vs-subtractive-manufacturing/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.87s/it]


[62/127] ✓ Fetched https://kdmfab.com/subtractive-additive-manufacturing/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.26it/s]


[63/127] ✓ Fetched https://shop.machinemfg.com/comparing-additive-manufacturing-vs-subtractive-manufacturing-what-are-the-differences/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.53it/s]


[64/127] ✓ Fetched https://www.team-mfg.com/additive-vs-subtractive-manufacturing.html


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.26it/s]


[65/127] ✓ Fetched https://www.stationery-supplier.com/the-sustainable-journey-of-pencil-manufacturing/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.15s/it]


[66/127] ✓ Fetched https://durzerd.com/new/How-Innovations-in-Pencil-Manufacturing-are-Revolutionizing-Sustainability.html


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.09it/s]


[67/127] ✓ Fetched https://thesustainable.life/how-are-recycled-pencils-made/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.34it/s]


[68/127] ✓ Fetched https://www.facetsjournal.com/doi/10.1139/facets-2023-0170


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.65it/s]


[69/127] ✓ Fetched https://www.teachsustainability.org/post/the-great-pencil-challenge-exploring-the-eco-footprint-of-the-humble-pencil


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.04it/s]


[70/127] ✓ Fetched https://www.nsta.org/journals/science-scope/science-scope-april-2000/life-cycle-pencil


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.85it/s]


[71/127] ✓ Fetched https://www.theveganreview.com/the-6-rs-of-sustainability-what-does-sustainable-really-mean/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 18.06it/s]


[72/127] ✓ Fetched https://www.researchgate.net/publication/333466082_Process_sustainability_evaluation_for_manufacturing_of_a_component_with_the_6R_application


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  6.14it/s]


[73/127] ✓ Fetched https://practicalaction.org/learning/waste/the-6-rs/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.23it/s]


[74/127] ✓ Fetched https://ecohan.com/sustainable-living/the-6-rs-of-sustainability-a-comprehensive-guide-to-eco-conscious-living/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.04s/it]


[75/127] ✓ Fetched https://koliwoodpencil.com/how-are-pencils-manufactured/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  6.68it/s]


[76/127] ✓ Fetched https://musgravepencil.com/blogs/news/howapencilismade


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.19it/s]


[77/127] ✓ Fetched https://www.projectmanager.com/blog/manufacturing-kpis


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  5.66it/s]


[78/127] ✓ Fetched https://insightsoftware.com/blog/30-manufacturing-kpis-and-metric-examples/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.20it/s]


[79/127] ✓ Fetched https://www.dataparc.com/blog/establishing-manufacturing-kpis-for-continuous-improvement/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 10.62it/s]


[80/127] ✓ Fetched https://www.netsuite.com/portal/resource/articles/erp/manufacturing-kpis-metrics.shtml


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.13s/it]


[81/127] ✓ Fetched https://link.springer.com/chapter/10.1007/978-3-031-72494-7_22


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 15.14it/s]


[82/127] ✓ Fetched https://www.artnews.com/art-news/product-recommendations/sustainable-the-best-eco-friendly-graphite-pencils-1234626048/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.35it/s]


[83/127] ✓ Fetched https://www.stationerymanufacturers.com/pencil-innovations-the-future-of-an-age-old-tool/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 19.43it/s]


[84/127] ✓ Fetched https://journals.sagepub.com/doi/full/10.1177/0954405417736547


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.88s/it]


[85/127] ✓ Fetched https://www.sciencedirect.com/science/article/pii/S0360835222004922


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 20.32it/s]


[86/127] ✓ Fetched https://dl.acm.org/doi/10.1145/3652620.3688250


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 11.73it/s]


[87/127] ✓ Fetched https://enme.umd.edu/clark/faculty/1772/Jay-Lee


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://apsom.org/docs/T061_isa95-04.pdf
Fetching pages: 100%|##########| 1/1 [00:04<00:00,  4.18s/it]


[88/127] ✓ Fetched https://apsom.org/docs/T061_isa95-04.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  7.91it/s]


[89/127] ✓ Fetched https://www.mesmatters.com/post/what-is-the-isa-95-model-and-where-does-mes-fit


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.83it/s]


[90/127] ✓ Fetched https://www.emqx.com/en/blog/exploring-isa95-standards-in-manufacturing


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 18.95it/s]


[91/127] ✓ Fetched https://www.sw.siemens.com/en-US/technology/isa-95-framework-layers/


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://web-material3.yokogawa.com/2/11821/tabs/document_11744.pdf
Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.10s/it]


[92/127] ✓ Fetched https://web-material3.yokogawa.com/2/11821/tabs/document_11744.pdf


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.05s/it]


[93/127] ✓ Fetched https://www.syndicatedanalytics.com/pencil-manufacturing-plant-project-report


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.34s/it]


[94/127] ✓ Fetched https://www.neuralword.com/en/education-history-science-general-culture-society/science-nature/how-pencils-are-manufactured-an-overview-of-the-production-process


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.44it/s]


[95/127] ✓ Fetched https://www.interwell.cn/how-pencils-are-made-a-detailed-production-process-guide


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 17.15it/s]


[96/127] ✓ Fetched https://www.tandfonline.com/doi/full/10.1080/00207543.2011.618147


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.04s/it]


[97/127] ✓ Fetched https://slideplayer.com/slide/4457371/


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 12.70it/s]


[98/127] ✓ Fetched https://en.wikipedia.org/wiki/Discrete-event_simulation


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://ocw.mit.edu/courses/2-875-mechanical-assembly-and-its-role-in-product-development-fall-2004/e9c308d7bcb9c7bf6b7685b4bc7ac6a8_cls20_smltion04.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.63it/s]


[99/127] ✓ Fetched https://ocw.mit.edu/courses/2-875-mechanical-assembly-and-its-role-in-product-development-fall-2004/e9c308d7bcb9c7bf6b7685b4bc7ac6a8_cls20_smltion04.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 23.24it/s]


[100/127] ✓ Fetched https://www.tandfonline.com/doi/full/10.1080/0951192X.2016.1145812


Fetching pages: 100%|##########| 1/1 [00:00<00:00, 13.78it/s]


[101/127] ✓ Fetched https://ferltz.github.io/inv_oper_2/documents/books/Discrete-Event+System+Simulation-Pearson+Banks+Carson+(2013).pdf


Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.74s/it]


[102/127] ✓ Fetched https://lgilab.com/decoding-the-pencil-hardness-scale-key-insights-for-effective-packaging-validation/


Fetching pages: 100%|##########| 1/1 [00:03<00:00,  3.99s/it]


[103/127] ✓ Fetched https://lgilab.com/understanding-the-astm-d3363-pencil-test-for-packaging-validation/


Fetching pages: 100%|##########| 1/1 [00:05<00:00,  5.32s/it]


[104/127] ✓ Fetched https://lgilab.com/decoding-pencil-hardness-a-key-factor-in-packaging-validation/


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.02s/it]


[105/127] ✓ Fetched https://www.elcometerusa.com/Laboratory/Pencil-Hardness-Testing/


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://www.energy.gov/sites/default/files/2021-07/Module_6B.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  6.95it/s]


[106/127] ✓ Fetched https://www.energy.gov/sites/default/files/2021-07/Module_6B.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  6.34it/s]


[107/127] ✓ Fetched https://www.ease.io/blog/14-metrics-every-quality-exec-should-monitor-how-to-calculate-them/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  6.93it/s]


[108/127] ✓ Fetched https://www.someka.net/blog/quality-kpis/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.93it/s]


[109/127] ✓ Fetched https://safetyculture.com/topics/quality-metrics/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  9.67it/s]


[110/127] ✓ Fetched https://bscdesigner.com/quality-kpis.htm


Fetching pages: 100%|##########| 1/1 [00:04<00:00,  4.13s/it]


[111/127] ✓ Fetched https://jurnalindustri.petra.ac.id/index.php/ind/article/view/22504


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.75it/s]


[112/127] ✓ Fetched https://ecommons.udayton.edu/cgi/viewcontent.cgi?article=2741&context=stander_posters


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.42it/s]


[113/127] ✓ Fetched https://sustain.ubc.ca/sites/default/files/seedslibrary/EcoFriendlyOfficeSupplies_Group+1.pdf


Fetching pages: 100%|##########| 1/1 [00:01<00:00,  1.14s/it]


[114/127] ✓ Fetched https://www.ctemag.com/news/pencil-sketch-high-tech-automation


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  5.66it/s]


[115/127] ✓ Fetched https://electrical-engineering-portal.com/automation-migration-strategy-in-3-phases


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://www.makino.com/getmedia/d3ed3d7d-663c-4a28-936f-805ece30f742/The-Roadmap-to-the-Five-Levels-of-Manufacturing-Automation-(2020)-(1).pdf?ext=.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  4.26it/s]


[116/127] ✓ Fetched https://www.makino.com/getmedia/d3ed3d7d-663c-4a28-936f-805ece30f742/The-Roadmap-to-the-Five-Levels-of-Manufacturing-Automation-(2020)-(1).pdf?ext=.pdf


Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.91s/it]


[117/127] ✓ Fetched https://www.ajiet.edu.in/img/mech/18+scheme/CADM-18ME72.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.23it/s]


[118/127] ✓ Fetched https://prophecyiot.com/digital-thread/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.62it/s]


[119/127] ✓ Fetched https://www.microsoft.com/en-us/industry/blog/manufacturing-and-mobility/2025/03/13/unlocking-the-future-of-manufacturing-with-ai-powered-digital-thread/


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.48it/s]


[120/127] ✓ Fetched https://www.ibm.com/think/topics/digital-thread-vs-digital-twin


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.23it/s]


[121/127] ✓ Fetched https://www.plataine.com/application/digital-thread/


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://productiondigitalthread.com/wp-content/uploads/2024/04/Digital-Thread-finalwhitepapers_jan2024.pdf
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.76it/s]


[122/127] ✓ Fetched https://productiondigitalthread.com/wp-content/uploads/2024/04/Digital-Thread-finalwhitepapers_jan2024.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.06it/s]


[123/127] ✓ Fetched https://thorntonandlowe.com/how-to-write-an-implementation-plan/


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]Failed to decode content from https://nzbef.org.nz/wp-content/uploads/2019/05/Guide-Lean-Implementation-Steps-Plan-Roadmap-Timeline.pdf
Fetching pages: 100%|##########| 1/1 [00:02<00:00,  2.43s/it]


[124/127] ✓ Fetched https://nzbef.org.nz/wp-content/uploads/2019/05/Guide-Lean-Implementation-Steps-Plan-Roadmap-Timeline.pdf


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.91it/s]


[125/127] ✓ Fetched https://www.officetimeline.com/blog/best-examples-of-timelines-gantt-charts-and-roadmaps-for-the-manufacturing-industry


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.64it/s]


[126/127] ✓ Fetched https://www.projectmanager.com/blog/tips-for-project-roadmap


Fetching pages: 100%|##########| 1/1 [00:00<00:00,  9.13it/s]


[127/127] ✓ Fetched https://www.smartsheet.com/content/implementation-plan-templates

Completed: 127 succeeded, 0 failed out of 127 URLs.



### Convert fetched HTML into LangChain Documents

In [13]:
embedder = SentenceTransformerEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"}
)

vectordb_dynamic = FAISS.from_documents(
    documents=ddb,    # your list of Document chunks
    embedding=embedder
)

## Create Retrievers

In [14]:
class CombinedRetriever(BaseRetriever):
    retrievers: List[BaseRetriever]
    k: int = 5

    def get_relevant_documents(self, query: str) -> List[Document]:
        docs: List[Document] = []
        for r in self.retrievers:
            docs.extend(r.get_relevant_documents(query))
        return docs[: self.k]


dynamic_ret = vectordb_dynamic.as_retriever(
    search_kwargs={"k": 30}
)

combined_retriever = CombinedRetriever(
    retrievers=[static_ret, dynamic_ret],
    k=12
)


  class CombinedRetriever(BaseRetriever):


## Ask the LLM

Finally, prompt the LLM with the final query

### Final Prompt

In [None]:
# Prompt 1: Sections 1–3
first_prompt = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the **context** block (course‐slide excerpts + web snippets/URLs) as your only slide‐based source.
Let the slides define structure, emphasis, and terminology—shape any web facts **only** to illustrate 
slide‐prescribed methods, metrics, or priorities. Do not stray outside slide guidance.

Slides commonly cited:
  • Lecture 1 – course intro & final-project guidance  
  • Lecture 4 – sustainable-manufacturing overview, 6 R’s  
  • Lecture 5 – unit-process KPIs & material I/O tables  

**Write these three sections (≈500 words each), each starting exactly with `## <heading>`:**

## Executive Summary & Object Overview  
  • …

## Manufacturing Automation & Process Classification  
  • …

## Unit Manufacturing Processes & Workflow Mapping  
  • …

Formatting rules  
• Start each section with `## <heading>` exactly.  
• Use bullets under each heading.  
• Cite every factual claim: `(Lecture #)` or `(URL n)`.  
• Never invent data or sources.

---
CONTEXT
{context}
---

Question → "{question}"

Answer:
"""))

# Prompt 2: Sections 4–6
second_prompt = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the **context** block (course‐slide excerpts + web snippets/URLs) only.
Shape web-sourced details **only** to support slide-defined frameworks.

Key slides:
  • Lecture 2 & 3 – additive & traditional processes  
  • Lecture 4 – sustainability & 6 R’s  
  • Lecture 5 – UMP KPIs & I/O tables  

**Write these three sections (≈500 words each), each starting exactly with `## <heading>`:**

## Additive vs. Subtractive (Layer-by-Layer) Manufacturing Techniques  
  • …

## Material Selection, Eco-Alternatives & the 6-R Philosophy  
  • …

## Sustainable Manufacturing & Life-Cycle Assessment (LCA)  
  • …

Formatting rules as above.
---
CONTEXT
{context}
---

Question → "{question}"

Answer:
"""))

# Prompt 3: Sections 7–9
third_prompt = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Rely only on the **context** block (slides + URLs).  Integrate web snippets **only** 
to exemplify slide-prescribed methods or metrics.

Key slides:
  • Lecture 6 & 7 – robotics & automation levels  
  • Lecture 8 – simulation fundamentals  
  • Lecture 9 & 10 – digital/thread & network-centric frameworks  

**Write these three sections (≈500 words each), each starting exactly with `## <heading>`:**

## Industrial Robotics, Tooling Strategy & Automation Migration  
  • …

## Digital & Smart-Manufacturing Enablers & IoT Integration  
  • …

## Information Modeling, Digital Thread & Network-Centric Manufacturing  
  • …

Formatting rules as above.
---
CONTEXT
{context}
---

Question → "{question}"

Answer:
"""))

# Prompt 4: Sections 10–12
fourth_prompt = PromptTemplate.from_template(textwrap.dedent("""
You are a senior manufacturing engineer.

Use the **context** block (slides + URLs) exclusively.  Let slides drive structure;
use web details **only** to reinforce slide-defined validation, costing, or roadmap.

Key slides:
  • Lecture 8 – DES rationale  
  • Lecture 5 & 7 – KPIs, gauge R&R  
  • Lecture 4 & 10 – life-cycle costing & roadmap actions  

**Write these three sections (≈500 words each), each starting exactly with `## <heading>`:**

## Discrete-Event Simulation & Virtual Validation  
  • …

## Quality & Performance Metrics, Gauging R & R & Validation Methods  
  • …

## Environmental & Cost Impact Analysis & Implementation Roadmap  
  • …

Formatting rules as above.
---
CONTEXT
{context}
---

Question → "{question}"

Answer:
"""))

from typing import List, Tuple

section_batches: List[Tuple[str, PromptTemplate, str]] = [
    (
        "Sections 1-3",
        first_prompt,
        "\n".join([
            "Executive Summary & Object Overview",
            "Manufacturing Automation & Process Classification",
            "Unit Manufacturing Processes & Workflow Mapping",
            "",
            f"Object description: {object_desc}"
        ])
    ),
    (
        "Sections 4-6",
        second_prompt,
        "\n".join([
            "Additive vs. Subtractive (Layer-by-Layer) Manufacturing Techniques",
            "Material Selection, Eco-Alternatives & the 6-R Philosophy",
            "Sustainable Manufacturing & Life-Cycle Assessment (LCA)",
            "",
            f"Object description: {object_desc}"
        ])
    ),
    (
        "Sections 7-9",
        third_prompt,
        "\n".join([
            "Industrial Robotics, Tooling Strategy & Automation Migration",
            "Digital & Smart-Manufacturing Enablers & IoT Integration",
            "Information Modeling, Digital Thread & Network-Centric Manufacturing",
            "",
            f"Object description: {object_desc}"
        ])
    ),
    (
        "Sections 10-12",
        fourth_prompt,
        "\n".join([
            "Discrete-Event Simulation & Virtual Validation",
            "Quality & Performance Metrics, Gauging R & R & Validation Methods",
            "Environmental & Cost Impact Analysis & Implementation Roadmap",
            "",
            f"Object description: {object_desc}"
        ])
    ),
]


def run_batch(
    prompt_tpl: PromptTemplate,
    question: str
) -> str:
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=combined_retriever,
        chain_type="stuff",
        chain_type_kwargs={
            "prompt": prompt_tpl,
            "document_variable_name": "context",
        },
        input_key="question",
        return_source_documents=False,
    )
    out = qa({"question": question})
    return out["result"] if isinstance(out, dict) else out

def clean_to_first_h2(md: str) -> str:
    idx = md.find("## ")
    return md[idx:].strip() if idx != -1 else md.strip()

report_sections: Dict[str,str] = {}
for batch_name, prompt_tpl, question in section_batches:
    print(f"Generating {batch_name}…")
    raw = run_batch(prompt_tpl, question)
    report_sections[batch_name] = clean_to_first_h2(raw)

In [None]:
with open("manufacturing_report.md", "w") as md:
    for key in ["Sections 1–3", "Sections 4–6", "Sections 7–9", "Sections 10–12"]:
        md.write(report_sections[key] + "\n\n")

print("✅ Done writing manufacturing_report.md")

### Executive Overview & Sustainability Foundations

In [113]:
first_prompt = PromptTemplate.from_template("""
You are a senior manufacturing engineer.

Use ONLY the information in the **context** block (course-slide excerpts
and web snippets/URLs) to draft the first three sections of a manufacturing
report.  Let the slides guide the emphasis; enrich with web details when the
context includes them.

┌───────────────────────────────────────────────────────────────┐
│  Slides commonly cited                                        │
│  • Lecture 1  – course intro & final-project guidance         │
│  • Lecture 4  – sustainable-manufacturing overview, 6 R’s     │
│  • Lecture 5  – unit-process KPIs & material I/O tables       │
└───────────────────────────────────────────────────────────────┘

**Write these three sections — ~500 words EACH — and label them exactly:**

1. Executive summary / Object overview  
2. Sustainability & life-cycle considerations  
3. Material selection & eco-alternatives  

Formatting rules  
• Start each section with `## <section title>` (Markdown H-2).  
• Inline cite every factual claim: `(Lecture #)` for slides, `(URL n)` for
  web snippets where *n* is the URL’s position in the context list.  
• If a necessary fact is missing, omit it — never invent content or sources.

---
CONTEXT
{context}
---

Question → "{question}"

Answer:
""")

question = (
    "Task: Draft a detailed section 1,2,3 sections of a manufacturing guide (≈ 1000 words)\n\n"
    "1. **Executive summary / Object overview**\n\n"
    "2. **Sustainability & life-cycle considerations**\n\n"
    "3. **Material selection & eco-alternatives**\n\n"
    "Ground rules:\n"
    "1. The **Object description** below is the object being manufactured.\n"
    "2. All other statements MUST be supported by the CONTEXT block.\n"
    "3. Do not fabricate data or references. \n\n"
    f"Object description: {object_desc}"
)


qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=combined_retriever,
    chain_type="stuff",
    chain_type_kwargs={
        "prompt": first_prompt,
        "document_variable_name": "context",
    },
    input_key="question",          # Match the prompt
    return_source_documents=True,
)

result = qa_chain({"question": question})

In [115]:
print(result["result"])

Okay, here's a draft of the three sections you requested, based on the provided context. I've aimed for approximately 1000 words total, prioritizing detail and accuracy based on the provided sources.  I've included citations at the end for easy reference. **Please read the notes at the very end of this response – they are crucial for understanding the limitations and assumptions made in this draft.**

---

**1. Executive Summary / Object Overview: The Rubik's Cube – A Manufacturing Perspective**

The Rubik's Cube, initially known as the Magic Cube, is a globally recognized mechanical puzzle invented by Hungarian architect Ernő Rubik in 1974. It quickly transcended its origins as a teaching tool, becoming a cultural icon and a symbol of problem-solving and ingenuity. This manufacturing guide details the production process of a standard 3x3x3 Rubik’s Cube, focusing on material selection, assembly, and emerging sustainability considerations.  The core challenge in Rubik's Cube manufacturi

### Process Flow, Automation & Digital Enablers

In [78]:
second_prompt = PromptTemplate.from_template("""
You are a senior manufacturing engineer.

Using ONLY the **context** block (slide excerpts and web snippets/URLs),
draft **Sections 4-6** of the manufacturing report.  Prioritise the listed
lectures when choosing evidence; add web details only if they appear in the
context.

┌───────────────────────────────────────────────────────────────┐
│  Key slide sources for these sections                         │
│  • Lectures 2-3 – additive & traditional process chains        │
│  • Lecture 5  – unit-process stage/KPI charts                  │
│  • Lecture 7  – industrial-robot anatomy, automation levels    │
│  • Lecture 1  – automation-level pyramid (overview)            │
│  • Lecture 9  – digital-manufacturing information models       │
│  • Lecture 10 – network-centric / smart-manufacturing decks    │
└───────────────────────────────────────────────────────────────┘

**Write the following sections — target ≈ 500 words EACH — with headings:**

4. Manufacturing-process flow  
   • Provide a layer-by-layer or unit-operation flow chart description.  
   • Include placeholders for cycle-time/takt-time where slides give cues.

5. Tooling, automation & industrial robotics  
   • Bill-of-tooling and robot-cell layout derived from Lecture 7 diagrams.  
   • Map each tool/robot to the step in Section 4; outline an automation-migration strategy.

6. Digital & smart-manufacturing enablers  
   • Summarise digital-thread architecture and data-interoperability standards  
     (ISA-95, RAMI 4.0) referenced in Lectures 9 & 10.  
   • List IoT sensors or MES data points that close the loop.

Formatting rules  
• Begin every section with `## <section title>` (Markdown H-2).  
• Inline-cite every factual claim: `(Lecture #)` for slides, `(URL n)` for
  web snippets where *n* is the order of the URL in the context list.  
• If the context lacks specific details, leave them blank or mark “TBD”
  — never invent numbers, standards, or sources.

---
CONTEXT
{context}
---

Question → "{question}"

Answer:
""")

question = (
    "Task: Draft a detailed section 4,5,6 of a manufacturing guide (≈ 1000 words)\n\n"
    "4. **Manufacturing-process flow**  \n\n"
    "5. **Tooling, automation & industrial robotics**  \n\n"
    "6. **Digital & smart-manufacturing enablers**  \n\n"
    "Ground rules:\n"
    "1. The **Object description** below is the object being manufactured.\n"
    "2. All other statements MUST be supported by the CONTEXT block.\n"
    "3. Do not fabricate data or references. \n\n"
    f"Object description: {object_desc}"
)


qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=combined_retriever,
    chain_type="stuff",
    chain_type_kwargs={
        "prompt": second_prompt,
        "document_variable_name": "context",
    },
    input_key="question",          #  <<< match the prompt
    return_source_documents=True,
)

result = qa_chain({"question": question})


In [79]:
print(result["result"])

## Section 4: Manufacturing Process Flow – Rubik’s Cube Production

The production of a Rubik’s Cube is a surprisingly complex process, involving multiple stages of molding, assembly, and finishing. This section details the key steps involved, drawing directly from the provided context.

**4.1 Molding the Core & Center Cubes:**

The foundation of the Rubik’s Cube is the nylon core, followed by the ABS center cubes. The process begins with the injection molding of the nylon core. The context highlights the importance of material shrinkage, stating, “Different plastics will have a different shrink rate, and each tool must be specifically designed for the material that will be used.” This emphasizes the precision required to ensure consistent cube dimensions. The molding process utilizes ABS (Acrylonitrile Butadiene Styrene) for the center cubes, again acknowledging the need for tailored tooling based on material properties. The process involves injecting molten ABS into two-piece molds, 

### Simulation, Metrics & Smart-Manufacturing Roadmap

In [80]:
third_prompt = PromptTemplate.from_template("""
You are a senior manufacturing engineer.

Rely exclusively on the **context** block (course-slide excerpts + web snippets/URLs)
to draft **Sections 7-10** of the manufacturing report.  Give priority to the lecture
slides noted below; supplement with web data only when present in context.

┌────────────────────────────────────────────────────────────────────────┐
│  Slide sets most relevant to these sections                           │
│  • Lecture 8  – simulation in manufacturing                            │
│  • Lecture 5  – KPI hierarchy & unit-process metrics                   │
│  • Lecture 4  – life-cycle costing & environmental impact              │
│  • Lecture 10 – smart-manufacturing “ten priority actions”             │
└────────────────────────────────────────────────────────────────────────┘

**Write the following sections – aim for ≈ 400–500 words EACH – and label them
exactly as shown.  Start each with `## <section title>` (Markdown H-2).**

7. Simulation & virtual validation  
   • Explain the rationale for discrete-event simulation.  
   • Sketch a SIM-model swim-lane or text diagram.  
   • List KPIs to test and add a bullet list of risks/benefits.

8. Quality & performance metrics  
   • Build a three-tier KPI table (process, product, sustainability) based on Lecture 5.  
   • Mention gauge R&R or similar validation where cited.

9. Environmental & cost impact  
   • Provide an energy/CO₂-per-unit chart description and a cost-break-down structure.  
   • Summarise key life-cycle-cost drivers and note “win-win” trade-offs.

10. Implementation roadmap / Smart-mfg priorities  
    • Draft a phased Gantt-style narrative with migration milestones.  
    • Align each phase to the “ten priority actions” slide (Lecture 10).  
    • Include a brief stakeholder-responsibility matrix.

Citation rules  
• After every factual statement, cite its source: `(Lecture #)` or `(URL n)`  
  where *n* is the URL’s order in CONTEXT.  
• If a detail is missing, mark “TBD” — do **not** invent data or sources.

---
CONTEXT
{context}
---

Question → "{question}"

Answer:
""")

question = (
    "Task: Draft a detailed section 7,8,9,10 of a manufacturing guide (≈ 1000 words)\n\n"
    "7. **Simulation & virtual validation**   \n\n"
    "8. **Quality & performance metrics**  \n\n"
    "9. **Environmental & cost impact**  \n\n"
    "10. **Implementation roadmap / Smart-mfg priorities**  \n\n"
    "Ground rules:\n"
    "1. The **Object description** below is the object being manufactured.\n"
    "2. All other statements MUST be supported by the CONTEXT block.\n"
    "3. Do not fabricate data or references. \n\n"
    f"Object description: {object_desc}"
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=combined_retriever,
    chain_type="stuff",
    chain_type_kwargs={
        "prompt": third_prompt,
        "document_variable_name": "context",
    },
    input_key="question",          #  <<< match the prompt
    return_source_documents=True,
)

result = qa_chain({"question": question})


In [81]:
print(result["result"])

Okay, here’s a draft of sections 7-10 of a manufacturing guide for Rubik's Cubes, aiming for approximately 1000 words and incorporating the provided context.

---

**Manufacturing Guide: Rubik’s Cube Production**

**Section 7: Simulation & Virtual Validation (Approx. 250 words)**

Before commencing physical production, a rigorous simulation and virtual validation phase is crucial. This phase leverages CAD (Computer-Aided Design) software and finite element analysis (FEA) to identify potential weaknesses and optimize the cube’s structural integrity. The simulation process begins with a detailed 3D model of each component – the core, the side pieces, and the stickers – generated from the provided context.  Specifically, the simulation will focus on stress distribution under rotational forces, mimicking the cube's intended use. 

The simulation will test various rotational speeds and forces, mirroring the expected user experience.  The context highlights the use of Nylon for the core, ABS