In [1]:
import requests

In [6]:
url = "https://api.nusmods.com/v2/2025-2026/moduleInfo.json"
data = requests.get(url)

In [7]:
data.status_code

200

In [8]:
len(data.json())

16456

In [10]:
import json
with open("module.json", "w") as outfile:
    json.dump(data.json(), outfile)

# Read and Filter Json

In [1]:
import json
with open("module.json", "r") as file:
    data = json.load(file)
len(data)

16456

In [13]:
computing_modules = [
    x for x in data
    if x['faculty'] == 'Computing' and int(''.join(filter(str.isdigit, x["moduleCode"]))) >= 4000
]


In [15]:
computing_modules[:5]

[{'moduleCode': 'AI5201',
  'title': 'Executive Mastery of Artificial Intelligence: Foundations and Strategies',
  'description': 'This course equips leaders with the essential knowledge of AI principles, technologies, and applications critical for strategic decision-making. This course covers AI and machine learning fundamentals, and the integration of AI with business strategies to drive innovation and efficiency. Designed for executives, it provides the insights needed to navigate the AI landscape, fostering a culture of innovation within organizations.',
  'moduleCredit': '4',
  'department': 'Information Systems and Analytics',
  'faculty': 'Computing',
  'workload': [2, 1, 0, 5, 2],
  'gradingBasisDescription': 'Graded',
  'semesterData': [{'semester': 2, 'covidZones': ['A']}]},
 {'moduleCode': 'AI5202',
  'title': 'Strategic Applications of Generative AI for Executives',
  'description': 'This course equips leaders with insights into the transformative power of generative artifi

# Adding into Langchain documents
Try without semester data first

In [16]:
from langchain.schema import Document

documents = []
for x in computing_modules:
    content = x["description"]
    metadata = {
        "modeuleCode": x["moduleCode"],
        "title": x["title"],
        "faculty": x["faculty"],
        "department": x["department"],
        "moduleCredit": x["moduleCredit"]
        # "semesterData" : x["semesterData"]
    }
    doc=Document(page_content=content, metadata = metadata)
    documents.append(doc)

In [17]:
len(documents)

308

In [18]:
from langchain.vectorstores import Chroma
# Try using huggingface embedding 
from langchain_huggingface import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = Chroma.from_documents(documents, embedding, persist_directory="./computing_modules_index")

In [19]:
retriever = vectorstore.as_retriever()
results = retriever.get_relevant_documents("Which modules are about generative AI for business leaders?")

  results = retriever.get_relevant_documents("Which modules are about generative AI for business leaders?")


In [20]:
results

[Document(metadata={'faculty': 'Computing', 'department': 'Information Systems and Analytics', 'moduleCredit': '4', 'modeuleCode': 'IS4401', 'title': 'Generative AI and Business Applications'}, page_content='Generative Artificial Intelligence (generative AI) has the ability to create new content such as text, image audio and video. Generative AI thus possesses an immense potential for creating business innovations and improving productivity. This course teaches students the underpinnings of generative AI models and to understand how to evaluate and choose the most appropriate model to solve a real-world problem. More importantly, students will also learn how to create generative AI applications using various approaches such as training custom models, using pretrained models and working with APIs. The course will also expose students to various emerging business applications of generative AI as part of the solutioning process.'),
 Document(metadata={'moduleCredit': '4', 'department': 'I

In [21]:
retriever.batch(
    [
        "What module teacch graph analysis?"
    ],
)[0]

[Document(metadata={'modeuleCode': 'CS6101', 'department': 'Computer Science', 'moduleCredit': '4', 'faculty': 'Computing', 'title': 'Exploration of Computer Science Research'}, page_content='This course introduces CS graduate students to various research areas in Computer Science. Study groups are organised for major research areas. Each study group provides a forum for students to read, present and discuss research papers, and acquire the basic research skills for literature review and critical comparison of existing work. Students will also gain a first experience in technical presentation and writing. This course will be graded as “Completed Satisfactory” or “Completed Unsatisfactory” (CS/CU).'),
 Document(metadata={'faculty': 'Computing', 'title': 'Foundation in Theoretical CS', 'department': 'Computer Science', 'modeuleCode': 'CS5201', 'moduleCredit': '0'}, page_content='The purpose of this course is to test the students on basic concepts in theoretical computer science. In parti

# Whole batch of modules

In [5]:
from langchain.schema import Document

documents = []
for x in data:
    content = x["description"]
    metadata = {
        "modeuleCode": x["moduleCode"],
        "title": x["title"],
        "faculty": x["faculty"],
        "department": x["department"],
        "moduleCredit": x["moduleCredit"]
        # "semesterData" : x["semesterData"]
    }
    doc=Document(page_content=content, metadata = metadata)
    documents.append(doc)

In [7]:
documents[0]

Document(metadata={'modeuleCode': 'ABM5001', 'title': 'Leadership in Biomedicine', 'faculty': 'Yong Loo Lin Sch of Medicine', 'department': "NUS Medicine Dean's Office", 'moduleCredit': '2'}, page_content='Leadership is fundamental to the success of individuals and organizations. As you progress in your biomedicine career, you will have to lead individuals, teams and organizations. This course prepares you to lead, by equipping you with principles, skills and practices of leadership.')

In [9]:
from langchain.vectorstores import Chroma
# Try using huggingface embedding 
from langchain_huggingface import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
vectorstore = Chroma.from_documents(documents, embedding, persist_directory="./nus_modules_index")

In [10]:
retriever = vectorstore.as_retriever()
results = retriever.get_relevant_documents("Which modules are about generative AI for business leaders?")

  results = retriever.get_relevant_documents("Which modules are about generative AI for business leaders?")


In [13]:
results

[Document(metadata={'moduleCredit': '4', 'department': 'Information Systems and Analytics', 'modeuleCode': 'IS4401', 'faculty': 'Computing', 'title': 'Generative AI and Business Applications'}, page_content='Generative Artificial Intelligence (generative AI) has the ability to create new content such as text, image audio and video. Generative AI thus possesses an immense potential for creating business innovations and improving productivity. This course teaches students the underpinnings of generative AI models and to understand how to evaluate and choose the most appropriate model to solve a real-world problem. More importantly, students will also learn how to create generative AI applications using various approaches such as training custom models, using pretrained models and working with APIs. The course will also expose students to various emerging business applications of generative AI as part of the solutioning process.'),
 Document(metadata={'modeuleCode': 'AI5202', 'title': 'St