Name: Vaidurya Samaga

SRN: PES2UG23CS668

1. Introduction and setup

In [None]:
!pip install transformers nltk torch


In [None]:
from transformers import pipeline, set_seed, GPT2Tokenizer

In [None]:
import os
import nltk

In [None]:
# Location of the course text that is read below and reused as the NER / QA context.
# NOTE(review): looks like a Google Colab upload path — adjust when running elsewhere.
file_path='/content/unit 1.txt'

In [None]:
# Read the whole file into `text`. Later cells slice `text` directly, so a
# missing file has to stop the run here; otherwise the first visible failure
# would be an unrelated NameError further down.
try:
  with open(file_path, 'r', encoding="utf-8") as f:
    text = f.read()
except FileNotFoundError:
  print(f"Error: {file_path} not found!")
  raise  # keep the friendly message, but do not continue without `text`
else:
  print("File loaded successfully!")

In [None]:
# Show only the first 500 characters so the output stays readable.
print("--- Data Preview ---")
preview = text[:500]
print(f"{preview}...")

2. Generative AI: Smaller (distilgpt2) vs. Larger (gpt2) Models

In [None]:
# Seed the random number generators (via transformers.set_seed) before any text generation.
set_seed(42)

In [None]:
# Shared prompt passed to both text-generation models below.
prompt="AI is a revolutionary technology"

In [None]:
# Text generation with the distilgpt2 checkpoint (the "fast" model of this section).
fast_generator = pipeline('text-generation', model='distilgpt2')

# Re-seed immediately before generating so that re-running this cell gives the
# same continuation, independent of how much randomness earlier cells consumed.
set_seed(42)
output_fast = fast_generator(prompt, max_length=50, num_return_sequences=1)
print(output_fast[0]['generated_text'])

In [None]:
# Text generation with the full gpt2 checkpoint. It gets its own names so the
# distilgpt2 pipeline and its output from the previous cell are not overwritten
# (and so a "fast_" name no longer points at the larger model).
smart_generator = pipeline('text-generation', model='gpt2')

# Same seed as the distilgpt2 run: both models start from identical RNG state,
# and re-running this cell alone reproduces its output.
set_seed(42)
output_smart = smart_generator(prompt, max_length=50, num_return_sequences=1)
print(output_smart[0]['generated_text'])

3. NLP Fundamentals

In [None]:
# Step 1 - tokenisation: load the tokenizer that ships with the "gpt2" checkpoint.
tokenizer= GPT2Tokenizer.from_pretrained("gpt2")

In [None]:
# Example sentence reused by the tokenisation and POS-tagging cells.
sample_sentence="Transformers revolutionized NLP"

In [None]:
# Split the sample sentence into the tokenizer's token strings.
tokens = tokenizer.tokenize(sample_sentence)
print("Tokens:" + str(tokens))

In [None]:
# Convert each token string to its integer ID in the tokenizer's vocabulary.
token_ids = tokenizer.convert_tokens_to_ids(tokens)
print("Token IDs:" + str(token_ids))

Part-of-Speech (POS) Tagging

In [None]:
# Fetch the data that word_tokenize / pos_tag need. Both the legacy resource
# names and the names used by newer NLTK releases ("punkt_tab",
# "averaged_perceptron_tagger_eng") are requested so this cell is sufficient
# on its own whichever NLTK version is installed.
for resource in (
  'punkt',
  'punkt_tab',
  'averaged_perceptron_tagger',
  'averaged_perceptron_tagger_eng',
):
  nltk.download(resource, quiet=True)

In [None]:
# Tokenizer tables used by nltk.word_tokenize on newer NLTK releases.
# nltk is already imported near the top of the notebook, so it is not
# re-imported here; quiet=True matches the other download cell.
nltk.download('punkt_tab', quiet=True)


In [None]:
# Make sure the tokenizer and English POS-tagger data are present.
# nltk is already imported near the top of the notebook; downloads are quiet so
# repeated runs do not flood the output with "already up-to-date" messages.
for resource in ('punkt', 'punkt_tab', 'averaged_perceptron_tagger_eng'):
  nltk.download(resource, quiet=True)


In [None]:
# Tokenise into words first, then tag each word with its part of speech.
words = nltk.word_tokenize(sample_sentence)
pos_tags = nltk.pos_tag(words)
print("POS Tags:" + str(pos_tags))

Named Entity Recognition (NER)

In [None]:
# NER pipeline backed by a BERT-large checkpoint fine-tuned on CoNLL-03 English.
# With aggregation_strategy="simple" the results carry the 'word', 'entity_group'
# and 'score' keys that the next cell reads.
ner_pipeline=pipeline("ner",model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")

In [None]:
# Run NER on the first 1000 characters only, then list the confident hits.
snippet = text[:1000]
entities = ner_pipeline(snippet)
for ent in entities:
  # Skip anything the model is not more than 90% sure about.
  if not ent['score'] > 0.90:
    continue
  word, group, score = ent['word'], ent['entity_group'], ent['score']
  print(f"{word:<20} | {group:<10} | {score:.2f}")

4. Advanced applications

In [None]:
# Passage fed to both summarization pipelines below.
# NOTE(review): despite the variable name, the passage describes machine-learning
# paradigms (supervised learning), not Transformers — confirm this is the intended input.
transformer_section="""Machine Learning models learn from data in several fundamental ways, each suited to different types of problems.

Supervised Learning

In supervised learning, the model is trained on labeled data, meaning each data point is tagged with a correct output or label. The goal is to learn a mapping function that can predict the output for new, unseen data."""

Fast summarizer




In [None]:
# Summarize the passage with the distilbart checkpoint (the "fast" summarizer).
# do_sample=False is passed so generation does not sample.
fast_sum = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
res_fast = fast_sum(
    transformer_section,
    max_length=60,
    min_length=30,
    do_sample=False,
)
fast_summary = res_fast[0]
print(fast_summary['summary_text'])

Quality Summarizer


In [None]:
# Summarize the same passage with the bart-large-cnn checkpoint, using the same
# length limits as the fast summarizer so the two outputs are comparable.
smart_sum = pipeline("summarization", model="facebook/bart-large-cnn")
res_smart = smart_sum(
    transformer_section,
    max_length=60,
    min_length=30,
    do_sample=False,
)
smart_summary = res_smart[0]
print(smart_summary['summary_text'])

In [None]:
# Question-answering pipeline (DistilBERT checkpoint distilled on SQuAD); the next
# cell calls it with a question and a context and reads the 'answer' field.
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

In [None]:
# Questions to answer from the loaded course text.
questions = [
    "What is the fundamental innovation of the Transformer?",
    "What are the risks of using Generative AI?"
]

# Only the first 5000 characters of the text are used as context.
context = text[:5000]
for question in questions:
    answer = qa_pipeline(question=question, context=context)['answer']
    print(f"\nQ: {question}")
    print(f"A: {answer}")

In [None]:
# Fill-mask pipeline on bert-base-uncased; the next cell passes it a sentence
# containing a [MASK] placeholder.
mask_filler = pipeline("fill-mask", model="bert-base-uncased")

In [None]:
# Ask the model for candidates for the [MASK] slot and list each with its score.
masked_sentence = "The goal of Generative AI is to create new [MASK]."
preds = mask_filler(masked_sentence)

for candidate in preds:
    token, score = candidate['token_str'], candidate['score']
    print(f"{token}: {score:.2f}")