# Document Loaders and Integrations

In [1]:
import openai
import os 
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

In [2]:
# Make sure OPENAI_API_KEY exists in the process environment without
# clobbering a key that was already exported (e.g. in the shell or a .env
# loader). Unconditionally assigning '' here would wipe a real key and make
# every later API call fail authentication. If the variable is unset it is
# initialised to the empty placeholder, exactly as before -- supply the real
# key via the environment rather than committing it to the notebook.
os.environ.setdefault('OPENAI_API_KEY', '')

In [3]:
# Copy the key from the process environment onto the openai module
# (None if the variable is not set).
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [4]:
# Instantiate the chat model with its default settings (no arguments passed).
llm = ChatOpenAI()

In [5]:
from langchain.document_loaders.csv_loader import CSVLoader

In [6]:
# Read the book list through the CSV loader; the resulting list of Documents
# is kept in `data` for the inspection cells that follow.
csv_path = "science_fiction.txt"
loader = CSVLoader(file_path=csv_path)
data = loader.load()

In [7]:
# Dump the whole list of loaded Documents; each one carries the row text in
# page_content and `source`/`row` in its metadata.
# NOTE(review): this prints every Document -- consider slicing for larger files.
print(data)

[Document(page_content='Book: Dune\nAuthor: Frank Herbert', metadata={'source': 'science_fiction.txt', 'row': 0}), Document(page_content="Book: Ender's Game\nAuthor: Orson Scott Card", metadata={'source': 'science_fiction.txt', 'row': 1}), Document(page_content='Book: Foundation\nAuthor: by Isaac Asimov', metadata={'source': 'science_fiction.txt', 'row': 2}), Document(page_content='Book: Neuromancer\nAuthor: William Gibson', metadata={'source': 'science_fiction.txt', 'row': 3}), Document(page_content='Book: The Left Hand of Darkness\nAuthor: Ursula K. Le Guin', metadata={'source': 'science_fiction.txt', 'row': 4}), Document(page_content='Book: Snow Crash\nAuthor: Neal Stephenson', metadata={'source': 'science_fiction.txt', 'row': 5}), Document(page_content='Book: Hyperion\nAuthor: Dan Simmons', metadata={'source': 'science_fiction.txt', 'row': 6}), Document(page_content='Book: The Stars My Destination\nAuthor: Alfred Bester', metadata={'source': 'science_fiction.txt', 'row': 7}), Docum

In [8]:
# Inspect the first Document: its page_content together with its metadata.
print(data[0])

page_content='Book: Dune\nAuthor: Frank Herbert' metadata={'source': 'science_fiction.txt', 'row': 0}


In [9]:
# Inspect the second Document (metadata row 1) for comparison.
print(data[1])

page_content="Book: Ender's Game\nAuthor: Orson Scott Card" metadata={'source': 'science_fiction.txt', 'row': 1}


In [10]:
# Print only the text payload of the first Document, without its metadata.
print(data[0].page_content)

Book: Dune
Author: Frank Herbert


In [None]:
# Prerequisite for UnstructuredHTMLLoader below: pip install unstructured

In [11]:
from langchain.document_loaders import UnstructuredHTMLLoader

In [12]:
# Parse the HTML page into Documents; `loader` and `data` are rebound here
# and now refer to the HTML source instead of the CSV one.
html_path = "simple.html"
loader = UnstructuredHTMLLoader(html_path)
data = loader.load()

In [13]:
# Display the loaded Documents; this HTML file yields a single Document whose
# metadata records the source path.
data

[Document(page_content='About This Page\n\nThis is a basic HTML page created by ChatGPT. You can customize it further to suit your needs.', metadata={'source': 'simple.html'})]

In [14]:
# Print the extracted text of the HTML Document so the line breaks render.
print(data[0].page_content)

About This Page

This is a basic HTML page created by ChatGPT. You can customize it further to suit your needs.


In [None]:
# Prerequisite for PyPDFLoader below: pip install pypdf

In [18]:
from langchain.document_loaders import PyPDFLoader

In [19]:
# Load the PDF into Documents; `loader` and `data` are rebound to the PDF source.
pdf_path = "simple.pdf"
loader = PyPDFLoader(pdf_path)
data = loader.load()

In [20]:
# Display the loaded Documents; each carries a `page` index in its metadata
# (appears to be one Document per PDF page -- confirm against the loader docs).
data

[Document(page_content=' A Simple PDF File \n This is a small demonstration .pdf file - \n just for use in the Virtual Mechanics tutorials. More text. And more \n text. And more text. And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. And more text. Boring, zzzzz. And more text. And more text. And \n more text. And more text. And more text. And more text. And more text. \n And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. And more text. And more text. Even more. Continued on page 2 ...', metadata={'source': 'simple.pdf', 'page': 0}),
 Document(page_content=' Simple PDF File 2 \n ...continued from page 1. Yet more text. And more text. And more text. \n And more text. And more text. And more text. And more text. And more \n text. Oh, how boring typing this stuff. But not as boring as watching \n paint dry. And more text. And more text. And more text. And more text. \n 

In [21]:
# Print the text of the first PDF Document (metadata page 0).
print(data[0].page_content)

 A Simple PDF File 
 This is a small demonstration .pdf file - 
 just for use in the Virtual Mechanics tutorials. More text. And more 
 text. And more text. And more text. And more text. 
 And more text. And more text. And more text. And more text. And more 
 text. And more text. Boring, zzzzz. And more text. And more text. And 
 more text. And more text. And more text. And more text. And more text. 
 And more text. And more text. 
 And more text. And more text. And more text. And more text. And more 
 text. And more text. And more text. Even more. Continued on page 2 ...


In [None]:
# Prerequisite for YoutubeLoader with add_video_info=True: pip install pytube

In [None]:
# Prerequisite for YoutubeLoader transcript fetching: pip install youtube-transcript-api

In [23]:
from langchain.document_loaders import YoutubeLoader

In [24]:
# Build a loader for the video's transcript (also requesting video info) and
# load it; `loader` and `data` are rebound to the YouTube source.
video_url = "https://www.youtube.com/watch?v=1LR6NPpFxw4"
loader = YoutubeLoader.from_youtube_url(video_url, add_video_info=True)
data = loader.load()

In [25]:
# Print the transcript text held in the first Document.
print(data[0].page_content)

sixty minutes rewind captain hopper is a whiz at mathematics some would say a genius one of that small band of brothers and sisters who ushered in the computer revolution in World War two at age 37 she left her professor's job at Vassar to serve as a lieutenant in the Navy Reserve she was sent to Harvard to help program the very first computer it had the unglamorous name of mark 1 and as far as we've progressed since this vacuum tube monster captain hopper says we ain't seen nothing yet you talk a lot about the computer revolution I thought we're in it and it's over no we're only at the beginning we've been through the preliminaries well what's it gonna be well we've got the Model T just the Model T that's where we are now she's up many mornings before 5:00 for the ride to Washington's National Airport 200 days a year she lectures two computer scientists at military bases she's also in demand on college campuses and among businessmen the Navy's proud to show off one of theirs who was i

In [26]:
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

In [27]:
# Human-message template with a single `youtube_transcript` placeholder that
# is filled in when the prompt is formatted.
summary_template = "Summarize the following: \n{youtube_transcript}"
human_prompt = HumanMessagePromptTemplate.from_template(summary_template)

In [28]:
# Wrap the single human-message template into a chat prompt.
chat_prompt = ChatPromptTemplate.from_messages([human_prompt])

In [29]:
# Fresh default chat model for the summarisation step (rebinds `llm`).
llm = ChatOpenAI()

In [30]:
# Fill the template with the transcript text, convert it to chat messages,
# and send those messages to the model.
prompt_value = chat_prompt.format_prompt(youtube_transcript=data[0].page_content)
result = llm(prompt_value.to_messages())

In [31]:
# Show only the text content of the model's reply.
print(result.content)

The article discusses the accomplishments and contributions of Grace Hopper, a mathematician and computer scientist who played a major role in the computer revolution. Hopper left her job as a professor at Vassar to join the Navy Reserve, where she helped program the first computer, called the Mark 1. She believes that the computer revolution is only in its beginning stages and that there is much more progress to be made. Hopper is highly respected in the field and is sought after for lectures and presentations. She is known for teaching computers to understand human language and for her work on increasing computational speed. The article also mentions Hopper's role in the Navy and her pride in serving in that branch.
