In [1]:
import warnings

from dotenv import load_dotenv

# Read variables from a local .env file before anything else in the notebook runs.
load_dotenv()

# Keep ResourceWarning messages out of the cell outputs.
warnings.simplefilter(action="ignore", category=ResourceWarning)

# Vectrix Demo 👨🏻‍💻
This notebook demonstrates the functions for importing data from various sources,
loading it into a VectorStore, and then using it to answer questions with a Retrieval Augmented Reasoning 🦜🔗 LangGraph.

## Creating a new project



In [4]:
from vectrix import DB

# Handle reused by the project and file management cells further down.
db = DB()

# Print the current project list first, then register the demo project.
existing_projects = db.list_projects()
print(existing_projects)
db.create_project("Test", description="This is a test project")

[32m2024-08-26 13:26:35,884 - DB - INFO - Added new project: Test[0m


[]


6

In [3]:
# Remove the project named "Test" through the db handle.
# NOTE(review): in top-to-bottom order this runs right after the cell that
# creates "Test", while the later import cells still pass project name "Test";
# confirm whether this cell should be skipped or moved for a clean Run All.
db.remove_project("Test")

[32m2024-08-26 13:26:34,150 - DB - INFO - Removed project: Test[0m


True

## Importing Data
### 1. From a URL 🔗

**Web Crawling and Data Extraction Example**


In [1]:
from vectrix.importers import WebScraper

# Starting URL handed to the scraper's link collection.
base_url = "https://example.com/"

scraper = WebScraper(project_name='Test')
all_links = scraper.get_all_links(base_url)

[32m2024-08-26 13:28:16,114 - DB - INFO - Setting/updating scrape status for project: Test[0m
[32m2024-08-26 13:28:16,117 - DB - INFO - Created new status for base URL: https://example.com/[0m


Set/updated scrape status for base URL: https://example.com/


ERROR:trafilatura.downloads:not a 200 response: 404 for URL https://example.com/robots.txt
[32m2024-08-26 13:28:16,697 - DB - INFO - Adding links to confirm for base URL: https://example.com/[0m
INFO:DB:Adding links to confirm for base URL: https://example.com/
[32m2024-08-26 13:28:16,705 - DB - INFO - Setting/updating scrape status for project: Test[0m
INFO:DB:Setting/updating scrape status for project: Test
[32m2024-08-26 13:28:16,708 - DB - INFO - Updated existing status for base URL: https://example.com/[0m
INFO:DB:Updated existing status for base URL: https://example.com/


Added links to confirm for base URL: https://example.com/
Set/updated scrape status for base URL: https://example.com/


In [2]:
# Hand the links returned by get_all_links to the scraper for project "Test".
scraper.download_pages(all_links, project_name="Test")

[32m2024-08-26 13:28:24,477 - DB - INFO - Setting/updating scrape status for project: Test[0m
INFO:DB:Setting/updating scrape status for project: Test
[32m2024-08-26 13:28:24,479 - DB - INFO - Updated existing status for base URL: None[0m
INFO:DB:Updated existing status for base URL: None
[32m2024-08-26 13:28:24,481 - DB - INFO - Removing uploaded links for project name: Test[0m
INFO:DB:Removing uploaded links for project name: Test


Set/updated scrape status for base URL: None
Removed uploaded links for base URL: Test


[32m2024-08-26 13:28:24,994 - DB - INFO - Setting/updating scrape status for project: Test[0m
INFO:DB:Setting/updating scrape status for project: Test
[32m2024-08-26 13:28:24,996 - DB - INFO - Updated existing status for base URL: None[0m
INFO:DB:Updated existing status for base URL: None


Set/updated scrape status for base URL: None


### 2. Upload files ⬆️
You can also upload files and add them to the vector store. Vectrix will automatically detect the file type, extract the text, and chunk the content into blocks.

In [None]:
from vectrix.importers import Files

# Files importer instance used for the upload below.
files = Files()

# Documents to upload into the "Test" project.
pdf_paths = ["./files/pdf_with_scannedtext.pdf"]
files.upload_file(document_paths=pdf_paths, project_name="Test")

In [None]:
# Ask the db handle for the files of project "Test" and print the result.
# NOTE(review): this rebinds `files`, which the previous cell bound to a
# Files() instance — consider a distinct name such as `project_files`.
files = db.list_files("Test")
print(files)

In [None]:
# Remove "pdf_with_scannedtext.pdf" (the file name uploaded above) from project "Test".
db.remove_file(project_name="Test", file_name="pdf_with_scannedtext.pdf")