In [1]:
# Standard library first, then third-party packages.
import warnings

from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (presumably the credentials the vectrix clients need - confirm).
load_dotenv()

# Keep ResourceWarning noise out of the demo output.
warnings.simplefilter("ignore", ResourceWarning)

# Vectrix Demo 👨🏻‍💻
This notebook demonstrates how to import data from various sources, load it into a VectorStore,
and then use it to answer questions with a Retrieval Augmented Reasoning 🦜🔗 LangGraph.

## Creating a new project



In [2]:
from vectrix import DB

# Database handle reused by the later cells for project and file management.
db = DB()

# Show which projects exist before the demo project is added.
print(db.list_projects())

# Kept as the last expression so its return value is rendered as the cell output.
db.create_project(
    "Test",
    description="This is a test project",
)

[32m2024-08-23 17:59:10,690 - DB - INFO - Added new project: Test[0m


[]


1

In [None]:
# Optional cleanup, disabled by default: uncomment to call db.remove_project on the "Test" project.
#db.remove_project("Test")

## Importing Data
### 1. From a URL 🔗

**Web Crawling and Data Extraction Example**


In [None]:
from vectrix.importers import WebScraper

# Ask the scraper for the links of the Vectrix website; `all_links` is the
# input of the download step that follows.
scraper = WebScraper()
all_links = scraper.get_all_links(
    "https://vectrix.ai"
)

In [None]:
# Download the collected links and attach them to the "Test" project.
scraper.download_pages(
    all_links,
    project_name="Test",
)

### 2. Upload files ⬆️
You can also upload files and add them to the vector store. Vectrix will automatically detect the file type, extract the text, and chunk the content into blocks.

In [3]:
from vectrix.importers import Files

# Importer used to add local documents to a project.
files = Files()

# `document_paths` takes a list, so several documents could be passed in one call;
# here a single scanned PDF is uploaded to the "Test" project.
files.upload_file(document_paths=["./files/pdf_with_scannedtext.pdf"], project_name="Test")

[32m2024-08-23 17:59:33,623 - Files - INFO - Files class initialized.[0m
[32m2024-08-23 17:59:33,668 - Files - INFO - Processing files locally[0m
[32m2024-08-23 17:59:34,883 - Files - INFO - Processing file 1 of 1[0m
[32m2024-08-23 17:59:34,928 - DB - INFO - Added uploaded file: pdf_with_scannedtext.pdf[0m
[32m2024-08-23 17:59:34,928 - Files - INFO - Uploaded ./files/pdf_with_scannedtext.pdf to object storage[0m
[32m2024-08-23 17:59:54,160 - Files - INFO - Chunked the document into 19 parts[0m


In [3]:
# Use a dedicated name for the listing: rebinding `files` here would replace the
# `Files` importer instance with a plain list, so any later `files.upload_file(...)`
# call (or a re-run of the upload call) would fail.
uploaded_files = db.list_files("Test")

# NOTE(review): the printed entries include signed download URLs with tokens;
# clear this output before sharing the notebook.
print(uploaded_files)

[32m2024-08-23 18:02:18,628 - DB - INFO - Listing uploaded files for project: Test[0m


[{'filename': 'pdf_with_scannedtext.pdf', 'signed_download_url': {'signedURL': 'http://127.0.0.1:54321/storage/v1/object/sign/Test/730f8eaf-d934-4f42-8253-cd3938ffa856.pdf?token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1cmwiOiJUZXN0LzczMGY4ZWFmLWQ5MzQtNGY0Mi04MjUzLWNkMzkzOGZmYTg1Ni5wZGYiLCJpYXQiOjE3MjQ0Mjg5MzgsImV4cCI6MTcyNDQzMjUzOH0.8LsER_rufL2NsO2EThd7K-dJKqHdcKGgfef1EObt1-M'}, 'creation_date': datetime.datetime(2024, 8, 23, 15, 59, 34, 926642, tzinfo=datetime.timezone.utc), 'update_date': datetime.datetime(2024, 8, 23, 15, 59, 34, 926653, tzinfo=datetime.timezone.utc)}]


In [4]:
# Remove the uploaded demo PDF from the "Test" project again.
db.remove_file(
    project_name="Test",
    file_name="pdf_with_scannedtext.pdf",
)

[32m2024-08-23 18:02:47,019 - DB - INFO - Removing uploaded file: pdf_with_scannedtext.pdf[0m
[32m2024-08-23 18:02:47,059 - DB - INFO - Removed uploaded file: pdf_with_scannedtext.pdf[0m
