Replies: 1 comment
-
|
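0xa9 as the copyright character looks like a Windows character-set value, probably Windows-1252. In UTF-8 the copyright sign is encoded as the two bytes 0xC2 0xA9, so a lone 0xA9 is not a valid start byte. That is why the UTF-8 decoder blew up. For your amusement, here is a discussion of discovering the character set in use. |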
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
Amazing project btw!!
I reduced the problem down to a file containing just the copyright character, which TradingView adds to all my code. (I wanted to index all my code into the AI.)
Here is the dump:
Traceback (most recent call last):
File "C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\site-packages\langchain\document_loaders\text.py", line 41, in
load
text = f.read()
File "C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa9 in position 114: invalid start byte
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\multiprocessing\pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "C:\Users\cave7\Downloads\privateGPT\ingest.py", line 89, in load_single_document
return loader.load()[0]
File "C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\site-packages\langchain\document_loaders\text.py", line 54, in
load
raise RuntimeError(f"Error loading {self.file_path}") from e
RuntimeError: Error loading source_documents\indicator_BB THRUST_70.txt
"""
The above exception was the direct cause of the following exception:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ C:\Users\cave7\Downloads\privateGPT\ingest.py:167 in <module> │
│ │
│ 164 │
│ 165 │
│ 166 if __name__ == "__main__": │
│ ❱ 167 │ main() │
│ 168 │
│ │
│ C:\Users\cave7\Downloads\privateGPT\ingest.py:157 in main │
│ │
│ 154 │ else: │
│ 155 │ │ # Create and store locally vectorstore │
│ 156 │ │ print("Creating new vectorstore") │
│ ❱ 157 │ │ texts = process_documents() │
│ 158 │ │ print(f"Creating embeddings. May take some minutes...") │
│ 159 │ │ db = Chroma.from_documents(texts, embeddings, persist_directory=persist_director │
│ 160 │ db.persist() │
│ │
│ C:\Users\cave7\Downloads\privateGPT\ingest.py:119 in process_documents │
│ │
│ 116 │ Load documents and split in chunks │
│ 117 │ """ │
│ 118 │ print(f"Loading documents from {source_directory}") │
│ ❱ 119 │ documents = load_documents(source_directory, ignored_files) │
│ 120 │ if not documents: │
│ 121 │ │ print("No new documents to load") │
│ 122 │ │ exit(0) │
│ │
│ C:\Users\cave7\Downloads\privateGPT\ingest.py:108 in load_documents │
│ │
│ 105 │ with Pool(processes=os.cpu_count()) as pool: │
│ 106 │ │ results = [] │
│ 107 │ │ with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as │
│ ❱ 108 │ │ │ for i, doc in enumerate(pool.imap_unordered(load_single_document, filtered_f │
│ 109 │ │ │ │ results.append(doc) │
│ 110 │ │ │ │ pbar.update() │
│ 111 │
│ │
│ C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\multiprocessing\pool.py:873 in next │
│ │
│ 870 │ │ success, value = item │
│ 871 │ │ if success: │
│ 872 │ │ │ return value │
│ ❱ 873 │ │ raise value │
│ 874 │ │
│ 875 │ __next__ = next # XXX │
│ 876 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: Error loading source_documents\indicator_BB THRUST_70.txt
PS C:\Users\cave7\Downloads\privateGPT> python ingest.py
Creating new vectorstore
Loading documents from source_documents
Loading new documents: 0%| | 0/1 [00:08<?, ?it/s]
RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\site-packages\langchain\document_loaders\text.py", line 41, in
load
text = f.read()
File "C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\codecs.py", line 322, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa9 in position 0: invalid start byte
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\multiprocessing\pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "C:\Users\cave7\Downloads\privateGPT\ingest.py", line 89, in load_single_document
return loader.load()[0]
File "C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\site-packages\langchain\document_loaders\text.py", line 54, in
load
raise RuntimeError(f"Error loading {self.file_path}") from e
RuntimeError: Error loading source_documents\indicator_BB THRUST_70.txt
"""
The above exception was the direct cause of the following exception:
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ C:\Users\cave7\Downloads\privateGPT\ingest.py:167 in <module> │
│ │
│ 164 │
│ 165 │
│ 166 if __name__ == "__main__": │
│ ❱ 167 │ main() │
│ 168 │
│ │
│ C:\Users\cave7\Downloads\privateGPT\ingest.py:157 in main │
│ │
│ 154 │ else: │
│ 155 │ │ # Create and store locally vectorstore │
│ 156 │ │ print("Creating new vectorstore") │
│ ❱ 157 │ │ texts = process_documents() │
│ 158 │ │ print(f"Creating embeddings. May take some minutes...") │
│ 159 │ │ db = Chroma.from_documents(texts, embeddings, persist_directory=persist_director │
│ 160 │ db.persist() │
│ │
│ C:\Users\cave7\Downloads\privateGPT\ingest.py:119 in process_documents │
│ │
│ 116 │ Load documents and split in chunks │
│ 117 │ """ │
│ 118 │ print(f"Loading documents from {source_directory}") │
│ ❱ 119 │ documents = load_documents(source_directory, ignored_files) │
│ 120 │ if not documents: │
│ 121 │ │ print("No new documents to load") │
│ 122 │ │ exit(0) │
│ │
│ C:\Users\cave7\Downloads\privateGPT\ingest.py:108 in load_documents │
│ │
│ 105 │ with Pool(processes=os.cpu_count()) as pool: │
│ 106 │ │ results = [] │
│ 107 │ │ with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as │
│ ❱ 108 │ │ │ for i, doc in enumerate(pool.imap_unordered(load_single_document, filtered_f │
│ 109 │ │ │ │ results.append(doc) │
│ 110 │ │ │ │ pbar.update() │
│ 111 │
│ │
│ C:\Users\cave7\AppData\Local\Programs\Python\Python310\lib\multiprocessing\pool.py:873 in next │
│ │
│ 870 │ │ success, value = item │
│ 871 │ │ if success: │
│ 872 │ │ │ return value │
│ ❱ 873 │ │ raise value │
│ 874 │ │
│ 875 │ __next__ = next # XXX │
│ 876 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: Error loading source_documents\indicator_BB THRUST_70.txt
The content of "indicator_BB THRUST_70.txt" is just a copyright character, which is automatically added to all my source code on TradingView. I wanted to index all my scripts so I could use them with the AI.
Contents of file:
©
Beta Was this translation helpful? Give feedback.
All reactions