Skip to content

Commit

Permalink
Merge pull request #299 from mdeweerd/elm_extended
Browse files Browse the repository at this point in the history
Add fallback for plain elm #294 #290
  • Loading branch information
imartinez committed May 19, 2023
2 parents ad64589 + 4cda348 commit 22945bc
Showing 1 changed file with 23 additions and 1 deletion.
24 changes: 23 additions & 1 deletion ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,36 @@
from constants import CHROMA_SETTINGS


class MyElmLoader(UnstructuredEmailLoader):
    """Email (.eml) loader that falls back to text/plain when the default load fails.

    The first attempt uses the loader's configured settings. If that attempt
    fails because the message has no text/html content, the load is retried
    with ``content_source`` set to ``"text/plain"``.
    """

    def load(self) -> List[Document]:
        """Load the email, retrying as plain text when no HTML content is found.

        Returns:
            The documents produced by ``UnstructuredEmailLoader.load``.

        Raises:
            Exception: Any error raised by the underlying loader, re-raised
                with ``self.file_path`` prefixed to the message. The original
                exception type is kept when it can be constructed from a
                single message string; otherwise a ``RuntimeError`` is raised.
                In both cases the original exception is chained as the cause.
        """
        try:
            try:
                documents = super().load()
            except ValueError as err:
                # Only the "no HTML part" failure is recoverable here;
                # every other ValueError propagates to the outer handler.
                if "text/html content not found in email" not in str(err):
                    raise
                # Try plain text. NOTE: this mutates the instance's kwargs, so
                # later load() calls on this loader also use text/plain.
                self.unstructured_kwargs["content_source"] = "text/plain"
                documents = super().load()
        except Exception as err:
            # Add file_path to exception message
            message = f"{self.file_path}: {err}"
            try:
                wrapped = type(err)(message)
            except Exception:
                # Some exception classes (e.g. UnicodeDecodeError) cannot be
                # built from a single string. Rebuilding them would raise a
                # TypeError that hides the real failure, so fall back to a
                # generic error that still carries the file path.
                wrapped = RuntimeError(message)
            raise wrapped from err

        return documents


# Map file extensions to document loaders and their arguments
LOADER_MAPPING = {
".csv": (CSVLoader, {}),
# ".docx": (Docx2txtLoader, {}),
".doc": (UnstructuredWordDocumentLoader, {}),
".docx": (UnstructuredWordDocumentLoader, {}),
".enex": (EverNoteLoader, {}),
".eml": (UnstructuredEmailLoader, {}),
".eml": (MyElmLoader, {}),
".epub": (UnstructuredEPubLoader, {}),
".html": (UnstructuredHTMLLoader, {}),
".md": (UnstructuredMarkdownLoader, {}),
Expand Down

0 comments on commit 22945bc

Please sign in to comment.