Skip to content

Commit

Permalink
fix: skip webclip notes before parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
vzhd1701 committed Dec 6, 2021
1 parent 952feb2 commit 9af7912
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 5 deletions.
4 changes: 4 additions & 0 deletions enex2notion/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ def upload(self, enex_file: Path):
logger.debug(f"Skipping note '{note.title}' (already uploaded)")
continue

if note.is_webclip:
logger.warning(f"Skipping note '{note.title}' [WEBCLIPS NOT SUPPORTED]")
continue

note_blocks = parse_note(note, self.add_meta)
if not note_blocks:
continue
Expand Down
16 changes: 16 additions & 0 deletions enex2notion/enex_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import hashlib
import logging
import mimetypes
import re
import uuid
from collections import defaultdict
from dataclasses import dataclass
Expand Down Expand Up @@ -33,6 +34,7 @@ class EvernoteNote(object):
tags: List[str]
author: str
url: str
is_webclip: bool
resources: List[EvernoteResource]
_note_hash: str = None

Expand Down Expand Up @@ -125,10 +127,24 @@ def _process_note(note_raw: dict):
tags=note_tags,
author=note_attrs.get("author", ""),
url=note_attrs.get("source-url", ""),
is_webclip=_is_webclip(note_raw),
resources=resources,
)


def _is_webclip(note_raw: dict):
    """Tell whether a raw note mapping looks like a web clip.

    A note is reported as a web clip when any of the following holds:

    * the ``source`` entry of ``note-attributes`` contains ``web.clip``;
    * the ``source-application`` entry of ``note-attributes`` contains
      ``webclipper``;
    * the note ``content`` contains a ``<div>`` whose ``style`` attribute
      mentions ``en-clipped-content``.

    Args:
        note_raw: Parsed note mapping. Only the ``note-attributes`` and
            ``content`` keys are read; either may be missing or ``None``.

    Returns:
        ``True`` if the note looks like a web clip, ``False`` otherwise.
    """
    note_attrs = note_raw.get("note-attributes") or {}

    # A key can be present but hold None (the same reason the attributes
    # mapping itself is guarded with `or {}`), so normalise to "" before
    # doing substring checks.
    source = note_attrs.get("source") or ""
    source_app = note_attrs.get("source-application") or ""

    if "web.clip" in source or "webclipper" in source_app:
        return True

    content = note_raw.get("content") or ""

    # Use search rather than match: the clipped <div> is not guaranteed to be
    # the first thing in the content (a prolog or wrapper element may precede
    # it), and match would only ever inspect the very start of the string.
    return bool(
        re.search('<div[^>]+style="[^"]+en-clipped-content[^"]*"', content)
    )


def _convert_resource(resource_raw):
res_attr = resource_raw.get("resource-attributes", {})
if not isinstance(res_attr, dict):
Expand Down
22 changes: 19 additions & 3 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

import pytest

from enex2notion.cli import cli
Expand All @@ -18,7 +20,7 @@ def mock_api(mocker):
@pytest.fixture()
def fake_note_factory(mocker):
mock_iter = mocker.patch("enex2notion.cli.iter_notes")
mock_iter.return_value = [mocker.MagicMock(note_hash="fake_hash")]
mock_iter.return_value = [mocker.MagicMock(note_hash="fake_hash", is_webclip=False)]

return mock_iter

Expand Down Expand Up @@ -101,8 +103,8 @@ def test_done_file(mock_api, fake_note_factory, mocker, fs):
fs.create_file("done.txt")

fake_note_factory.return_value = [
mocker.MagicMock(note_hash="fake_hash1"),
mocker.MagicMock(note_hash="fake_hash2"),
mocker.MagicMock(note_hash="fake_hash1", is_webclip=False),
mocker.MagicMock(note_hash="fake_hash2", is_webclip=False),
]

cli(["--token", "fake_token", "--done-file", "done.txt", "fake.enex"])
Expand Down Expand Up @@ -143,5 +145,19 @@ def test_bad_file(mock_api, fake_note_factory):
mock_api["parse_note"].assert_called_once()


def test_webclip(mock_api, fake_note_factory, mocker, caplog):
    """A webclip note is skipped with a warning and is neither parsed nor uploaded."""
    webclip_note = mocker.MagicMock(note_hash="fake_hash1", is_webclip=True)
    fake_note_factory.return_value = [webclip_note]

    with caplog.at_level(logging.WARNING):
        cli(["fake.enex"])

    # The skip has to be visible in the log output.
    assert "[WEBCLIPS NOT SUPPORTED]" in caplog.text

    # The note must not reach the uploader or the parser.
    for api_name in ("upload_note", "parse_note"):
        mock_api[api_name].assert_not_called()


def test_cli_main_import():
from enex2notion import __main__
116 changes: 114 additions & 2 deletions tests/test_enex_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_iter_notes_single(fs):
tags=[],
author="",
url="",
is_webclip=False,
resources=[],
),
]
Expand Down Expand Up @@ -90,6 +91,7 @@ def test_iter_notes_single_tags(fs):
tags=["tag1", "tag2"],
author="",
url="",
is_webclip=False,
resources=[],
),
]
Expand Down Expand Up @@ -123,6 +125,7 @@ def test_iter_notes_single_one_tag(fs):
tags=["tag1"],
author="",
url="",
is_webclip=False,
resources=[],
),
]
Expand All @@ -137,8 +140,8 @@ def test_iter_notes_single_attrs(fs):
<created>20211118T085332Z</created>
<updated>20211118T085920Z</updated>
<note-attributes>
<author>test@user.com</author>
<source-url>https://google.com</source-url>
<author>test@user.com</author>
<source-url>https://google.com</source-url>
</note-attributes>
<content>test</content>
</note>
Expand All @@ -157,6 +160,110 @@ def test_iter_notes_single_attrs(fs):
tags=[],
author="test@user.com",
url="https://google.com",
is_webclip=False,
resources=[],
),
]


def test_iter_notes_webclip1(fs):
    """A note whose ``source`` attribute is ``web.clip`` parses with ``is_webclip=True``."""
    enex_xml = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export4.dtd">
<en-export export-date="20211218T085932Z" application="Evernote" version="10.25.6">
<note>
<title>test1</title>
<created>20211118T085332Z</created>
<updated>20211118T085920Z</updated>
<note-attributes>
<source>web.clip</source>
</note-attributes>
<content>test</content>
</note>
</en-export>
"""
    fs.create_file("test.enex", contents=enex_xml)

    parsed_notes = list(iter_notes(Path("test.enex")))

    # Only the webclip flag differs from a plain note with the same fields.
    expected_note = EvernoteNote(
        title="test1",
        created=datetime.datetime(2021, 11, 18, 8, 53, 32, tzinfo=tzutc()),
        updated=datetime.datetime(2021, 11, 18, 8, 59, 20, tzinfo=tzutc()),
        content="test",
        tags=[],
        author="",
        url="",
        is_webclip=True,
        resources=[],
    )

    assert parsed_notes == [expected_note]


def test_iter_notes_webclip2(fs):
    """A note whose ``source-application`` names the web clipper parses with ``is_webclip=True``."""
    enex_xml = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export4.dtd">
<en-export export-date="20211218T085932Z" application="Evernote" version="10.25.6">
<note>
<title>test1</title>
<created>20211118T085332Z</created>
<updated>20211118T085920Z</updated>
<note-attributes>
<source-application>webclipper.evernote</source-application>
</note-attributes>
<content>test</content>
</note>
</en-export>
"""
    fs.create_file("test.enex", contents=enex_xml)

    parsed_notes = list(iter_notes(Path("test.enex")))

    # Detection here comes from the source-application attribute alone.
    expected_note = EvernoteNote(
        title="test1",
        created=datetime.datetime(2021, 11, 18, 8, 53, 32, tzinfo=tzutc()),
        updated=datetime.datetime(2021, 11, 18, 8, 59, 20, tzinfo=tzutc()),
        content="test",
        tags=[],
        author="",
        url="",
        is_webclip=True,
        resources=[],
    )

    assert parsed_notes == [expected_note]


def test_iter_notes_webclip_content(fs):
    """A note with no identifying attributes is flagged via its clipped-content ``<div>``."""
    enex_xml = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export4.dtd">
<en-export export-date="20211218T085932Z" application="Evernote" version="10.25.6">
<note>
<title>test1</title>
<created>20211118T085332Z</created>
<updated>20211118T085920Z</updated>
<note-attributes>
</note-attributes>
<content><![CDATA[
<div style="--en-clipped-content:article;">test</div>
]]></content>
</note>
</en-export>
"""
    fs.create_file("test.enex", contents=enex_xml)

    parsed_notes = list(iter_notes(Path("test.enex")))

    # The attributes element is empty, so only the content marks this as a clip.
    expected_note = EvernoteNote(
        title="test1",
        created=datetime.datetime(2021, 11, 18, 8, 53, 32, tzinfo=tzutc()),
        updated=datetime.datetime(2021, 11, 18, 8, 59, 20, tzinfo=tzutc()),
        content='<div style="--en-clipped-content:article;">test</div>',
        tags=[],
        author="",
        url="",
        is_webclip=True,
        resources=[],
    )

    assert parsed_notes == [expected_note]
Expand Down Expand Up @@ -197,6 +304,7 @@ def test_iter_notes_multi(fs):
tags=[],
author="",
url="",
is_webclip=False,
resources=[],
),
EvernoteNote(
Expand All @@ -207,6 +315,7 @@ def test_iter_notes_multi(fs):
tags=[],
author="",
url="",
is_webclip=False,
resources=[],
),
]
Expand Down Expand Up @@ -259,6 +368,7 @@ def test_iter_notes_single_with_resource(fs):
tags=[],
author="",
url="",
is_webclip=False,
resources=[expected_resource],
),
]
Expand Down Expand Up @@ -306,6 +416,7 @@ def test_iter_notes_single_with_noname_resource(fs, mocker):
tags=[],
author="",
url="",
is_webclip=False,
resources=[
EvernoteResource(
data_bin=(
Expand Down Expand Up @@ -367,6 +478,7 @@ def test_iter_notes_single_with_empty_resource(fs, caplog):
tags=[],
author="",
url="",
is_webclip=False,
resources=[expected_resource],
),
]
Expand Down
4 changes: 4 additions & 0 deletions tests/test_enex_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def test_upload_note(notion_test_page):
tags=[],
author="",
url="",
is_webclip=False,
resources=[],
)

Expand All @@ -135,6 +136,7 @@ def test_upload_note_with_meta(notion_test_page):
tags=[],
author="",
url="",
is_webclip=False,
resources=[],
)

Expand Down Expand Up @@ -165,6 +167,7 @@ def test_upload_note_with_file(notion_test_page, tiny_file):
tags=[],
author="",
url="",
is_webclip=False,
resources=[tiny_file],
)

Expand All @@ -190,6 +193,7 @@ def test_upload_note_db(notion_test_page):
tags=[],
author="",
url="",
is_webclip=False,
resources=[],
)

Expand Down
3 changes: 3 additions & 0 deletions tests/test_note_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ def test_bad_resource(caplog):
tags=[],
author="",
url="",
is_webclip=False,
resources=[],
)

Expand All @@ -597,6 +598,7 @@ def test_bad_note(caplog):
tags=[],
author="",
url="",
is_webclip=False,
resources=[],
)

Expand All @@ -616,6 +618,7 @@ def test_note_with_meta():
tags=["tag1", "tag2"],
author="",
url="https://google.com",
is_webclip=False,
resources=[],
)

Expand Down

0 comments on commit 9af7912

Please sign in to comment.