Skip to content

Commit

Permalink
fix: trim paragraphs when condense lines option is used
Browse files Browse the repository at this point in the history
  • Loading branch information
vzhd1701 committed Apr 11, 2022
1 parent c11dbb9 commit 9403e47
Show file tree
Hide file tree
Showing 16 changed files with 228 additions and 127 deletions.
75 changes: 4 additions & 71 deletions enex2notion/note_parser.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import logging
from typing import List

from bs4 import BeautifulSoup

from enex2notion.enex_types import EvernoteNote
from enex2notion.note_parser_blocks import parse_note_blocks
from enex2notion.note_parser_condense import condense_lines
from enex2notion.note_parser_webclip import parse_webclip
from enex2notion.note_parser_webclip_pdf import parse_webclip_to_pdf
from enex2notion.notion_blocks import NotionBaseBlock, NotionTextBlock, TextProp
from enex2notion.notion_blocks_container import NotionCalloutBlock
from enex2notion.notion_blocks_text import TextProp
from enex2notion.notion_blocks_uploadable import NotionUploadableBlock

logger = logging.getLogger(__name__)
Expand All @@ -35,9 +35,9 @@ def parse_note(
note_blocks = parse_note_blocks(note_dom)

if is_condense_lines_sparse:
note_blocks = _condense_lines(note_blocks, is_sparse=True)
note_blocks = condense_lines(note_blocks, is_sparse=True)
elif is_condense_lines:
note_blocks = _condense_lines(note_blocks)
note_blocks = condense_lines(note_blocks)

if is_add_meta:
_add_meta(note_blocks, note)
Expand Down Expand Up @@ -114,70 +114,3 @@ def _get_note_meta(note: EvernoteNote):
note_meta.append(f"Tags: {note_tags}")

return "\n".join(note_meta)


def _condense_lines(blocks: List[NotionBaseBlock], is_sparse=False):
    """Merge runs of adjacent text paragraphs into single multi-line blocks.

    Gaps of empty paragraphs are first collapsed by
    ``_join_empty_paragraphs``. Consecutive non-empty ``NotionTextBlock``
    items are then joined with ``_concat_text_props``. An empty paragraph or
    any block that is not a ``NotionTextBlock`` ends the current run, and so
    does a text block that has children.

    Every block's ``children`` is condensed recursively and reassigned, so
    the input blocks are mutated. The recursive call always uses the default
    ``is_sparse=False``.

    Args:
        blocks: sibling blocks to condense.
        is_sparse: when true, empty paragraphs are kept in the result;
            otherwise they are dropped.

    Returns:
        A new list with the condensed blocks.
    """
    condensed = []
    pending = None  # text block that is still collecting merged lines

    for block in _join_empty_paragraphs(blocks):
        block.children = _condense_lines(block.children)

        if _is_empty_paragraph(block) or not isinstance(block, NotionTextBlock):
            # A separator ends the current run of text lines.
            if pending:
                condensed.append(pending)
                pending = None

            if not _is_empty_paragraph(block) or is_sparse:
                condensed.append(block)

            continue

        if pending:
            pending = NotionTextBlock(
                text_prop=_concat_text_props(pending.text_prop, block.text_prop)
            )
        else:
            pending = block

        if block.children:
            # Children belong to the last merged line, so the run is closed
            # here and the children move onto the merged block.
            pending.children = block.children
            condensed.append(pending)
            pending = None

    if pending:
        condensed.append(pending)

    return condensed


def _join_empty_paragraphs(blocks: List[NotionBaseBlock]):
    """Collapse each run of empty paragraphs into one empty text block.

    A fresh ``NotionTextBlock()`` is emitted in place of a run, but only once
    a non-empty block follows it, so a run at the very end of ``blocks``
    produces nothing.

    Args:
        blocks: sibling blocks to scan.

    Returns:
        A new list with the collapsed gaps.
    """
    joined = []
    gap_pending = False

    for block in blocks:
        if _is_empty_paragraph(block):
            gap_pending = True
            continue

        if gap_pending:
            joined.append(NotionTextBlock())
            gap_pending = False

        joined.append(block)

    return joined


def _is_empty_paragraph(block: NotionBaseBlock):
    """Tell whether *block* is a text block equal to a default one.

    Blocks that are not ``NotionTextBlock`` instances are never empty
    paragraphs; text blocks are compared against a newly built
    ``NotionTextBlock()``.
    """
    return isinstance(block, NotionTextBlock) and block == NotionTextBlock()


def _concat_text_props(text_prop1: TextProp, text_prop2: TextProp) -> TextProp:
    """Join two text props into one, separated by a newline.

    The plain texts are joined with ``"\\n"`` and the property lists are
    joined with a ``["\\n"]`` chunk between them.
    """
    joined_text = text_prop1.text + "\n" + text_prop2.text
    joined_properties = text_prop1.properties + [["\n"]] + text_prop2.properties

    return TextProp(text=joined_text, properties=joined_properties)
3 changes: 2 additions & 1 deletion enex2notion/note_parser_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
parse_indented,
parse_indented_plain,
)
from enex2notion.notion_blocks import NotionDividerBlock, NotionTextBlock, TextProp
from enex2notion.notion_blocks import NotionDividerBlock
from enex2notion.notion_blocks_text import NotionTextBlock, TextProp

logger = logging.getLogger(__name__)

Expand Down
83 changes: 83 additions & 0 deletions enex2notion/note_parser_condense.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
from typing import List

from enex2notion.notion_blocks import NotionBaseBlock
from enex2notion.notion_blocks_text import NotionTextBlock, TextProp


def condense_lines(blocks: List[NotionBaseBlock], is_sparse=False):
    """Merge runs of adjacent text paragraphs and strip the resulting text.

    Gaps of empty paragraphs are first collapsed by
    ``_join_empty_paragraphs``. Consecutive non-empty ``NotionTextBlock``
    items are then joined with ``_concat_text_props``. An empty paragraph or
    any block that is not a ``NotionTextBlock`` ends the current run, and so
    does a text block that has children. The final list is passed through
    ``_strip_paragraphs``.

    Every block's ``children`` is condensed recursively and reassigned, so
    the input blocks are mutated.

    Args:
        blocks: sibling blocks to condense.
        is_sparse: when true, empty paragraphs are kept in the result;
            otherwise they are dropped.

    Returns:
        A new list with the condensed blocks.
    """
    condensed = []
    pending = None  # text block that is still collecting merged lines

    for block in _join_empty_paragraphs(blocks):
        # NOTE(review): is_sparse is not forwarded, so nested levels are
        # always condensed densely - confirm this is intended.
        block.children = condense_lines(block.children)

        is_blank = _is_empty_paragraph(block)

        if is_blank or not isinstance(block, NotionTextBlock):
            # A separator ends the current run of text lines.
            if pending:
                condensed.append(pending)
                pending = None

            if is_sparse or not is_blank:
                condensed.append(block)

            continue

        if pending:
            pending = NotionTextBlock(
                text_prop=_concat_text_props(pending.text_prop, block.text_prop)
            )
        else:
            pending = block

        if block.children:
            # Children belong to the last merged line, so the run is closed
            # here and the children move onto the merged block.
            pending.children = block.children
            condensed.append(pending)
            pending = None

    if pending:
        condensed.append(pending)

    return _strip_paragraphs(condensed)


def _strip_paragraphs(blocks: List[NotionBaseBlock]):
    """Replace the text of every ``NotionTextBlock`` with its stripped form.

    Text blocks are updated in place by assigning ``text_prop.strip()`` back
    to ``text_prop``; all other blocks pass through untouched.

    Args:
        blocks: blocks to process.

    Returns:
        A new list holding the same block objects in the same order.
    """
    stripped = list(blocks)

    for block in stripped:
        if isinstance(block, NotionTextBlock):
            block.text_prop = block.text_prop.strip()

    return stripped


def _join_empty_paragraphs(blocks: List[NotionBaseBlock]):
    """Collapse each run of empty paragraphs into one empty text block.

    A fresh ``NotionTextBlock()`` stands in for a run, and it is only emitted
    when a non-empty block comes after the run. Empty paragraphs at the end
    of ``blocks`` are therefore dropped.

    Args:
        blocks: sibling blocks to scan.

    Returns:
        A new list with the collapsed gaps.
    """
    collapsed = []
    in_gap = False

    for current in blocks:
        if _is_empty_paragraph(current):
            in_gap = True
            continue

        if in_gap:
            # Close the gap with a single placeholder before the next block.
            collapsed.append(NotionTextBlock())
            in_gap = False

        collapsed.append(current)

    return collapsed


def _is_empty_paragraph(block: NotionBaseBlock):
    """Tell whether *block* is a text block with only whitespace in its text.

    Blocks that are not ``NotionTextBlock`` instances are never empty
    paragraphs.
    """
    return isinstance(block, NotionTextBlock) and block.text_prop.text.strip() == ""


def _concat_text_props(text_prop1: TextProp, text_prop2: TextProp) -> TextProp:
    """Build a ``TextProp`` holding both inputs separated by a line break.

    Args:
        text_prop1: text that comes first.
        text_prop2: text that comes second.

    Returns:
        A new ``TextProp`` whose text is the two texts joined by ``"\\n"``
        and whose properties are the two lists with ``["\\n"]`` in between.
    """
    line_break_chunk = [["\n"]]

    merged_text = text_prop1.text + "\n" + text_prop2.text
    merged_properties = text_prop1.properties + line_break_chunk + text_prop2.properties

    return TextProp(text=merged_text, properties=merged_properties)
3 changes: 2 additions & 1 deletion enex2notion/note_parser_e_div.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

from bs4 import Tag

from enex2notion.notion_blocks import NotionBookmarkBlock, NotionTextBlock
from enex2notion.notion_blocks import NotionBookmarkBlock
from enex2notion.notion_blocks_container import NotionCodeBlock
from enex2notion.notion_blocks_list import NotionTodoBlock
from enex2notion.notion_blocks_text import NotionTextBlock
from enex2notion.string_extractor import extract_string

logger = logging.getLogger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion enex2notion/note_parser_e_table.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from bs4 import Tag

from enex2notion.notion_blocks import TextProp
from enex2notion.notion_blocks_table import NotionTableBlock
from enex2notion.notion_blocks_text import TextProp
from enex2notion.string_extractor import extract_string


Expand Down
2 changes: 1 addition & 1 deletion enex2notion/note_parser_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from bs4 import NavigableString, Tag

from enex2notion.note_parser_e_media import parse_img, parse_media
from enex2notion.notion_blocks import NotionTextBlock, TextProp
from enex2notion.notion_blocks_header import (
NotionHeaderBlock,
NotionSubheaderBlock,
Expand All @@ -14,6 +13,7 @@
NotionNumberedListBlock,
NotionTodoBlock,
)
from enex2notion.notion_blocks_text import NotionTextBlock, TextProp
from enex2notion.string_extractor import extract_string

logger = logging.getLogger(__name__)
Expand Down
39 changes: 0 additions & 39 deletions enex2notion/notion_blocks.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,6 @@
from notion import block


class TextProp(object):
    """Plain text together with its list of property chunks."""

    def __init__(self, text, properties=None):
        """Store *text* and the property chunks that describe it.

        Args:
            text: plain-text content.
            properties: list of property chunks for the text. When ``None``,
                it defaults to ``[[text]]`` for non-empty text and to ``[]``
                for empty text. Any other value, including an empty list,
                is stored as given.
        """
        self.text = text

        # Only None selects the default; the attribute is assigned once.
        if properties is None:
            properties = [[text]] if text else []

        self.properties = properties

    def __eq__(self, other):
        """Compare by text and properties; defer for foreign types."""
        if not isinstance(other, TextProp):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on objects without .text.
            return NotImplemented

        return self.text == other.text and self.properties == other.properties

    def __repr__(self):  # pragma: no cover
        return "<{0}> {1}".format(self.__class__.__name__, self.text)


class NotionBaseBlock(object):
type = None

Expand Down Expand Up @@ -44,29 +28,6 @@ def __repr__(self): # pragma: no cover
)


class NotionTextBased(NotionBaseBlock):
    """Base for blocks whose content is a title described by a ``TextProp``.

    The plain text is kept in ``attrs["title_plaintext"]`` and the property
    chunks in ``properties["properties.title"]``.
    """

    def __init__(self, text_prop: TextProp = None, **kwargs):
        """Initialise the base block and store the title.

        Args:
            text_prop: title text and properties; a falsy value (such as the
                default ``None``) stores an empty title.
            **kwargs: forwarded unchanged to the parent initialiser.
        """
        super().__init__(**kwargs)

        if not text_prop:
            self.attrs["title_plaintext"] = ""
            self.properties["properties.title"] = []
        else:
            self.attrs["title_plaintext"] = text_prop.text
            self.properties["properties.title"] = text_prop.properties

    @property
    def text_prop(self):
        """Return the stored title as a newly built ``TextProp``."""
        plaintext = self.attrs["title_plaintext"]
        title_properties = self.properties["properties.title"]

        return TextProp(text=plaintext, properties=title_properties)


class NotionTextBlock(NotionTextBased):
    """Text-based block whose ``type`` is ``block.TextBlock``."""

    # Block class from the ``notion`` package that this wrapper stands for.
    type = block.TextBlock


class NotionDividerBlock(NotionBaseBlock):
type = block.DividerBlock

Expand Down
2 changes: 1 addition & 1 deletion enex2notion/notion_blocks_container.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from notion import block

from enex2notion.notion_blocks import NotionTextBased
from enex2notion.notion_blocks_text import NotionTextBased


class NotionCodeBlock(NotionTextBased):
Expand Down
2 changes: 1 addition & 1 deletion enex2notion/notion_blocks_header.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from notion import block

from enex2notion.notion_blocks import NotionTextBased
from enex2notion.notion_blocks_text import NotionTextBased


class NotionHeaderBlock(NotionTextBased):
Expand Down
2 changes: 1 addition & 1 deletion enex2notion/notion_blocks_list.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from notion import block

from enex2notion.notion_blocks import NotionTextBased
from enex2notion.notion_blocks_text import NotionTextBased


class NotionBulletedListBlock(NotionTextBased):
Expand Down
3 changes: 2 additions & 1 deletion enex2notion/notion_blocks_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from notion.block import BasicBlock

from enex2notion.notion_blocks import NotionBaseBlock, NotionTextBased, TextProp
from enex2notion.notion_blocks import NotionBaseBlock
from enex2notion.notion_blocks_text import NotionTextBased, TextProp
from enex2notion.rand_id import rand_id_list


Expand Down

0 comments on commit 9403e47

Please sign in to comment.