From 7db4700526ca36fc0fd868bd5f49378f8f5b7cb0 Mon Sep 17 00:00:00 2001 From: Dung Tran Date: Sat, 21 Aug 2021 09:58:29 +0700 Subject: [PATCH 1/4] Rename md file follow first line of md file, strip markdown header syntax - Fix #18: Lose link incase long file name of notion - Fix #16: Lost link if file name use dot . - Fix #13: Accents in the note title do not work correctly - Fix #7: Bad work with Cyrillic symbols --- N2O.py | 18 +++++++++++++----- N2Omodule.py | 19 ++++++++++++------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/N2O.py b/N2O.py index cb75981..c56289a 100644 --- a/N2O.py +++ b/N2O.py @@ -31,7 +31,10 @@ # Generate a list of file paths for all zip content [NotionPathRaw.append(line.rstrip()) for line in notionsData.namelist()] - +verbose = False +def debug_print(msg): + if verbose: + print(msg) # Clean paths for Obsidian destination regexUID = compile("\s+\w{32}") @@ -84,7 +87,7 @@ # Convert CSV content into Obsidian Internal Links mdTitle = N2Omodule.N2Ocsv(csvFile) - + ## Make temp destination file path newfilepath = tempPath / ObsidianPaths[n] @@ -95,8 +98,8 @@ append_write = 'w' # make a new file if not # Save CSV internal links as new .md file - with open(newfilepath, append_write) as tempFile: - [print(line.rstrip().encode("utf-8"), file=tempFile) for line in mdTitle] + with open(newfilepath, append_write, encoding='utf-8') as tempFile: + [print(line.rstrip(), file=tempFile) for line in mdTitle] @@ -111,9 +114,14 @@ # Find and convert Internal Links to Obsidian style mdContent = N2Omodule.N2Omd(mdFile) + + # Exported md file include header in first line + # '# title of file' + # Get full file name by first line of exported md file instead file name ObsidianPaths[n] ## Make temp destination file path - newfilepath = tempPath / ObsidianPaths[n] + new_file_name = mdContent[0].replace('# ', '') + '.md' + newfilepath = tempPath / path.dirname(ObsidianPaths[n]) / new_file_name # Check if file exists, append if true if path.exists(newfilepath): diff --git a/N2Omodule.py b/N2Omodule.py index 4c586e4..201f395 100644 --- a/N2Omodule.py +++ b/N2Omodule.py @@ -72,7 +72,9 @@ def N2Ocsv(csvFile): # Clean Internal Links regexURLid = compile("(?:https?|ftp):\/\/") - regexSymbols = compile("[^\w\s]") + + # Clean symbol invalid window path < > : " / \ | ? * + regexSymbols = compile("[<>?:/\|*]") regexSpaces = compile("\s+") for line in oldTitle: @@ -81,11 +83,11 @@ def N2Ocsv(csvFile): line = regexURLid.sub(" ",line) line = regexSymbols.sub(" ",line) #2 Remove duplicate spaces - line = regexSpaces.sub(" ", line) + line = regexSpaces.sub(" ", line) #3 Remove any spaces at beginning line = line.lstrip() #4 Cut title at 50 characters - line = str(line)[0:50] + line = str(line) #5 Remove any spaces at end line = line.rstrip() if line: @@ -191,10 +193,13 @@ def N2Omd(mdFile): relativePath = regexSlash.sub("/",relativePath).strip() # Reconstruct Links as pretty links - if relativePath == Title: - PrettyLink = "[["+relativePath+"]] " - else: - PrettyLink = "[["+relativePath+"|"+Title+"]] " + + PrettyLink = "[["+Title+"]]" + + #if relativePath == Title: + # PrettyLink = "[["+relativePath+"]] " + #else: + # PrettyLink = "[["+relativePath+"|"+Title+"]] " line = PrettyLink From 319b770241fed0e51f983770cd2e4f6c8b06510c Mon Sep 17 00:00:00 2001 From: Dung Tran Date: Sat, 21 Aug 2021 17:41:59 +0700 Subject: [PATCH 2/4] Support unicode link, full file name of notion, image links - Fix #15: Picture name is incorrect after convert to internal link - Fix #17: Can not convert link inside Notion - Fix #19: Internal links are not correct in case multiple link in 1 line - Fix #20: Not yet support unicode character mardown link --- N2O.py | 17 +-- N2Omodule.py | 294 +++++++++++++++++++++++++-------------------------- 2 files changed, 156 insertions(+), 155 deletions(-) diff --git a/N2O.py b/N2O.py index c56289a..135bceb 100644 --- a/N2O.py +++ b/N2O.py @@ -104,8 +104,7 @@ def debug_print(msg): - - +num_link = [0, 0, 0, 0] # Process all MD files for n in mdIndex: @@ -113,8 +112,8 @@ def debug_print(msg): with notionsData.open(NotionPathRaw[n], "r") as mdFile: # Find and convert Internal Links to Obsidian style - mdContent = N2Omodule.N2Omd(mdFile) - + mdContent, cnt = N2Omodule.N2Omd(mdFile) + num_link = [cnt[i]+num_link[i] for i in range(len(num_link))] # Exported md file include header in first line # '# title of file' @@ -132,7 +131,7 @@ def debug_print(msg): # Save modified content as new .md file with open(newfilepath, append_write, encoding='utf-8') as tempFile: [print(line.rstrip(), file=tempFile) for line in mdContent] - + @@ -158,8 +157,12 @@ def debug_print(msg): print(NotionPathRaw[n], file=e) print('', file=e) - - + +print(f"\nTotal converted links:") +print(f" - Internal links: {num_link[0]}") +print(f" - Embedded links: {num_link[1]}") +print(f" - Blank links : {num_link[2]}") +print(f" - Number tags : {num_link[3]}") # Save temporary file collection to new zip diff --git a/N2Omodule.py b/N2Omodule.py index 201f395..798e0de 100644 --- a/N2Omodule.py +++ b/N2Omodule.py @@ -100,179 +100,177 @@ def N2Ocsv(csvFile): return mdTitle -def convertInternalLink(matchObj): -# converts Notion Internal links (found by regex) to Obsidian pretty links +def convertBlankLink(line): +# converts Notion about:blank links (found by regex) to Obsidian pretty links regexSymbols = compile("[^\w\s]") regexSpaces = compile("\s+") + num_matchs = 0 + # about:blank links (lost or missing links within Notion) + ## Group1:Pretty Link Title + regexBlankLink = compile("\[(.[^\[\]\(\)]*)\]\(about:blank#.[^\[\]\(\)]*\)") + matchBlank = regexBlankLink.search(line) + if matchBlank: - userTitle = matchObj.group(1) - ExternalURL = matchObj.group(2) - urlTitle = matchObj.group(3) - - # Replace symbols with space - urlTitle = regexSymbols.sub(" ",urlTitle) + InternalTitle = matchBlank.group(1) + + # Replace symbols with space + InternalLink = regexSymbols.sub(" ",InternalTitle) + + # Remove duplicate spaces + InternalLink = regexSpaces.sub( " ", InternalLink) + + # Remove any spaces at beginning + InternalLink = InternalLink.lstrip() + + # Cut title at 50 characters + InternalLink = InternalLink[0:50] + + # Remove any spaces at end + InternalLink = InternalLink.rstrip() + + # Reconstruct Internal Links as pretty links + PrettyLink = "[["+InternalLink+"]] " + + line, num_matchs = regexBlankLink.subn(PrettyLink, line) + if num_matchs > 1: + print(f"Warning: {line} replaced {num_matchs} matchs!!") + + return line, num_matchs - # Remove duplicate spaces - urlTitle = regexSpaces.sub(" ",urlTitle) - # Cut title at 50 characters - urlTitle = urlTitle[0:50] - - # Remove any spaces at end - urlTitle = urlTitle.rstrip() - - # Reconstruct Internal Links as pretty links and source footnote - if urlTitle == userTitle: - PrettyLink = "[["+urlTitle+"]] ^["+ExternalURL+"] " - else: - PrettyLink = "[["+urlTitle+"|"+userTitle+"]] ^["+ExternalURL+"] " +def embedded_link_convert(line): - # Substitute regex find with PrettyLink - return PrettyLink + # Embedded attachment links + regexAttached = compile("!\[(.[^\[\]\(\)]*)\]\((.[^\[\]\(\)]*)\)") + regexUID = compile("%20\w{32}") + regex20 = compile("%20") + regexSlash = compile("\s\/") -def convertBlankLink(matchObj): -# converts Notion about:blank links (found by regex) to Obsidian pretty links + num_matchs = 0 + matchAttach = regexAttached.search(line) + if matchAttach: + attachment = matchAttach.group(1) + # Clean UID + attachment = regexUID.sub(" ",attachment) + # correct spaces + attachment = regex20.sub(" ",attachment) + attachment = regexSlash.sub("/",attachment).strip() + + # Reconstruct Links as embedded links + embededLink = "![["+attachment+"]] " + line, num_matchs = regexAttached.subn(embededLink, line) + if num_matchs > 1: + print(f"Warning: {line} replaced {num_matchs} matchs!!") + + return line, num_matchs - regexSymbols = compile("[^\w\s]") - regexSpaces = compile("\s+") - - InternalTitle = matchObj.group(1) - - # Replace symbols with space - InternalLink = regexSymbols.sub(" ",InternalTitle) - - # Remove duplicate spaces - InternalLink = regexSpaces.sub( " ", InternalLink) - - # Remove any spaces at beginning - InternalLink = InternalLink.lstrip() - - # Cut title at 50 characters - InternalLink = InternalLink[0:50] - - # Remove any spaces at end - InternalLink = InternalLink.rstrip() - - # Reconstruct Internal Links as pretty links - PrettyLink = "[["+InternalLink+"]] " - # Substitute regex find with PrettyLink - return PrettyLink +def internal_link_convert(line): + ''' + This internal links combine: + - Link to local page + - External notion page + - Link to Database ~ exported *.csv file + - png in notion + ''' + + # folder style links + #regexPath = compile("^\[(.+)\]\(([^\(]*)(?:\.md|\.csv)\)$") # Overlap incase multiple links in same line + regexPath = compile("\[(.*?)\]\((.*?)\)") + regex20 = compile("%20") + regexRelativePathNotion = compile("https:\/\/www\.notion\.so") + regexRelativePathMdCsv = compile("(?:\.md|\.csv)") + regexRelativePathPng = compile("(?:\.png)") + + num_matchs = 0 + # Identify and group relative paths + # While for incase multiple match on single line + pathMatch = regexPath.search(line) + if pathMatch: + # modify paths into local links. just remove UID and convert spaces + # Title = pathMatch.group(1) + relativePath = pathMatch.group(2) + notionMatch = regexRelativePathNotion.search(relativePath) + is_md_or_csv = regexRelativePathMdCsv.search(relativePath) + is_png = regexRelativePathPng.search(relativePath) + + if is_md_or_csv or notionMatch: + # Replace all matchs + # line = regexPath.sub("[["++"]]", line) + line, num_matchs = regexPath.subn("[["+'\\1'''+"]]", line) + elif is_png: + regexutf8 = compile("%([A-F0-9][A-F0-9])%([A-F0-9][A-F0-9])") + regexSlash = compile("\/") + regexUID = compile("%20\w{32}") + + relativePath = regexUID.sub("", relativePath) + relativePath = regex20.sub(" ", relativePath) + + utf8_match = regexutf8.search(relativePath) + while utf8_match: + utf8_match = regexutf8.search(relativePath) + if utf8_match: + byte_1 = "0x" + utf8_match.group(1) + byte_2 = "0x" + utf8_match.group(2) + bytes_unicode = bytes([int(byte_1,0), int(byte_2,0)]) + relativePath = regexutf8.sub(str(bytes_unicode, 'utf-8'), relativePath, 1) + + line, num_matchs = regexPath.subn("[["+relativePath+"]]", line) + if num_matchs > 1: + print(f"Warning: {line} replaced {num_matchs} matchs!!") + + return line, num_matchs + + +def feature_tags_convert(line): + + # Convert tags after lines starting with "Tags:" + regexTags = "^Tags:\s(.+)" + # Search for Internal Links. Will give match.group(1) & match.group(2) + tagMatch = search(regexTags,line) + Otags = [] + num_tag = 0 + if tagMatch: + Ntags = tagMatch.group(1).split(",") + for t in enumerate(Ntags): + Otags.append("#"+t[1].strip()) + num_tag += 1 + line = "Tags: "+", ".join(Otags) + return line, num_tag def N2Omd(mdFile): - # Local Dependancies: convertInternalLink(), convertBlankLink() - + newLines = [] - + em_link_cnt = 0 + in_link_cnt = 0 + bl_link_cnt = 0 + tags_cnt = 0 + for line in mdFile: + line = line.decode("utf-8").rstrip() - - - - - # folder style links - regexPath = compile("^\[(.+)\]\(([^\(]*)(?:\.md|\.csv)\)$") - regexUID = compile("%20\w{32}") - regex20 = compile("%20") - regexSlash = compile("\s\/") - - # Identify and group relative paths - pathMatch = regexPath.search(line) - # modify paths into local links. just remove UID and convert spaces - if pathMatch: - Title = pathMatch.group(1) - relativePath = pathMatch.group(2) - # Clean UID - relativePath = regexUID.sub(" ",relativePath) - # correct spaces - relativePath = regex20.sub(" ",relativePath) - relativePath = regexSlash.sub("/",relativePath).strip() - - # Reconstruct Links as pretty links - - PrettyLink = "[["+Title+"]]" - - #if relativePath == Title: - # PrettyLink = "[["+relativePath+"]] " - #else: - # PrettyLink = "[["+relativePath+"|"+Title+"]] " - - line = PrettyLink - - - - - - # Internal style links. - ## Group1:Pretty Link Title - ## Group2: URL. - ## Group3: target file name (in web form but not in exported form without symbols) - regexInternalLink = compile("\[(.[^\[\]\(\)]*)\]\((https:\/\/www.notion.so\/(?:.[^\/]*)\/(.[^\[\]\(\)]*)-.[^\[\]\(\)]*)\)") - - match = regexInternalLink.search(line) - # Substitute regex find with PrettyLink - if match: - line = regexInternalLink.sub(convertInternalLink, line) - - - - - # about:blank links (lost or missing links within Notion) - ## Group1:Pretty Link Title - regexBlankLink = compile("\[(.[^\[\]\(\)]*)\]\(about:blank#.[^\[\]\(\)]*\)") - - matchBlank = regexBlankLink.search(line) - if matchBlank: - line = regexBlankLink.sub(convertBlankLink, line) - - + line, cnt = internal_link_convert(line) + in_link_cnt += cnt - - - # Embedded attachment links - regexAttached = compile("!\[(.[^\[\]\(\)]*)\]\((.[^\[\]\(\)]*)\)") - regexUID = compile("%20\w{32}") - regex20 = compile("%20") - regexSlash = compile("\s\/") - - matchAttach = regexAttached.search(line) - if matchAttach: - attachment = matchAttach.group(1) - # Clean UID - attachment = regexUID.sub(" ",attachment) - # correct spaces - attachment = regex20.sub(" ",attachment) - attachment = regexSlash.sub("/",attachment).strip() - - # Reconstruct Links as embedded links - embededLink = "![["+attachment+"]] " + line, cnt = embedded_link_convert(line) + em_link_cnt += cnt - line = regexAttached.sub(embededLink, line) - - - # Convert tags after lines starting with "Tags:" - regexTags = "^Tags:\s(.+)" - - # Search for Internal Links. Will give match.group(1) & match.group(2) - tagMatch = search(regexTags,line) - - Otags = [] - if tagMatch: - Ntags = tagMatch.group(1).split(",") - for t in enumerate(Ntags): - Otags.append("#"+t[1].strip()) + line, cnt = convertBlankLink(line) + bl_link_cnt += cnt - line = "Tags: "+", ".join(Otags) - + line, cnt = feature_tags_convert(line) + tags_cnt += cnt newLines.append(line) - return newLines + + + return newLines, [in_link_cnt, em_link_cnt, bl_link_cnt,tags_cnt] \ No newline at end of file From 7525261a28ba46b91188a4ec048448969cb1296e Mon Sep 17 00:00:00 2001 From: Dung Tran Date: Sun, 22 Aug 2021 15:33:07 +0700 Subject: [PATCH 3/4] Remove forbit character before naming md file --- N2O.py | 4 ++-- N2Omodule.py | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/N2O.py b/N2O.py index 135bceb..9f472d5 100644 --- a/N2O.py +++ b/N2O.py @@ -38,6 +38,7 @@ def debug_print(msg): # Clean paths for Obsidian destination regexUID = compile("\s+\w{32}") +regexForbitCharacter = compile("[<>?:/\|*\"]") for line in NotionPathRaw: ObsidianPathRaw.append(regexUID.sub("", line)) @@ -102,8 +103,6 @@ def debug_print(msg): [print(line.rstrip(), file=tempFile) for line in mdTitle] - - num_link = [0, 0, 0, 0] # Process all MD files for n in mdIndex: @@ -120,6 +119,7 @@ def debug_print(msg): # Get full file name by first line of exported md file instead file name ObsidianPaths[n] ## Make temp destination file path new_file_name = mdContent[0].replace('# ', '') + '.md' + new_file_name = regexForbitCharacter.sub("", new_file_name) newfilepath = tempPath / path.dirname(ObsidianPaths[n]) / new_file_name # Check if file exists, append if true diff --git a/N2Omodule.py b/N2Omodule.py index 798e0de..794e561 100644 --- a/N2Omodule.py +++ b/N2Omodule.py @@ -214,7 +214,15 @@ def internal_link_convert(line): byte_1 = "0x" + utf8_match.group(1) byte_2 = "0x" + utf8_match.group(2) bytes_unicode = bytes([int(byte_1,0), int(byte_2,0)]) - relativePath = regexutf8.sub(str(bytes_unicode, 'utf-8'), relativePath, 1) + try: + unicode_str = str(bytes_unicode, 'utf-8') + except: + print("ERROR: convert unicode failed") + print(f" {bytes_unicode} in - {line}") + break + + relativePath = regexutf8.sub(unicode_str, relativePath, 1) + line, num_matchs = regexPath.subn("[["+relativePath+"]]", line) if num_matchs > 1: From 223dd72156497cee3980a189f7251ca9a3badfe6 Mon Sep 17 00:00:00 2001 From: Dung Tran Date: Sun, 22 Aug 2021 22:03:18 +0700 Subject: [PATCH 4/4] Fix multi level sub folder --- N2Omodule.py | 170 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 118 insertions(+), 52 deletions(-) diff --git a/N2Omodule.py b/N2Omodule.py index 794e561..f8147b4 100644 --- a/N2Omodule.py +++ b/N2Omodule.py @@ -12,6 +12,41 @@ from pathlib import Path +def str_slash_char_remove(string): + + #regex_forbidden_characters = compile('[\\/*?:"<>|]') + regexSlash = compile("\/") + string = regexSlash.sub('', string) + + return string + + +def str_forbid_char_remove(string): + + #regex_forbidden_characters = compile('[\\/*?:"<>|]') + regex_forbidden_characters = compile('[\\*?:"<>|]') + string = regex_forbidden_characters.sub('', string) + + return string + + +# convert %20 to ' ' +def str_space_utf8_replace(string): + + regex_utf8_space = compile("%20") + string = regex_utf8_space.sub(' ', string) + + return string + + +def str_notion_uid_remove(string): + + regexUID = compile("%20\w{32}") + string = regexUID.sub('', string) + + return string + + def ObsIndex(contents): """ Function to return all the relevant indices @@ -74,7 +109,7 @@ def N2Ocsv(csvFile): regexURLid = compile("(?:https?|ftp):\/\/") # Clean symbol invalid window path < > : " / \ | ? * - regexSymbols = compile("[<>?:/\|*]") + regexSymbols = compile("[<>?:/\|*\"]") regexSpaces = compile("\s+") for line in oldTitle: @@ -137,35 +172,78 @@ def convertBlankLink(line): print(f"Warning: {line} replaced {num_matchs} matchs!!") return line, num_matchs - def embedded_link_convert(line): + ''' + This internal links combine: + - Link to local page + - External notion page + - Link to Database ~ exported *.csv file + - png in notion + ''' - # Embedded attachment links - regexAttached = compile("!\[(.[^\[\]\(\)]*)\]\((.[^\[\]\(\)]*)\)") - regexUID = compile("%20\w{32}") - regex20 = compile("%20") - regexSlash = compile("\s\/") + # folder style links + #regexPath = compile("^\[(.+)\]\(([^\(]*)(?:\.md|\.csv)\)$") # Overlap incase multiple links in same line + #regexRelativePathImage = compile("(?:\.png|\.jpg|\.gif|\.bmp|\.jpeg|\.svg)") + + regexPath = compile("!\[(.*?)\]\((.*?)\)") + regex20 = compile("%20") num_matchs = 0 - matchAttach = regexAttached.search(line) - if matchAttach: - attachment = matchAttach.group(1) - # Clean UID - attachment = regexUID.sub(" ",attachment) - # correct spaces - attachment = regex20.sub(" ",attachment) - attachment = regexSlash.sub("/",attachment).strip() + # Identify and group relative paths + # While for incase multiple match on single line + pathMatch = regexPath.search(line) + if pathMatch: + # modify paths into local links. just remove UID and convert spaces + Title = pathMatch.group(1) + relativePath = pathMatch.group(2) + #is_image = regexRelativePathImage.search(relativePath) + + regexSpecialUtf8 = compile("%([A-F0-9][A-F0-9])%([A-F0-9][A-F0-9])%([A-F0-9][A-F0-9])") + regexutf8 = compile("%([A-F0-9][A-F0-9])%([A-F0-9][A-F0-9])") + regexUID = compile("%20\w{32}") - # Reconstruct Links as embedded links - embededLink = "![["+attachment+"]] " - line, num_matchs = regexAttached.subn(embededLink, line) + relativePath = str_forbid_char_remove(relativePath) + relativePath = regexUID.sub("", relativePath) + relativePath = str_space_utf8_replace(relativePath) + + utf8_match = regexutf8.search(relativePath) + while utf8_match: + is_special_utf8 = False + utf8_match = regexutf8.search(relativePath) + if utf8_match: + byte_1 = "0x" + utf8_match.group(1) + byte_2 = "0x" + utf8_match.group(2) + + if (byte_1[0:3] == "0xE") and (byte_1[3] in ['1', '2', '3', '4', '5', '6']): + + special_utf8_match = regexSpecialUtf8.search(relativePath) + byte_3 = "0x" + special_utf8_match.group(3) + bytes_unicode = bytes([int(byte_1,0), int(byte_2,0), int(byte_3,0)]) + is_special_utf8 = True + else: + bytes_unicode = bytes([int(byte_1,0), int(byte_2,0)]) + + try: + unicode_str = str(bytes_unicode, 'utf-8') + except: + print("ERROR: convert unicode failed") + print(f" {bytes_unicode} in - {line}") + break + + if is_special_utf8: + relativePath = regexSpecialUtf8.sub(unicode_str, relativePath, 1) + else: + relativePath = regexutf8.sub(unicode_str, relativePath, 1) + + line, num_matchs = regexPath.subn("[["+relativePath+"]]", line) + if num_matchs > 1: print(f"Warning: {line} replaced {num_matchs} matchs!!") return line, num_matchs - + def internal_link_convert(line): ''' This internal links combine: @@ -181,7 +259,8 @@ def internal_link_convert(line): regex20 = compile("%20") regexRelativePathNotion = compile("https:\/\/www\.notion\.so") regexRelativePathMdCsv = compile("(?:\.md|\.csv)") - regexRelativePathPng = compile("(?:\.png)") + regexRelativePathImage = compile("(?:\.png|\.jpg|\.gif|\.bmp|\.jpeg|\.svg)") + regexSlash = compile("\/") num_matchs = 0 # Identify and group relative paths @@ -191,42 +270,28 @@ def internal_link_convert(line): # modify paths into local links. just remove UID and convert spaces # Title = pathMatch.group(1) relativePath = pathMatch.group(2) - notionMatch = regexRelativePathNotion.search(relativePath) + notionMatch = regexRelativePathNotion.search(relativePath) is_md_or_csv = regexRelativePathMdCsv.search(relativePath) - is_png = regexRelativePathPng.search(relativePath) + is_image = regexRelativePathImage.search(relativePath) if is_md_or_csv or notionMatch: # Replace all matchs # line = regexPath.sub("[["++"]]", line) line, num_matchs = regexPath.subn("[["+'\\1'''+"]]", line) - elif is_png: - regexutf8 = compile("%([A-F0-9][A-F0-9])%([A-F0-9][A-F0-9])") - regexSlash = compile("\/") - regexUID = compile("%20\w{32}") - - relativePath = regexUID.sub("", relativePath) - relativePath = regex20.sub(" ", relativePath) - - utf8_match = regexutf8.search(relativePath) - while utf8_match: - utf8_match = regexutf8.search(relativePath) - if utf8_match: - byte_1 = "0x" + utf8_match.group(1) - byte_2 = "0x" + utf8_match.group(2) - bytes_unicode = bytes([int(byte_1,0), int(byte_2,0)]) - try: - unicode_str = str(bytes_unicode, 'utf-8') - except: - print("ERROR: convert unicode failed") - print(f" {bytes_unicode} in - {line}") - break - - relativePath = regexutf8.sub(unicode_str, relativePath, 1) - - line, num_matchs = regexPath.subn("[["+relativePath+"]]", line) - if num_matchs > 1: - print(f"Warning: {line} replaced {num_matchs} matchs!!") + regexMarkdownLink = compile("\[\[(.*?)\]\]") + markdownLinkMatch = regexMarkdownLink.search(line) + if markdownLinkMatch: + title = markdownLinkMatch.group(1) + title = str_notion_uid_remove(title) + title = str_space_utf8_replace(title) + title = str_forbid_char_remove(title) + title = str_slash_char_remove(title) + + if title != markdownLinkMatch.group(1): + print(line) + line = regexMarkdownLink.sub("[["+title+"]]", line) + print(f" remove forbid {line}\n") return line, num_matchs @@ -250,6 +315,7 @@ def feature_tags_convert(line): return line, num_tag + def N2Omd(mdFile): newLines = [] @@ -262,12 +328,12 @@ def N2Omd(mdFile): line = line.decode("utf-8").rstrip() - line, cnt = internal_link_convert(line) - in_link_cnt += cnt - line, cnt = embedded_link_convert(line) em_link_cnt += cnt + line, cnt = internal_link_convert(line) + in_link_cnt += cnt + line, cnt = convertBlankLink(line) bl_link_cnt += cnt