support kfx

xxyzz · Nov 18, 2020 · 27a4b65
1 parent 37e666d
commit 27a4b65
Show file tree

Hide file tree

Showing 6 changed files with 36 additions and 16 deletions.
diff --git a/__init__.py b/__init__.py
@@ -7,7 +7,7 @@ class WordDumbDumb(InterfaceActionBase):
     description = 'Create Kindle Word Wise file.'
     supported_platforms = ['linux', 'osx', 'windows']
     author = 'xxyzz'
-    version = (1, 3, 0)
+    version = (1, 4, 0)
     minimum_calibre_version = (5, 0, 0)  # Python3
     actual_plugin = 'calibre_plugins.worddumb.ui:WordDumb'
 

diff --git a/data/create_ww_sql.py b/data/create_ww_sql.py
@@ -41,8 +41,9 @@
     if total_words:
         total_words = total_words[0]
 else:
-    ww_cur.execute(
-        "CREATE TABLE words (lemma TEXT, sense_id INTEGER, difficulty INTEGER)")
+    ww_cur.execute('''
+    CREATE TABLE words (lemma TEXT, sense_id INTEGER, difficulty INTEGER)
+    ''')
 
 for language_layer in args.language_layers:
     if not Path(language_layer).is_file():

diff --git a/database.py b/database.py
@@ -60,8 +60,8 @@ def create_lang_layer(asin, book_path):
     return ll_conn, ll_cur, lang_layer_path
 
 
-def match_word(start, lemma, ll_cur, ww_cur):
-    ww_cur.execute("SELECT * FROM words WHERE lemma = ?", (lemma.lower(), ))
+def match_lemma(start, word, ll_cur, ww_cur):
+    ww_cur.execute("SELECT * FROM words WHERE lemma = ?", (word.lower(), ))
     result = ww_cur.fetchone()
     if result is not None:
         (_, sense_id, difficulty) = result

diff --git a/main.py b/main.py
@@ -30,7 +30,7 @@ def check_metadata(self, book_id):
         # check book format
         has_kindle_format = False
         for fmt in fmts:
-            if fmt.lower() in ['mobi', 'azw3']:
+            if fmt.lower() in ['mobi', 'azw3', 'kfx']:
                 has_kindle_format = True
                 book_fmt = fmt
                 break

diff --git a/parse_job.py b/parse_job.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
+import json
 import re
 
 from calibre.ebooks.mobi.reader.mobi6 import MobiReader
 from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
 from calibre.utils.logging import default_log
 from calibre_plugins.worddumb.database import (connect_ww_database,
-                                               create_lang_layer, match_word)
+                                               create_lang_layer, match_lemma)
 
 
 def do_job(gui, books, abort, log, notifications):
@@ -16,16 +17,34 @@ def do_job(gui, books, abort, log, notifications):
         if ll_conn is None:
             continue
 
-        for (start, lemma) in parse_book(book_path, book_fmt):
-            match_word(start, lemma, ll_cur, ww_cur)
+        for (start, word) in parse_book(book_path, book_fmt):
+            match_lemma(start, word, ll_cur, ww_cur)
 
         ll_conn.commit()
         ll_conn.close()
 
     ww_conn.close()
 
 
-def parse_book(pathtoebook, book_fmt):
+def parse_book(path_of_book, book_fmt):
+    if (book_fmt.lower() == 'kfx'):
+        return parse_kfx(path_of_book)
+    else:
+        return parse_mobi(path_of_book, book_fmt)
+
+
+def parse_kfx(path_of_book):
+    from calibre_plugins.kfx_input.kfxlib import YJ_Book
+
+    book = YJ_Book(path_of_book)
+    data = book.convert_to_json_content()
+    for entry in json.loads(data)['data']:
+        for match_word in re.finditer('[a-zA-Z]+', entry['content']):
+            word = entry['content'][match_word.start():match_word.end()]
+            yield (entry['position'] + match_word.start(), word)
+
+
+def parse_mobi(pathtoebook, book_fmt):
     mobiReader = MobiReader(pathtoebook, default_log)
     html = b''
     offset = 1
@@ -45,7 +64,7 @@ def parse_book(pathtoebook, book_fmt):
     for match_text in re.finditer(b">[^<>]+<", html):
         text = html[match_text.start():match_text.end()]
         # match each word inside text
-        for match_lemma in re.finditer(b"[a-zA-Z]+", text):
-            lemma = text[match_lemma.start():match_lemma.end()]
+        for match_word in re.finditer(b"[a-zA-Z]+", text):
+            word = text[match_word.start():match_word.end()]
             start = match_text.start() + match_word.start()
-            yield (start, lemma.decode('utf-8'))
+            yield (start, word.decode('utf-8'))
diff --git a/send_file.py b/send_file.py
@@ -53,9 +53,9 @@ def send_files(self, job):
             titles = [i.title for i in [self.mi]]
             plugboards = self.gui.current_db.new_api.pref('plugboards', {})
             self.device_manager.upload_books(
-                FunctionDispatcher(self.send_files), [
-                    self.book_path], [book_name],
-                on_card=None, metadata=[self.mi], titles=titles, plugboards=plugboards)
+                FunctionDispatcher(self.send_files), [self.book_path],
+                [book_name], on_card=None, metadata=[self.mi],
+                titles=titles, plugboards=plugboards)
             self.retry = True
 
     def move_ll_to_device(self, book_path):