Skip to content

Commit

Permalink
support kfx
Browse files Browse the repository at this point in the history
  • Loading branch information
xxyzz committed Nov 18, 2020
1 parent 37e666d commit 27a4b65
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 16 deletions.
2 changes: 1 addition & 1 deletion __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class WordDumbDumb(InterfaceActionBase):
description = 'Create Kindle Word Wise file.'
supported_platforms = ['linux', 'osx', 'windows']
author = 'xxyzz'
version = (1, 3, 0)
version = (1, 4, 0)
minimum_calibre_version = (5, 0, 0) # Python3
actual_plugin = 'calibre_plugins.worddumb.ui:WordDumb'

Expand Down
5 changes: 3 additions & 2 deletions data/create_ww_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@
if total_words:
total_words = total_words[0]
else:
ww_cur.execute(
"CREATE TABLE words (lemma TEXT, sense_id INTEGER, difficulty INTEGER)")
ww_cur.execute('''
CREATE TABLE words (lemma TEXT, sense_id INTEGER, difficulty INTEGER)
''')

for language_layer in args.language_layers:
if not Path(language_layer).is_file():
Expand Down
4 changes: 2 additions & 2 deletions database.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def create_lang_layer(asin, book_path):
return ll_conn, ll_cur, lang_layer_path


def match_word(start, lemma, ll_cur, ww_cur):
ww_cur.execute("SELECT * FROM words WHERE lemma = ?", (lemma.lower(), ))
def match_lemma(start, word, ll_cur, ww_cur):
ww_cur.execute("SELECT * FROM words WHERE lemma = ?", (word.lower(), ))
result = ww_cur.fetchone()
if result is not None:
(_, sense_id, difficulty) = result
Expand Down
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def check_metadata(self, book_id):
# check book format
has_kindle_format = False
for fmt in fmts:
if fmt.lower() in ['mobi', 'azw3']:
if fmt.lower() in ['mobi', 'azw3', 'kfx']:
has_kindle_format = True
book_fmt = fmt
break
Expand Down
33 changes: 26 additions & 7 deletions parse_job.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#!/usr/bin/env python3
import json
import re

from calibre.ebooks.mobi.reader.mobi6 import MobiReader
from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
from calibre.utils.logging import default_log
from calibre_plugins.worddumb.database import (connect_ww_database,
create_lang_layer, match_word)
create_lang_layer, match_lemma)


def do_job(gui, books, abort, log, notifications):
Expand All @@ -16,16 +17,34 @@ def do_job(gui, books, abort, log, notifications):
if ll_conn is None:
continue

for (start, lemma) in parse_book(book_path, book_fmt):
match_word(start, lemma, ll_cur, ww_cur)
for (start, word) in parse_book(book_path, book_fmt):
match_lemma(start, word, ll_cur, ww_cur)

ll_conn.commit()
ll_conn.close()

ww_conn.close()


def parse_book(pathtoebook, book_fmt):
def parse_book(path_of_book, book_fmt):
    """Pick the parser that matches the book's format and return its result.

    The format comparison is case-insensitive: 'kfx' (in any casing) is
    handed to parse_kfx; every other format goes to parse_mobi together
    with the original ``book_fmt`` value.

    :param path_of_book: path of the book file to parse
    :param book_fmt: format name of the book, e.g. 'kfx'
    :return: whatever the selected parser returns (both parsers in this
        module are generators yielding ``(start, word)`` tuples)
    """
    is_kfx = book_fmt.lower() == 'kfx'
    if is_kfx:
        return parse_kfx(path_of_book)
    # Anything that is not KFX is delegated to the MOBI parser.
    return parse_mobi(path_of_book, book_fmt)


def parse_kfx(path_of_book):
    """Yield ``(start, word)`` for every ASCII-letter word in a KFX book.

    The book is loaded with ``YJ_Book`` and converted to JSON text via
    ``convert_to_json_content()``. Each item under the top-level ``'data'``
    key is expected to carry a ``'content'`` string and a ``'position'``
    number.

    :param path_of_book: path of the KFX file, passed to ``YJ_Book``
    :return: generator of ``(start, word)`` tuples, where ``start`` is the
        entry's ``'position'`` plus the word's offset inside ``'content'``
    """
    # Imported inside the function rather than at module level; presumably
    # so the KFX Input plugin is only required when a KFX book is actually
    # parsed -- TODO confirm.
    from calibre_plugins.kfx_input.kfxlib import YJ_Book

    json_text = YJ_Book(path_of_book).convert_to_json_content()
    entries = json.loads(json_text)['data']
    for entry in entries:
        text = entry['content']
        for found in re.finditer('[a-zA-Z]+', text):
            # NOTE(review): 'position' looks like the offset of this
            # entry's content within the book -- confirm against kfxlib.
            yield (entry['position'] + found.start(), found.group())


def parse_mobi(pathtoebook, book_fmt):
mobiReader = MobiReader(pathtoebook, default_log)
html = b''
offset = 1
Expand All @@ -45,7 +64,7 @@ def parse_book(pathtoebook, book_fmt):
for match_text in re.finditer(b">[^<>]+<", html):
text = html[match_text.start():match_text.end()]
# match each word inside text
for match_lemma in re.finditer(b"[a-zA-Z]+", text):
lemma = text[match_lemma.start():match_lemma.end()]
for match_word in re.finditer(b"[a-zA-Z]+", text):
word = text[match_word.start():match_word.end()]
start = match_text.start() + match_word.start()
yield (start, lemma.decode('utf-8'))
yield (start, word.decode('utf-8'))
6 changes: 3 additions & 3 deletions send_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ def send_files(self, job):
titles = [i.title for i in [self.mi]]
plugboards = self.gui.current_db.new_api.pref('plugboards', {})
self.device_manager.upload_books(
FunctionDispatcher(self.send_files), [
self.book_path], [book_name],
on_card=None, metadata=[self.mi], titles=titles, plugboards=plugboards)
FunctionDispatcher(self.send_files), [self.book_path],
[book_name], on_card=None, metadata=[self.mi],
titles=titles, plugboards=plugboards)
self.retry = True

def move_ll_to_device(self, book_path):
Expand Down

0 comments on commit 27a4b65

Please sign in to comment.