-
Notifications
You must be signed in to change notification settings - Fork 6
/
models.py
86 lines (70 loc) · 3.37 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# coding=utf-8
"""
everblog.models
~~~~~~~~~~~~~~~
quick_orm database models
"""
import re, json, datetime, anydbm
from quick_orm.core import Database
from sqlalchemy import Column, String, Text, DateTime, func, Integer, Enum
from toolkit_library.text_converter import TextConverter
from toolkit_library.web_client import WebClient
from toolkit_library.encryption import Encryption
from everblog import db, app
# Extracts the JSON object literal assigned to ``Evernote.WebClient.Note`` in
# a downloaded page: everything from the opening ``{`` to the first ``}`` that
# is followed by ``;``, whitespace and ``</script>``. ``.`` does not match
# newlines here, so the object has to sit on a single line.
evernote_data_pattern = re.compile(
    r'(?<=Evernote\.WebClient\.Note = )\{.+?\}(?=;\s+?</script>)')

# Matches a single character in the range U+4E00..U+9FA5; used to estimate
# how much of a text is written in Chinese. Kept as a non-raw unicode literal
# so the \u escapes are resolved by the string literal itself.
chinese_character_pattern = re.compile(u'[\u4e00-\u9fa5]')

# Captures absolute http(s) URLs ending in .png/.jpg/.jpeg/.gif that appear
# in a ``src="..."`` attribute (the leading space is part of the match).
img_url_pattern = re.compile(r' src="(https?://.+?\.(?:png|jpg|jpeg|gif))"')
# Python 2 module-level metaclass hook: classes in this module that declare no
# base classes (Article, Tag) are created through Database.DefaultMeta.
# NOTE(review): presumably this is what turns them into quick_orm models -
# confirm against the quick_orm documentation.
__metaclass__ = Database.DefaultMeta
class Article:
    """Represents an article whose data is pulled from an Evernote page.

    Columns:
        uid          -- the note's ``created`` value divided by 1000 (the
                        value is treated as milliseconds since the epoch)
        created      -- the same instant as ``uid``, as a naive UTC datetime
        updated      -- the note's ``updated`` value, as a naive UTC datetime
        published    -- defaults to the database's ``now()``
        evernote_url -- URL of the page the article is synchronized from
        title        -- note title
        content      -- note body, with cached image URLs rewritten to
                        ``/image/<key>/``
    """
    uid = Column(Integer, nullable=False, unique=True, index=True)
    created = Column(DateTime, nullable=False, unique=True, index=True)
    updated = Column(DateTime, nullable=False)
    published = Column(DateTime, nullable=False, default=func.now())
    evernote_url = Column(String(128), nullable=False, unique=True)
    title = Column(String(128), nullable=False)
    content = Column(Text, nullable=False)

    def synchronize(self):
        """Refresh this article from the page at ``self.evernote_url``.

        Downloads the page, extracts the embedded note JSON and updates the
        title, content, timestamps and tags from it. Every image matched by
        ``img_url_pattern`` in the content is downloaded and stored in the
        dbm file named by ``app.config['IMAGE_CACHE']``, and its URL in the
        content is replaced by ``/image/<key>/``.

        Raises:
            AttributeError: if the page does not contain the expected
                ``Evernote.WebClient.Note = {...}`` assignment.
        """
        page = WebClient.download_binary(self.evernote_url).decode('UTF-8')
        note_json = evernote_data_pattern.search(page).group()
        # Undo the \< and \> escaping before parsing: a backslash followed by
        # an angle bracket is not a valid JSON escape sequence.
        dict_ = json.loads(note_json.replace('\\<', '<').replace('\\>', '>'))

        self.title = dict_['title']
        self.content = dict_['content']

        image_urls = img_url_pattern.findall(self.content)
        if image_urls:
            # Open the cache *before* entering the try block: if opening
            # fails there is nothing to close, and the real error must not be
            # masked by a NameError raised from the finally clause.
            dbm = anydbm.open(app.config['IMAGE_CACHE'], 'c')
            try:
                for img_url in image_urls:
                    key = Encryption.computer_hashcode(img_url)
                    dbm[key] = WebClient.download_binary(img_url)
                    self.content = self.content.replace(
                        img_url, '/image/{0}/'.format(key))
            finally:
                dbm.close()

        # Floor division keeps the results integral regardless of the
        # interpreter's default division semantics.
        self.uid = dict_['created'] // 1000
        self.created = datetime.datetime.utcfromtimestamp(self.uid)
        self.updated = datetime.datetime.utcfromtimestamp(
            dict_['updated'] // 1000)

        # Rebuild the tag list from scratch, reusing Tag rows that already
        # exist so the unique constraint on Tag.name is respected.
        # A missing or null 'tagNames' entry means the note has no tags.
        self.tags = []
        for tag_name in dict_.get('tagNames') or ():
            tag = db.session.query(Tag).filter_by(name=tag_name).first()
            self.tags.append(tag if tag else Tag(name=tag_name))
class BlogEntry(Article):
    """Represents a note in evernote or a blog entry in everblog.

    Columns:
        snippet -- the first 128 characters of the plain-text content
        lang    -- language code, 'en' (default) or 'cn'
    """
    snippet = Column(String(128), nullable=False)
    lang = Column(Enum('en', 'cn', name='language_code'),
                  nullable=False, index=True, default='en')

    def synchronize(self):
        """Synchronize the article, then derive ``snippet`` and ``lang``.

        ``lang`` is set to 'cn' when more than 10% of the characters of the
        plain-text content match ``chinese_character_pattern``; otherwise it
        is left untouched.
        """
        super(BlogEntry, self).synchronize()
        text_content = TextConverter.html_to_text(self.content)
        self.snippet = text_content[:128]
        # An empty text has no measurable language; bail out here instead of
        # dividing by zero below.
        if not text_content:
            return
        chinese_count = len(chinese_character_pattern.findall(text_content))
        if float(chinese_count) / len(text_content) > 0.1:
            self.lang = 'cn'
class Page(Article):
    """A standalone page of the website, for example the about page."""
    # Integer column; rows that do not specify a value get 1.
    order = Column(Integer, default=1, nullable=False)
@Database.many_to_many(Article)
class Tag:
    """A tag in Evernote; each tag name is stored at most once."""
    # Unique, indexed tag name of up to 64 characters.
    name = Column(String(64), unique=True, index=True, nullable=False)
# NOTE(review): presumably finalizes the quick_orm model declarations made
# above (Article, BlogEntry, Page, Tag) - confirm against the quick_orm docs.
Database.register()