Skip to content

Commit b8b7a56

Browse files
author
Umputun
committed
init
0 parents  commit b8b7a56

File tree

9 files changed

+267
-0
lines changed

9 files changed

+267
-0
lines changed

Dockerfile

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# feed master
FROM debian:wheezy
MAINTAINER Umputun <feedmaster@umputun.com>

# Install pip plus the python libraries imported by feed-master.py
# (feedparser, plumbum, pymongo), then purge the compiler toolchain listed in
# build_deps and remove package-manager leftovers.
# RUN executes through /bin/sh, which does not perform brace expansion, so the
# directories of the final cleanup are spelled out one by one; the former
# "/var/lib/{apt,dpkg,cache,log}" form was taken as one literal path and
# removed nothing.
RUN \
 build_deps='binutils build-essential bzip2 cpp cpp-4.7 dpkg-dev fakeroot file g++ g++-4.7 gcc gcc-4.7' && \
 apt-get update && apt-get upgrade -y --no-install-recommends && \
 apt-get install -y python-pip && \
 apt-get autoremove -y && apt-get clean && \
 pip install feedparser plumbum pymongo && \
 apt-get purge -y --auto-remove $build_deps && \
 rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
 rm -rf /var/lib/apt /var/lib/dpkg /var/lib/cache /var/lib/log

# Dedicated system user/group; /srv (including /srv/data, where exec.sh writes
# the generated feed) is handed over to it.
RUN \
 groupadd -r feedmaster && useradd -r -g feedmaster feedmaster && \
 mkdir /srv/data && \
 chown -R feedmaster:feedmaster /srv

# Configuration directory, expected to be mounted from the host at run time.
VOLUME ["/srv/config"]

USER feedmaster
ADD src/feed-master.py /srv/feed-master.py
ADD exec.sh /srv/exec.sh
ADD src/config/__init__.py /srv/config/__init__.py

WORKDIR /srv
ENTRYPOINT ["/srv/exec.sh"]

Makefile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Build the feed-master image from the Dockerfile in this directory.
build:
	docker build -t umputun/feed-master .

# (Re)start the feed-master container linked to an existing "mongodb"
# container. The leading "-" lets make continue when there is no old
# container to remove.
run:
	-docker rm -f feed-master
	docker run -d --name feed-master -v $(shell pwd)/config:/srv/config \
		-p 8099:8099 --link mongodb:mongodb umputun/feed-master

# Same as "run", but first tries to start a "mongodb" container; a failure of
# that step (for example, the container already exists) is ignored.
run-with-mongo:
	-docker run -d --name=mongodb -p 27017:27017 -v /data/mongo:/data/db mongo:latest mongod --smallfiles --noprealloc
	-docker rm -f feed-master
	docker run -d --name feed-master -v $(shell pwd)/config:/srv/config \
		-p 8099:8099 --link mongodb:mongodb umputun/feed-master


# Restart the running feed-master container.
reload:
	docker restart feed-master

# None of the targets produce files, so all of them (run-with-mongo included)
# are declared phony.
.PHONY: build run run-with-mongo reload
20+

config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
config/config.py

config/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__author__ = 'umputun'

config/config.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# coding=utf-8

# Source feeds as (display name, feed URL) pairs. feed-master.py iterates this
# list in order; the name is only used in log messages.
feeds = [
    ("Владимир Кара-Мурза", "http://www.echo.msk.ru/programs/graniweek/rss-audio.xml"),
    ("Народ против", "http://www.echo.msk.ru/programs/opponent/rss-audio.xml"),
    ("Ганапольское", "http://www.echo.msk.ru/programs/ganapolskoe_itogi/rss-audio.xml"),
    ("Без посредников", "http://www.echo.msk.ru/programs/nomed/rss-audio.xml"),
    ("Точка", "http://www.echo.msk.ru/programs/tochka/rss-audio.xml"),
    ("Кейс", "http://www.echo.msk.ru/programs/keys/rss-audio.xml"),
    ("Блог-аут", "http://www.echo.msk.ru/programs/blogout1/rss-audio.xml"),
    ("Альбац", "http://www.echo.msk.ru/contributors/7/rss-audio.xml"),
    ("Код доступа", "http://www.echo.msk.ru/programs/code/rss-audio.xml"),
    ("Цена Победы", "http://www.echo.msk.ru/programs/victory/rss-audio.xml"),
    ("Все так", "http://www.echo.msk.ru/programs/vsetak/rss-audio.xml"),
    ("Не так", "http://www.echo.msk.ru/programs/netak/rss-audio.xml"),
    ("В круге Света", "http://echo.msk.ru/programs/sorokina/rss-audio.xml"),
    ("Суть событий", "http://www.echo.msk.ru/programs/sut/rss-audio.xml"),
    ("Попутчики", "http://www.echo.msk.ru/programs/poputchiki/rss-audio.xml"),
    ("Русский бомбардир", "http://www.echo.msk.ru/programs/orekh_osin/rss-audio.xml"),
    ("Дилетанты", "http://echo.msk.ru/programs/Diletanti/rss-audio.xml"),
    ("Цена революции", "http://echo.msk.ru/programs/cenapobedy/rss-audio.xml"),
    ("Большой дозор", "http://echo.msk.ru/programs/dozor/rss-audio.xml"),
    ("Без дураков", "http://echo.msk.ru/programs/korzun/rss-audio.xml"),
    ("Особое мнение", "http://echo.msk.ru/programs/personalno/rss-audio.xml"),
    ("2014", "http://www.echo.msk.ru/programs/year2014/rss-audio.xml"),
    # ("Разворот", "http://www.echo.msk.ru/programs/razvorot/rss-audio.xml"),
    ("Интервью", "http://www.echo.msk.ru/programs/beseda/rss-audio.xml"),
    ("48 минут", "http://www.echo.msk.ru/programs/48minut/rss-audio.xml"),
    ("Выбор ясен", "http://www.echo.msk.ru/programs/vyboryasen/rss-audio.xml"),
    ("Ходорковский", "http://echo.msk.ru/guests/369/rss-audio.xml"),
]

# Settings of the generated (combined) feed.
settings = {

    # Channel-level <title>, <description> and <link> of the output feed.
    "info": {
        "title": u"Эхо Москвы",
        "description": u"Правильный, комбинированный фид избранных передач (версия 2)",
        "link": "http://echo.msk.ru"
    },
    # Written to the channel <language> element.
    "language": "ru-ru",
    # How many of the first entries of each source feed are considered per update.
    "max_items_per_feed": 5,
    # Upper bound on the number of items in the generated feed.
    "max_items_total": 100
}

exec.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash

# Serve the generated feed over HTTP (port 8099) from /srv/data.
# Stop right away if the directory is missing: otherwise the HTTP server
# would expose whatever directory the script happened to start in.
cd /srv/data || exit 1
python -m SimpleHTTPServer 8099 &

# Every 10 minutes: pull new items into mongo, then regenerate the feed file.
while true
do
    /srv/feed-master.py update -m mongodb
    /srv/feed-master.py generate -m mongodb -f /srv/data/feed.xml
    sleep 600
done

src/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__author__ = 'umputun'

src/config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../config

src/feed-master.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
#!/usr/bin/env python
2+
3+
__author__ = 'Umputun'
4+
5+
import logging
6+
import sys
7+
import time
8+
from datetime import datetime
9+
from email import utils
10+
import os
11+
import socket
12+
import feedparser
13+
from plumbum import cli
14+
import pymongo
15+
16+
from config.config import feeds
17+
from config.config import settings
18+
19+
20+
# Configure the root logger once at import time. basicConfig() works in place
# and returns None, so its result is intentionally not bound to a name; code
# that needs the root logger uses logging.root.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s - %(message)s', stream=sys.stdout)

# Default socket timeout, in seconds, installed before feeds are fetched.
NET_TIMEOUT = 15
22+
23+
24+
class App(cli.Application):
    '''Feed-master utility'''

    # Root command-line application; the "update" and "generate" subcommands
    # are attached to it with @App.subcommand.
    # Name and version of the command-line program.
    PROGNAME = "feed-master"
    VERSION = "1.1"
29+
30+
31+
@App.subcommand("update")
class UpdateItems(cli.Application):
    '''Update mongo from sources'''

    # Mongo address, either "host" or "host:port"; overridden by --mongo/-m.
    mongo_host = "127.0.0.1:27017"

    @cli.switch("--dbg", help="enable debug")
    def set_log_level(self):
        # Switch the root logger to DEBUG for this run.
        logging.root.setLevel(logging.DEBUG)

    @cli.switch(["--mongo", "-m"], str, help="set mongo server")
    def set_mongo(self, mongo_host):
        # Remember the mongo address passed on the command line.
        logging.debug("set mongo=%s", mongo_host)
        self.mongo_host = mongo_host

    def main(self):
        """Fetch every configured feed and store the items mongo does not have yet.

        For each (name, url) pair in ``feeds`` the first
        ``settings['max_items_per_feed']`` entries are examined. An entry is
        keyed by the URL of its enclosure link and saved to the
        ``feed_master.feed`` collection unless a record with that ``_id``
        already exists. Entries without an enclosure link, without the
        expected fields, or with an unusable publication date are skipped
        with a warning instead of aborting the whole run.
        """
        logging.info("feed loading initiated")
        socket.setdefaulttimeout(NET_TIMEOUT)

        if self.mongo_host.find(":") != -1:
            (mongo_ip, mongo_port) = self.mongo_host.split(":")
            mongo_client = pymongo.MongoClient(mongo_ip, int(mongo_port))
        else:
            mongo_client = pymongo.MongoClient(self.mongo_host)

        db = mongo_client["feed_master"]["feed"]
        # Descending index on the publication date, the sort key of "generate".
        db.create_index([("published", -1)])

        new_items = 0
        for (name, feed) in feeds:
            logging.debug("loading %s - %s", name, feed)
            d = feedparser.parse(feed)
            last_items = d['entries'][:settings['max_items_per_feed']]
            for item in last_items:
                # Pull everything needed out of the entry first, so that one
                # malformed entry cannot stop the processing of the others.
                try:
                    enclosures = [x for x in item.get('links', []) if x.get('rel') == 'enclosure']
                    if not enclosures:
                        logging.warning("no enclosure, skip item %s from %s", item.get('title'), name)
                        continue
                    enclosure = enclosures[0]
                    title = item['title']
                    description = item['description']
                    # NOTE(review): published_parsed is presumably a UTC
                    # struct_time while time.mktime() reads it as local time;
                    # the two agree only when the process runs in UTC - confirm.
                    pub_dtime = datetime.fromtimestamp(time.mktime(item['published_parsed']))
                except (KeyError, TypeError, ValueError, OverflowError) as e:
                    logging.warning("malformed item in %s skipped, error=%r", name, e)
                    continue

                if db.find_one({"_id": enclosure['href']}):
                    logging.debug("already here, skip %s", enclosure['href'])
                    continue

                now = datetime.now()
                if pub_dtime > now or abs((pub_dtime - now).total_seconds()) < 2 * 60 * 60:
                    # for items in the future or close enough to now - reset timestamp
                    logging.debug("timestamp adjusted to now for %s - %s", title, pub_dtime)
                    pub_dtime = now
                mrec = {"_id": enclosure['href'], 'enclosure': enclosure, 'title': title,
                        'description': description, 'published': pub_dtime}
                db.save(mrec)
                logging.info("new item %s %s", title, pub_dtime)
                new_items += 1
        logging.info("feed loading completed, new items=%d", new_items)
82+
83+
84+
def format_datetime_rfc2822(dt):
    """Render the datetime *dt* as an RFC 2822 date string.

    The broken-down fields of *dt* are interpreted as local time when they
    are converted to a POSIX timestamp; the timestamp is then formatted by
    ``email.utils.formatdate`` with its default arguments.
    """
    epoch_seconds = time.mktime(dt.timetuple())
    return utils.formatdate(epoch_seconds)
86+
87+
88+
def _xml_element(tag, value):
    """Return one ``<tag>value</tag>`` line as UTF-8 bytes, value XML-escaped."""
    from xml.sax.saxutils import escape
    return (u"<%s>%s</%s>\n" % (tag, escape(value), tag)).encode('utf-8')


def _render_item(item):
    """Return the complete ``<item>`` element for a stored record as UTF-8 bytes.

    The element is built in memory so that a record with missing or unusable
    fields raises before anything has been written to the feed file.
    """
    from xml.sax.saxutils import quoteattr
    enclosure = item['enclosure']
    # quoteattr() escapes the value and supplies the surrounding quotes.
    enclosure_line = u'<enclosure length=%s type="audio/mpeg" url=%s/>\n' % (
        quoteattr(u"%s" % enclosure['length']), quoteattr(enclosure['href']))
    return b"".join([
        b"<item>\n",
        _xml_element("title", item['title']),
        _xml_element("description", item['description']),
        _xml_element("link", item['_id']),
        _xml_element("pubDate", format_datetime_rfc2822(item['published'])),
        _xml_element("guid", item['_id']),
        enclosure_line.encode('utf-8'),
        b"</item>\n",
    ])


@App.subcommand("generate")
class GenerateFeed(cli.Application):
    '''Generate RSS feed'''

    # Mongo address, either "host" or "host:port"; overridden by --mongo/-m.
    mongo_host = "127.0.0.1:27017"
    # Output path; overridden by --file/-f.
    feed_file = "feed.xml"

    @cli.switch("--dbg", help="enable debug")
    def set_log_level(self):
        # Switch the root logger to DEBUG for this run.
        logging.root.setLevel(logging.DEBUG)

    @cli.switch(["--mongo", "-m"], str, help="set mongo server")
    def set_mongo(self, mongo_host):
        # Remember the mongo address passed on the command line.
        logging.debug("set mongo=%s", mongo_host)
        self.mongo_host = mongo_host

    @cli.switch(["--file", "-f"], str, help="set feed file")
    def set_feed_file(self, feed_file):
        # Remember the output path passed on the command line.
        logging.info("set feed file=%s", feed_file)
        self.feed_file = feed_file

    def main(self):
        """Write the combined RSS feed from the newest records stored in mongo.

        Up to ``settings['max_items_total']`` records of the
        ``feed_master.feed`` collection, newest first, are rendered. The feed
        is written to ``<feed_file>.tmp`` and then renamed over ``feed_file``,
        so readers never see a half-written file. A record that cannot be
        rendered is logged and left out; text and attribute values are
        XML-escaped. With an empty collection a feed without items is
        produced and the channel date is the current time.
        """
        logging.info("feed generation initiated")

        if self.mongo_host.find(":") != -1:
            (mongo_ip, mongo_port) = self.mongo_host.split(":")
            mongo_client = pymongo.MongoClient(mongo_ip, int(mongo_port))
        else:
            mongo_client = pymongo.MongoClient(self.mongo_host)
        db = mongo_client["feed_master"]["feed"]

        items = db.find().sort("published", -1).limit(settings['max_items_total'])
        # find_one() returns None for an empty collection.
        newest = db.find_one(sort=[("published", -1)])
        last_date = format_datetime_rfc2822(newest['published'] if newest else datetime.now())
        total_items = 0

        rss_header = (
            b'\n<rss xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" '
            b'xmlns:content="http://purl.org/rss/1.0/modules/content/"\n'
            b'xmlns:media="http://search.yahoo.com/mrss/" '
            b'xmlns:yt="http://gdata.youtube.com/schemas/2007"\n'
            b'xmlns:atom="http://www.w3.org/2005/Atom" '
            b'xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" version="2.0">\n'
            b'\n<channel>\n\n'
        )

        # Binary mode: every chunk written below is already UTF-8 encoded.
        with open(self.feed_file + ".tmp", "wb") as feed_file:
            feed_file.write(rss_header)
            feed_file.write(_xml_element("title", settings['info']['title']))
            feed_file.write(_xml_element("description", settings['info']['description']))
            feed_file.write(_xml_element("link", settings['info']['link']))
            feed_file.write(_xml_element("pubDate", last_date))
            feed_file.write(_xml_element("language", settings['language']))
            feed_file.write(b"<generator>feed-master by Umputun</generator>\n")

            for item in items:
                try:
                    rendered = _render_item(item)
                except Exception as e:
                    # Best effort: one bad record must not break the whole feed.
                    logging.warning("failed to write %s, error=%s", item, e)
                    continue
                feed_file.write(rendered)
                total_items += 1

            feed_file.write(b'</channel>\n</rss>\n')

        os.rename(self.feed_file + '.tmp', self.feed_file)
        logging.info("feed generation completed, total feeds=%d, total items=%d", len(feeds), total_items)
157+
158+
159+
# Run the command-line application only when this file is executed as a script.
if __name__ == "__main__":
    App.run()

0 commit comments

Comments
 (0)