Skip to content
Permalink
Browse files

feed-normalizerに依存しないように変更

標準のrssライブラリを使う
RSS 0.9x/2.0とAtomに対応
Entryにmodifiedを追加
RSSとAtomでsubparts_imageに対応,Atomでは説明文のimgタグを使用
Entryのdescriptionに画像のURLを含めないように変更
  • Loading branch information
yuntan committed May 4, 2019
1 parent f73bce6 commit 6bc04738dd415ae762e870296b775cea32583496
Showing with 72 additions and 66 deletions.
  1. +0 −6 Gemfile
  2. +70 −43 fetch.rb
  3. +2 −17 model/entry.rb

This file was deleted.

113 fetch.rb
@@ -1,9 +1,7 @@
# frozen_string_literal: true

# for `Time.rfc2822` and `Time.parse`
require 'time'
require 'feed-normalizer'
require 'open-uri'
require 'rss'

require_relative 'model/site'
require_relative 'model/entry'
@@ -18,18 +16,37 @@
on_rss_fetch do
UserConfig[:rss_sources].each_with_index do |url, i|
SerialThread.new do
notice "processing RSS source #{i}"

feed = FeedNormalizer::FeedNormalizer.parse open(url, HTTP_OPTIONS)
notice "processing RSS source #{i} (#{url})"

begin
feed.clean!
rescue ArgumentError # fix for GitHub issue #1
warn $!
source = open(url, HTTP_OPTIONS)
rescue OpenURI::HTTPError
warn "Failed to open #{url}"
next
end

item = begin
RSS::Parser.parse source
rescue RSS::InvalidRSSError
warn "Invalid source. Parse without validation."
RSS::Parser.parse source, false # parse without validation
end

case item
when RSS::RDF
notice "RSS source #{i} is RSS 1.0"
warn "RSS 1.0 not supported."
when RSS::Rss
notice "RSS source #{i} is RSS 0.9x/2.0"
when RSS::Atom::Feed
notice "RSS source #{i} is Atom"
else
warn "Invalid source. skipping."
next
end

site = get_site feed
entries = feed.entries.map { |entry| get_entry site, entry }
site = get_site item
entries = item.items.map { |entry| get_entry site, entry }

notice "got #{entries.length} entries for source #{i}"

@@ -39,42 +56,52 @@
end
end

def get_site(feed)
Plugin::RSS::Site.new(
title: feed.title,
perma_link: URI.parse(feed.url),
# image: feed.image
)
# Builds the Site model describing the feed itself.
#
# @param item [RSS::Rss, RSS::Atom::Feed] the parsed feed document
# @return [Plugin::RSS::Site, nil] nil when +item+ is neither of the two
#   handled feed classes (there is deliberately no else branch)
def get_site(item)
  case item
  when RSS::Rss
    channel = item.channel
    Plugin::RSS::Site.new(
      title: channel.title,
      perma_link: URI.parse(channel.link),
    )
  when RSS::Atom::Feed
    # An Atom feed may carry several <link> elements (self, hub, alternate...).
    # The site's page is the "alternate" one; a missing rel means "alternate".
    # Fall back to the first link so feeds without an alternate keep working.
    site_link = item.links.find { |l| l.rel.nil? || l.rel == 'alternate' } || item.link
    Plugin::RSS::Site.new(
      title: item.title.content,
      perma_link: URI.parse(site_link.href),
    )
  end
end

def get_entry(site, entry)
Plugin::RSS::Entry.new(
site: site,
title: entry.title,
author: entry.authors.first,
content: entry.content,
created: get_created(entry),
perma_link: URI.parse(entry.urls.first)
)
end

def get_created(entry)
if !entry.date_published.nil?
date = entry.date_published
elsif !entry.last_updated.nil?
date = entry.last_updated
else
return Time.now
# Builds a Plugin::RSS::Entry for one feed item. The fields are read
# differently for RSS 0.9x/2.0 items and for Atom entries; any other item
# class matches no branch, so the case expression evaluates to nil.
case entry
when RSS::Rss::Channel::Item
# An RSS item only has pubDate, so it serves as both created and modified;
# the current time is used when the item has no pubDate.
pub_date = entry.pubDate&.localtime
Plugin::RSS::Entry.new(
site: site,
title: entry.title,
created: pub_date || Time.now,
modified: pub_date || Time.now,
perma_link: URI.parse(entry.link),
).tap do |e|
# The enclosure URL, when present, becomes the single subparts image.
# NOTE(review): the enclosure's media type is not checked here; confirm
# that non-image enclosures (e.g. audio) are acceptable.
e[:subparts_images] = [URI.parse(entry.enclosure.url)] if entry.enclosure
end
when RSS::Atom::Feed::Entry
# created prefers <published>, modified prefers <updated>; each falls back
# to the other and finally to the current time.
published = entry.published&.content&.localtime
updated = entry.updated&.content&.localtime
Plugin::RSS::Entry.new(
site: site,
title: entry.title.content,
created: published || updated || Time.now,
modified: updated || published || Time.now,
perma_link: URI.parse(entry.link.href),
).tap do |e|
# Images are taken from the entry's content via get_image_urls.
# NOTE(review): this stores whatever get_image_urls returns, whereas the
# RSS branch stores URI objects; confirm the consumer accepts both.
content = entry.content&.content
e[:subparts_images] = get_image_urls content if content
end
end
end

return date.localtime if date.is_a? Time

begin
Time.rfc2822(date).localtime
rescue ArgumentError
Time.parse(date).localtime
rescue ArgumentError
Time.now
end
# Extracts the image URLs referenced by <img> tags in an HTML fragment.
#
# <img> elements without a src attribute (or with an empty one) are skipped,
# so the result never contains nil or "" entries.
#
# @param html [String] HTML markup, e.g. the content of an Atom entry
# @return [Array<String>] the src attribute values, in document order
def get_image_urls(html)
  sources = Nokogiri::HTML(html).search('img').map { |img| img['src'] }
  sources.reject { |src| src.nil? || src.empty? }
end
end
@@ -13,10 +13,9 @@ class Entry < Diva::Model
register :rss_entry, name: 'RSS entry', timeline: true

field.has :site, Site, required: true
field.string :author
field.string :title, required: true # for basis model
field.string :content, required: true
field.time :created, required: true
field.time :modified, required: true
# should be implemented for message model
field.uri :perma_link, required: true

@@ -35,18 +34,9 @@ def user

# should be implemented for message model
def description
# The result is memoised in @description.
# NOTE(review): the two-line assignment (title plus image URLs) and the
# single-line assignment after it (title only) appear to be the before and
# after forms of one statement shown together by the diff view; as written,
# the second assignment is a no-op once the first has run.
@description ||=
"#{dehtmlize title} #{get_image_urls(content).join(' ')}".strip
@description ||= dehtmlize title
end

# for mikutter-subparts_image plugin
# def subparts_images
# return @_subparts_images if @_subparts_images
#
# doc = Nokogiri::HTML html
# @_subparts_images = doc.search('img').map { |img| img['src'] }
# end

# * replace <a> tags with plain text
# * remove HTML tags
def dehtmlize(html)
@@ -57,11 +47,6 @@ def dehtmlize(html)

doc.text.delete("\n").strip
end

# Returns the src attribute of every <img> element found in the given HTML
# string; an <img> without src contributes nil.
def get_image_urls(html)
doc = Nokogiri::HTML html
doc.search('img').map { |img| img['src'] }
end
end
end
end

0 comments on commit 6bc0473

Please sign in to comment.
You can’t perform that action at this time.