Skip to content

Commit

Permalink
Merge pull request #62 from hogewest/fix_onsen
Browse files Browse the repository at this point in the history
Fix onsen scraping and download
  • Loading branch information
yayugu committed Nov 10, 2020
2 parents 17eb586 + e3e4bf1 commit ad1cbc9
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 37 deletions.
23 changes: 12 additions & 11 deletions lib/onsen/downloading.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,26 @@ class Downloading
# Records one program with ffmpeg and moves the result into the archive.
#
# The span previously interleaved the superseded direct-download code
# (Main::download / force_mp4 conversion) with the ffmpeg implementation,
# leaving an unclosed `unless`; only the ffmpeg implementation is kept.
#
# @param program [#id, #file_url, #date] record to fetch; +file_url+ is the
#   ffmpeg input, +date+ is passed to the archive step, +id+ is only logged
# @return [Boolean] true when ffmpeg succeeded and the file was archived,
#   false when ffmpeg failed (the failure is logged via Rails.logger)
def download(program)
  path = filepath(program)
  Main::prepare_working_dir(CH_NAME)

  # URL and output path are shell-escaped; presumably Main::ffmpeg hands the
  # string to a shell -- confirm against its definition.
  arg = [
    '-loglevel warning',
    '-y',
    "-i #{Shellwords.escape(program.file_url)}",
    '-vcodec libx264 -acodec copy -bsf:a aac_adtstoasc',
    Shellwords.escape(path)
  ].join(' ')

  exit_status, output = Main::ffmpeg(arg)
  # Non-blank output is treated as a failure even when the exit status is OK.
  unless exit_status.success? && output.blank?
    Rails.logger.error "rec failed. onsen program:#{program.id}, exit_status:#{exit_status}, output:#{output}"
    return false
  end

  Main::move_to_archive_dir(CH_NAME, program.date, path)
  true
end

# Builds the working-directory path the recording is written to.
#
# The leftover pre-change statements (URL extension sniffing, the first
# title assignment and the discarded first Main::file_path_working call)
# are removed: their results were never used, and the extension regex
# raised NoMethodError for URLs without a file extension.
#
# @param program [#date, #title, #number, #personality] program record
# @return whatever Main::file_path_working returns for the channel, the
#   "<date>_<title>_<number>_<personality>" title and the fixed 'mp4' extension
def filepath(program)
  date = program.date.strftime('%Y_%m_%d')
  title = "#{date}_#{program.title}_#{program.number}_#{program.personality}"
  Main::file_path_working(CH_NAME, title, 'mp4')
end
end
end
52 changes: 26 additions & 26 deletions lib/onsen/scraping.rb
Original file line number Diff line number Diff line change
@@ -1,64 +1,64 @@
require 'net/http'
require 'time'
require 'pp'
require 'digest/md5'
require 'moji'

module Onsen
  # Value object describing one downloadable episode.
  Program = Struct.new(:title, :number, :update_date, :file_url, :personality)

  # Fetches the program list from the Onsen web API and turns it into
  # Program structs.
  #
  # The span previously interleaved the superseded XML implementation
  # (get_dom / parse_dom / dom.css) with the JSON one, producing duplicate
  # `def` lines and unbalanced blocks; only the JSON implementation is kept.
  class Scraping
    PROGRAMS_URL = 'https://www.onsen.ag/web_api/programs'.freeze

    def initialize
      @a = Mechanize.new
      @a.user_agent_alias = 'Windows Chrome'
    end

    # Entry point.
    #
    # @return [Array<Program>] see #get_program_list
    def main
      get_program_list
    end

    # @return [Array<Program>] every program that has a usable episode;
    #   entries that #parse_program rejected (nil) are dropped
    def get_program_list
      parse_programs(get_programs).compact
    end

    # @param programs [Array<Hash>] decoded API response
    # @return [Array<Program, nil>] one entry per input, nil where the
    #   program has nothing downloadable
    def parse_programs(programs)
      programs.map { |program| parse_program(program) }
    end

    # Converts one API program hash into a Program.
    #
    # @param program [Hash] reads 'title', 'performers' (each with 'name')
    #   and 'contents' (each with 'latest', 'premium', 'title',
    #   'delivery_date', 'streaming_url')
    # @return [Program, nil] nil when there is no latest non-premium content,
    #   or when its delivery date or streaming URL is missing or empty
    def parse_program(program)
      # Array() tolerates a missing or null 'contents' key.
      episode = Array(program['contents']).find do |item|
        item['latest'] && !item['premium']
      end
      return nil if episode.nil?

      # to_s treats a JSON null the same as an empty string; Time.parse
      # would otherwise raise on nil.
      update_date_str = episode['delivery_date'].to_s
      return nil if update_date_str.empty?

      # well known file type: mp3, mp4(movie)
      file_url = episode['streaming_url'].to_s
      return nil if file_url.empty?

      personality = Array(program['performers'])
                    .map { |performer| Moji.normalize_zen_han(performer['name']) }
                    .join(',')

      Program.new(
        Moji.normalize_zen_han(program['title']),
        Moji.normalize_zen_han(episode['title']),
        Time.parse(update_date_str),
        file_url,
        personality
      )
    end

    # Downloads and decodes the program list.
    #
    # @return [Object] the parsed JSON body (#parse_programs expects an Array)
    def get_programs
      res = @a.get(PROGRAMS_URL)
      JSON.parse(res.body)
    end
  end
end

0 comments on commit ad1cbc9

Please sign in to comment.