/
cooklib.rb
173 lines (152 loc) · 3.64 KB
/
cooklib.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# coding: utf-8
require 'rubygems'
require 'open-uri'
require 'nokogiri'
require 'csv'
require 'json'
# A recipe (title + URL) together with its Hatena Bookmark count.
class Recipe
  attr_accessor :title, :url, :count

  # @param title [String] recipe title
  # @param url [String] recipe URL; used as the lookup key for the bookmark count
  def initialize(title, url)
    @title = title
    @url = url
    @count = 0
  end

  # Fetches the Hatena Bookmark count for +url+ and stores it in +count+.
  #
  # Up to 4 attempts are made. A failed download waits 10 seconds and a failed
  # JSON parse waits 1 second before the next attempt. When the API answers
  # with the literal body +null+, or when every attempt fails, +count+ keeps
  # its current value.
  def get_hatebu
    # The recipe URL is a query-string *value*, so it has to be percent-encoded;
    # otherwise any "&", "?" or "#" inside it would corrupt the API request.
    api_url = "http://b.hatena.ne.jp/entry/jsonlite/?url=#{URI.encode_www_form_component(@url)}"

    4.times do
      begin
        # URI.open (open-uri) instead of Kernel#open: since Ruby 3.0 Kernel#open
        # no longer accepts URLs. The block form closes the response stream.
        html = URI.open(api_url, &:read)
      rescue StandardError
        puts "はてぶ取得エラー...10秒待機 #{@url}"
        sleep(10)
        next
      end

      begin
        @count = JSON.parse(html)["count"].to_i if html != 'null'
      rescue StandardError
        puts "JSONパースエラー...1秒待機 #{@url} #{html}"
        sleep(1)
        next
      end

      break
    end
  end

  # @return [Array] the row written to CSV: [title, url, count]
  def csv
    [@title, @url, @count]
  end
end
# Collects recipes from Cookpad listing pages, looks up the Hatena Bookmark
# count of each one and writes the results to CSV files.
class Cook
  # @param opts [Hash] options read by this class:
  #   :category - category id used to build the category listing URL
  #   :recipe   - search keyword used to build the search listing URL
  #   :file     - path of a previously saved CSV to load instead of scraping
  def initialize(opts)
    @opts = opts
  end

  # Builds the listing URL for the given page.
  #
  # @param page [Integer] page number (defaults to 1)
  # @return [String, nil] the category URL when :category is set, otherwise the
  #   search URL when :recipe is set, otherwise nil
  def get_recipe_url(page = 1)
    if @opts[:category]
      "http://cookpad.com/category/#{@opts[:category]}?page=#{page}"
    elsif @opts[:recipe]
      # URI.escape was removed in Ruby 3.0. Only the keyword needs escaping;
      # the rest of the URL is fixed ASCII.
      "http://cookpad.com/search/#{escape_path_segment(@opts[:recipe])}?order=date&page=#{page}"
    end
  end

  # Downloads the first listing page and reads the number shown in its
  # pagination element. `do` passes this value to get_recipes as the last page.
  #
  # @return [Integer, nil] the parsed number; nil when neither :category nor
  #   :recipe is set
  def get_recipe_maxnum
    doc = Nokogiri::HTML(fetch_html(get_recipe_url))
    if @opts[:category]
      text = doc.xpath('//span[@class="page_num"]')[0].text.strip
      # Digits (with thousands separators) that follow a space.
      text[/ ([0-9,]+)/, 1].delete(",").to_i
    elsif @opts[:recipe]
      text = doc.xpath('//div[@class="paginator"]/span')[0].text.strip
      # Digits (with thousands separators) that follow "/ ".
      text[/\/ ([0-9,]+)/, 1].delete(",").to_i
    end
  end

  # Downloads one listing page and extracts the recipes on it.
  #
  # The download is attempted up to 3 times, waiting 10 seconds after each
  # failure.
  #
  # @param page [Integer] page number
  # @return [Array<Recipe>] recipes found; empty when every attempt failed
  def get_recipe(page)
    html = nil
    3.times do
      begin
        html = fetch_html(get_recipe_url(page))
      rescue StandardError
        puts "レシピ取得エラー 10秒待機"
        sleep(10)
        next
      end
      break
    end
    return [] unless html # every attempt failed; nothing to parse

    previews = Nokogiri::HTML(html).xpath('//div[@class="recipe-preview"]')
    previews.map do |preview|
      link = preview.xpath('.//a[@class="recipe-title font13 "]')[0]
      next unless link # preview without the expected title link

      Recipe.new(link.text.strip, link[:href])
    end.compact
  end

  # Collects the recipes of pages 1..max_num, pausing 0.3 seconds after each page.
  #
  # @param max_num [Integer] last page number to fetch
  # @return [Array<Recipe>]
  def get_recipes(max_num)
    puts "解析開始"
    recipes = []
    (1..max_num).each do |page|
      puts "#{page} ページ目を解析中..."
      recipes.concat(get_recipe(page))
      sleep(0.3)
    end
    puts "解析完了"
    p recipes # dumps the collected recipes to stdout
    recipes
  end

  # Writes one CSV row per recipe (see Recipe#csv) to +path+, overwriting it.
  #
  # @param recipes [Array<Recipe>]
  # @param path [String]
  def save_recipes(recipes, path)
    puts "ファイル書き出し開始"
    CSV.open(path, 'w') do |out|
      recipes.each { |recipe| out << recipe.csv }
    end
    puts "書き出し完了"
  end

  # Reads recipes back from a CSV file. Only the first two columns (title and
  # URL) are used; any further column is ignored.
  #
  # @param file [String] path of the CSV file
  # @return [Array<Recipe>]
  def load_recipes(file)
    puts "ファイル読み込み開始..."
    recipes = CSV.foreach(file).map { |row| Recipe.new(row[0], row[1]) }
    puts "読み込み完了"
    recipes
  end

  # Fetches the bookmark count of every recipe and returns the popular ones,
  # most bookmarked first.
  #
  # @param tmp [Array<Recipe>] recipes to look up
  # @param threshold [Integer] only recipes with count > threshold are kept
  # @return [Array<Recipe>] kept recipes sorted by count, descending
  def get_recipes_hatebu(tmp, threshold = 10)
    puts "はてブ取得開始"
    popular = []
    tmp.each.with_index(1) do |recipe, num|
      recipe.get_hatebu
      popular << recipe if recipe.count > threshold
      puts "#{recipe.title} #{recipe.url} #{recipe.count} #{num}"
    end
    puts "はてブ取得完了"
    puts "ソート開始"
    ranked = popular.sort_by { |recipe| -recipe.count }
    puts "ソート完了"
    ranked
  end

  # Runs the whole pipeline: obtains the recipes (from :file, or by scraping
  # Cookpad and saving them to cookdo_<timestamp>.csv), then ranks them by
  # bookmark count and saves the ranking to hatebudo_<timestamp>.csv.
  def do
    now = Time.now.to_i
    recipes =
      if @opts[:file]
        load_recipes(@opts[:file])
      else
        # Cookpad
        scraped = get_recipes(get_recipe_maxnum)
        save_recipes(scraped, "cookdo_#{now}.csv")
        scraped
      end
    # Hatebu
    ranked = get_recipes_hatebu(recipes)
    save_recipes(ranked, "hatebudo_#{now}.csv")
  end

  private

  # Downloads +url+ and returns the response body as a String.
  # URI.open (open-uri) is used because Kernel#open stopped accepting URLs in
  # Ruby 3.0; the block form closes the response stream.
  def fetch_html(url)
    URI.open(url, &:read)
  end

  # Percent-encodes +text+ for use as a single URL path segment.
  # encode_www_form_component writes spaces as "+", which is only valid in
  # query strings, so they are rewritten as "%20" (a literal "+" in the input
  # has already become "%2B" at that point).
  def escape_path_segment(text)
    URI.encode_www_form_component(text.to_s).gsub("+", "%20")
  end
end