Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Branch: master
Fetching contributors…

Cannot retrieve contributors at this time

550 lines (484 sloc) 21.526 kB
# websearch.rb
# Performs a search on the web using any of various search engines.
# By Pistos - irc.freenode.net#mathetes
# This is not a standalone Ruby script; it is meant to be run from Reby
# (http://purepistos.net/eggdrop/reby).
# Usage:
# !google [number of results] <search terms>
# !teoma [number of results] <search terms>
# !atw [number of results] <search terms>
# etc.
require "open-uri"
require "cgi"
require 'rubygems'
require 'hpricot'
# Load rubyful_soup (provides BeautifulSoup). If a plain require cannot find
# it, activate the gem explicitly and try again. Only LoadError is rescued so
# that interrupts and exits are not swallowed; Kernel#gem replaces the
# long-removed require_gem.
begin
  require 'rubyful_soup'
rescue LoadError
  gem 'rubyful_soup'
  require 'rubyful_soup'
end
# IRC web-search plugin for Reby. The constructor binds public channel
# triggers (!google, !wiki, !syn, ...) to the handler methods below; each
# handler scrapes a search engine or reference site and reports the result
# back to the channel through the global $reby object (bind, putserv,
# puthelp and log are the only $reby methods used here).
class WebSearch
  VERSION = '1.1.5'
  LAST_MODIFIED = '2009-05-07'

  # Upper bound on the number of results a single command may print.
  MAX_RESULTS = 5

  # Engine selectors accepted by #search (besides the symbols
  # :search_glossary and :search_wordsmyth).
  ENGINE_GOOGLE = 0
  ENGINE_TEOMA = 1
  ENGINE_ALLTHEWEB = 2
  ENGINE_ALTAVISTA = 3
  ENGINE_WIKIPEDIA = 4
  ENGINE_ETYMONLINE = 5
  ENGINE_GEOSHELL_WIKI = 6
  ENGINE_SYNONYM_COM = 7
  ENGINE_BADPUNS_COM = 8

  # Longest message payload sent in a single IRC line.
  MAX_IRC_LINE_LENGTH = 400
  # Matches successive chunks of at most MAX_IRC_LINE_LENGTH characters.
  IRC_LINE_CHUNK = /.{1,#{MAX_IRC_LINE_LENGTH}}/

  # [ minimum ratio (exclusive), verb ] pairs, checked from the top down.
  GOOGLEFIGHT_VERBS = [
    [ 1000.0, "completely DEMOLISHES" ],
    [ 100.0, "utterly destroys" ],
    [ 10.0, "destroys" ],
    [ 5.0, "demolishes" ],
    [ 3.0, "crushes" ],
    [ 2.0, "shames" ],
    [ 1.2, "beats" ],
    [ 1.0, "barely beats" ],
  ].freeze

  # [ channel trigger, handler method name ] pairs registered by #initialize.
  BINDINGS = [
    [ '!google', 'google' ],
    [ '!teoma', 'teoma' ],
    [ '!atw', 'allTheWeb' ],
    [ '!alltheweb', 'allTheWeb' ],
    [ '!alta', 'altaVista' ],
    [ '!altavista', 'altaVista' ],
    [ '!wiki', 'wikipedia' ],
    [ '!wikip', 'wikipedia' ],
    [ '!pedia', 'wikipedia' ],
    [ '!wikipedia', 'wikipedia' ],
    [ '!etym', 'etymOnline' ],
    [ '!syn', 'synonym' ],
    # [ '!pun', 'badPuns' ],
    [ '!pun', 'pun' ],
    [ '!googlefight', 'googlefight' ],
    [ '!gf', 'googlefight' ],
    [ '!meme', 'meme' ],
    [ '!gloss', 'gloss' ],
    [ '!define', 'gloss' ],
    [ '!dict', 'wordsmyth' ],
    [ '?down', 'downforme' ],
    [ 'down?', 'downforme' ],
    [ '!down', 'downforme' ],
    [ '!down?', 'downforme' ],
    [ '?up', 'downforme' ],
    [ 'up?', 'downforme' ],
    [ '!up', 'downforme' ],
    [ '!up?', 'downforme' ],
    [ '!docs', 'searchGeoShellDocs' ],
    [ '!rubybook', 'searchPickAxe' ],
    [ '!rubydoc', 'searchRubyDoc' ],
    [ '!rw', 'search_ramaze_wiki' ],
    [ '!ramaze', 'search_ramaze_wiki' ],
  ].freeze

  # Registers every trigger in BINDINGS as a public-channel bind that
  # dispatches to the object named "$websearch".
  def initialize
    BINDINGS.each do |trigger, handler|
      $reby.bind( 'pub', '-', trigger, handler, '$websearch' )
    end
  end

  # Google search restricted to one site.
  # args is the raw command text (String); site is a host name.
  # The restriction is appended to the text, because #search expects a String.
  def searchSite( nick, userhost, handle, channel, args, site )
    search( nick, channel, "#{args} site:#{site}" )
  end

  # -----------
  # You can setup some custom searches here.

  def searchPickAxe( nick, uhost, handle, chan, arg )
    searchSite( nick, uhost, handle, chan, arg, "phrogz.net" )
  end

  def searchRubyDoc( nick, uhost, handle, chan, arg )
    searchSite( nick, uhost, handle, chan, arg, "www.ruby-doc.org" )
  end

  # -----------
  # Trigger handlers. Each receives (nick, userhost, handle, channel, args),
  # where args is the text that followed the trigger.

  def google( nick, userhost, handle, channel, args )
    search( nick, channel, args, ENGINE_GOOGLE )
  end

  # Google search limited to ramaze.net, excluding its repository/paste hosts.
  def search_ramaze_wiki( nick, userhost, handle, channel, args )
    search( nick, channel, args + " site:ramaze.net -site:darcs.ramaze.net -site:hg.ramaze.net -site:p.ramaze.net", ENGINE_GOOGLE )
  end

  def searchGeoShellDocs( nick, uhost, handle, chan, args )
    search( nick, chan, args, ENGINE_GEOSHELL_WIKI )
  end

  def teoma( nick, userhost, handle, channel, args )
    search( nick, channel, args, ENGINE_TEOMA )
  end

  def allTheWeb( nick, userhost, handle, channel, args )
    search( nick, channel, args, ENGINE_ALLTHEWEB )
  end

  def altaVista( nick, userhost, handle, channel, args )
    search( nick, channel, args, ENGINE_ALTAVISTA )
  end

  def wikipedia( nick, userhost, handle, channel, args )
    search( nick, channel, args, ENGINE_WIKIPEDIA )
  end

  def etymOnline( nick, userhost, handle, channel, args )
    search( nick, channel, args, ENGINE_ETYMONLINE )
  end

  def synonym( nick, userhost, handle, channel, args )
    search( nick, channel, args, ENGINE_SYNONYM_COM )
  end

  # Prints one random one-liner from badpuns.com (currently not bound).
  def badPuns( nick, userhost, handle, channel, args )
    getResults(
      "http://www.badpuns.com/jokes.php?section=oneline&pos=random",
      /<br><br>(.+?)<br><br>/m,
      channel,
      1
    )
  end

  # Prints the first paragraph of punoftheday.com's random-pun page.
  # Prints nothing when the page has no such paragraph.
  def pun( nick, userhost, handle, channel, args )
    doc = open_url( "http://www.punoftheday.com/cgi-bin/randompun.pl" ) { |io| Hpricot( io ) }
    paragraph = doc.search( '#main-content p' )[ 0 ]
    put( paragraph.inner_text, channel ) if paragraph
  end

  def gloss( nick, userhost, handle, channel, args )
    search( nick, channel, args, :search_glossary )
  end

  def wordsmyth( nick, userhost, handle, channel, args )
    search( nick, channel, args, :search_wordsmyth )
  end

  # Returns the hit count Google reports for the given terms, as an Integer.
  # term_array may be a single String or an Array of Strings.
  # Returns 0 when the count element is not present on the page.
  def google_count( term_array )
    terms = CGI.escape( Array( term_array ).join( ' ' ) )
    doc = open_url( "http://www.google.com/search?q=#{terms}" ) { |io| Hpricot( io ) }
    count_tag = doc.at( '#ssb//b[3]' )
    count_tag ? count_tag.inner_text.gsub( ',', '' ).to_i : 0
  end

  # Formats an integer with a thousands delimiter: 1234567 -> "1,234,567".
  def number_with_delimiter( number, delimiter="," )
    number.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
  end

  # Compares Google hit counts of two terms and announces the winner.
  # The terms may be separated by a comma, by "vs"/"versus", or failing that
  # by the first run of whitespace. Equal counts are reported as a tie.
  def googlefight( nick, userhost, handle, channel, args )
    text = args.to_s
    a = text.split( /,/ )
    a = text.split( /\bv(?:ersu)?s(?:\.|\b)/ ) if a.size != 2
    a = text.split( /\s+/, 2 ) if a.size != 2
    if a.size != 2
      put( "#{nick}: !googlefight <term(s)> [vs | ,] <term(s)>", channel )
      return
    end

    a = a.map { |t| t.strip }
    count1 = google_count( a[ 0 ] )
    count2 = google_count( a[ 1 ] )
    c1 = number_with_delimiter( count1 )
    c2 = number_with_delimiter( count2 )

    if count1 == count2
      # A ratio of exactly 1.0 matches no verb threshold, so ties get their
      # own wording.
      msg = "#{a[0]} ties with #{a[1]}! (#{c1} to #{c2})"
    else
      high, low = [ count1, count2 ].max, [ count1, count2 ].min
      ratio = ( low != 0 ) ? high.to_f / low : 99
      verb = ( GOOGLEFIGHT_VERBS.find { |x| ratio > x[ 0 ] } || GOOGLEFIGHT_VERBS.last )[ 1 ]
      if count1 > count2
        msg = "#{a[0]} #{verb} #{a[1]}! (#{c1} to #{c2})"
      else
        msg = "#{a[1]} #{verb} #{a[0]}! (#{c2} to #{c1})"
      end
    end
    put( "#{nick}: #{msg}", channel )
  end

  # Prints the first n lines of the meme feed; n comes from args, defaults
  # to 1 and is capped at MAX_RESULTS so one command cannot flood a channel.
  def meme( nick, userhost, handle, channel, args )
    requested = args.to_s.to_i
    n = requested > 0 ? [ requested, MAX_RESULTS ].min : 1
    memes = open_url( "http://meme.boxofjunk.ws/moar.txt" ) { |io| io.readlines }
    memes[ 0...n ].each do |line|
      put line, channel
    end
  end

  # Asks downforeveryoneorjustme.com about the host named in args and relays
  # the page's text. Prints a usage line when args contains no host name.
  def downforme( nick, userhost, handle, channel, args )
    site = args.to_s.downcase[ /([a-z0-9.-]+)($|\/)/, 1 ]
    if site.nil?
      put( "#{nick}: !down <site>", channel )
      return
    end
    doc = open_url( "http://downforeveryoneorjustme.com/#{site}" ) { |io| Hpricot( io ) }
    verdict = doc.at( 'div#container' ).children.select { |e| e.text? }.join( ' ' ).gsub( /\s+/, ' ' ).strip
    put( "#{nick}: [#{site}] " + verdict, channel )
  end

  # Sends text to channel in chunks of at most MAX_IRC_LINE_LENGTH characters.
  def splitput( channel, text )
    text.scan( IRC_LINE_CHUNK ) do |text_part|
      $reby.putserv "PRIVMSG #{channel} :#{text_part}"
    end
  end

  # Sends one message; destination defaults to the channel of the most
  # recent #search call.
  def put( message, destination = @channel )
    $reby.putserv "PRIVMSG #{destination} :#{message}"
  end

  # Runs a search and prints the results to channel.
  #
  # args   - command text: "[number of results] <search terms>". A leading
  #          integer is taken as the result count only when terms follow it;
  #          the count is clamped to 1..MAX_RESULTS.
  # engine - one of the ENGINE_* constants, :search_glossary or
  #          :search_wordsmyth.
  #
  # Prints a usage line when no search terms are given.
  def search( nick, channel, args, engine = ENGINE_GOOGLE )
    @channel = channel
    words = args.to_s.strip.split( /\s+/ )

    num_results = 1
    if words.size > 1 && words.first.to_i.to_s == words.first
      num_results = [ [ words.shift.to_i, 1 ].max, MAX_RESULTS ].min
    end

    if words.empty?
      $reby.putserv "PRIVMSG #{channel} :!google [number of results] <search terms>"
      return
    end

    unescaped_arg = words.join( ' ' )
    # CGI.escape turns spaces into "+" itself; joining with "+" beforehand
    # would send a literal plus sign (%2B) to the search engine.
    arg = CGI.escape( unescaped_arg )
    $reby.log "arg: #{arg}"

    case engine
    when ENGINE_GOOGLE
      google_results( channel, arg, unescaped_arg, num_results )
    when ENGINE_TEOMA
      getResults(
        "http://s.teoma.com/search?q=#{ arg }",
        /<div id="result".+?<a href=".+?u=([^"]+)"/m,
        channel,
        num_results
      )
    when ENGINE_ALLTHEWEB
      getResults(
        "http://www.alltheweb.com/search?q=#{ arg }",
        /<span class="resURL">(.+?)[ <]/m,
        channel,
        num_results
      )
    when ENGINE_ALTAVISTA
      getResults(
        "http://www.altavista.com/web/results?q=#{ arg }",
        /<span class=ngrn>(.+?)[ <]/m,
        channel,
        num_results
      )
    when ENGINE_ETYMONLINE
      getResults(
        "http://www.etymonline.com/index.php?term=#{ arg }",
        /<dt(?: class="highlight")?>(.+?)<\/dd>/m,
        channel,
        num_results,
        unescaped_arg
      )
    when ENGINE_SYNONYM_COM
      synonym_results( nick, channel, unescaped_arg )
    when ENGINE_WIKIPEDIA
      wikipedia_results( channel, arg, unescaped_arg, num_results )
    when ENGINE_GEOSHELL_WIKI
      geoshell_results( channel, arg, unescaped_arg, num_results )
    when :search_glossary
      glossary_definition( channel, arg, unescaped_arg, num_results )
    when :search_wordsmyth
      open_url( "http://www.wordsmyth.net/live/home.php?script=search&matchent=#{arg}&matchtype=exact" ) do |html|
        parse_wordsmyth( html.read, channel )
      end
    end
  end

  # Parses a wordsmyth.net result page (HTML text) with BeautifulSoup and
  # prints, per part of speech, the headword, pronunciation and definitions.
  # Prints "(no results)" (plus close matches, if the page lists any) when
  # the page reports that the word was not found or has no result table.
  def parse_wordsmyth( text, channel )
    soup = BeautifulSoup.new( text )

    not_found_p = soup.find( Proc.new { |el|
      el.respond_to?( :name ) &&
        el.name == 'p' &&
        el.find_text( /Sorry, we could not find/ )
    } )
    if not_found_p
      suggestions = []
      not_found_p.find_all( 'a' ).each do |a|
        suggestions << a.string
      end
      output = '(no results)'.dup
      if not suggestions.empty?
        output << " Close matches: #{suggestions.join( ', ' )}"
      end
      splitput channel, output
      return
    end

    maintable = soup.find( 'table', :attrs => { 'cellspacing'=>'0', 'border'=>"0", 'cellpadding'=>"2", 'width'=>"100%", 'bgcolor' => nil } )
    if maintable.nil?
      # Unexpected page layout: report instead of failing on nil.
      splitput channel, '(no results)'
      return
    end

    wordtag = maintable.find( 'div', { :attrs => { 'class' => 'headword' } } )
    word = wordtag ? wordtag.contents[ 0 ] : nil

    # Iterate through all <tr>s, find relevant bits.
    output = ''.dup
    maintable.next_parsed_items do |tr|
      next unless tr.respond_to?( :name ) && tr.name == 'tr'

      main_td = tr.find( 'td', :attrs => { 'width' => '70%' } )
      middle_td = tr.find( 'td', :attrs => { 'width' => '5%', 'valign' => 'baseline' } )

      # Part of Speech: flush what was collected for the previous one and
      # start a new output line.
      if tr[ 'bgcolor' ] == '#DDDDFF'
        pos = main_td.span.string
        splitput( channel, output ) unless output.empty?
        output = "#{word} - [#{pos}]"
      end

      if tr[ 'bgcolor' ] == '#FFFFFF'
        # Pronunciation
        prontag = tr.find( 'div', :attrs => { 'class' => 'pron' } )
        if prontag
          syllabification = []
          prontag.each do |syllable|
            next unless syllable.respond_to?( :string ) && syllable.string
            syllable_class = syllable[ 'class' ]
            if syllable_class
              # The digit in the class name selects the stress mark.
              stress =
                case syllable_class[ /(\d)/, 1 ].to_i
                when 1 then "'"
                when 2 then '"'
                else ''
                end
              syllabification << stress + syllable.string
            else
              syllabification << syllable.string
            end
          end
          output << " (" + syllabification.join( ' ' ) + ")"
        end

        # Definition
        if main_td
          def_span = main_td.find( 'span', :attrs => { 'style' => 'font-weight: normal;' } )
          if def_span
            # The middle cell may be missing; then only the definition text
            # is appended.
            label = middle_td ? middle_td.span.string : nil
            output << " " + [ label, def_span.string ].compact.join( " " )
          end
        end
      end
    end

    splitput( channel, output ) unless output.empty?
  end

  # Fetches search_url, applies regexp to the page and prints up to
  # max_results matches to channel (via $reby.puthelp).
  #
  # Each match has newlines replaced by spaces, HTML tags stripped and
  # entities unescaped. Output longer than MAX_IRC_LINE_LENGTH is split; the
  # leading segments are prefixed with "[search_term]" (site: filters
  # removed). Prints "[search_term] No results found." when nothing matches.
  def getResults( search_url, regexp, channel, max_results, search_term = "" )
    st = search_term.gsub( /-?site:\S+/, '' )
    open_url( search_url ) do |html|
      counter = 0
      html.read.scan( regexp ) do |match|
        # scan yields an Array when regexp has groups, a String otherwise.
        pieces = match.is_a?( Array ) ? match : [ match ]
        cleaned = pieces.map { |piece| piece.gsub( /\n/m, " " ).gsub( /<.+?>/, "" ) }
        output = CGI.unescapeHTML( cleaned.join( ' ' ) )
        while output.length > MAX_IRC_LINE_LENGTH
          segment = output[ 0...MAX_IRC_LINE_LENGTH ]
          output = output[ MAX_IRC_LINE_LENGTH..-1 ]
          $reby.puthelp "PRIVMSG #{channel} :[#{st}] #{segment}"
        end
        $reby.puthelp "PRIVMSG #{channel} :#{output}"
        counter += 1
        break if counter >= max_results
      end
      if counter == 0
        $reby.puthelp "PRIVMSG #{channel} :[#{st}] No results found."
      end
    end
  end

  private

  # Opens url through open-uri and yields the response IO to the block,
  # returning the block's value (or the IO itself when no block is given).
  # URI#open is used because Kernel#open no longer opens URLs on Ruby 3.
  def open_url( url, &block )
    URI.parse( url ).open( &block )
  end

  # Saves the last fetched Google page to "websearch.last" for debugging.
  # A failure to write is logged and otherwise ignored.
  def dump_last_page( text )
    File.open( "websearch.last", "w" ) { |f| f.puts text }
  rescue SystemCallError => e
    $reby.log "websearch.last not written: #{e.message}"
  end

  # Prints up to num_results Google hits as "[terms]: url - title".
  def google_results( channel, arg, unescaped_arg, num_results )
    open_url( "http://www.google.com/search?q=#{ arg }&safe=active" ) do |html|
      text = html.read
      dump_last_page( text )
      ua = unescaped_arg.gsub( /-?site:\S+/, '' ).strip
      hits = text.scan( /<a href="?([^"]+)" class=l.*?>(.+?)<\/a>/m ).first( num_results )
      if hits.empty?
        put "(no results)", channel
      else
        hits.each do |url, title|
          put "[#{ua}]: #{url} - #{title.gsub( /<.+?>/, "" )}", channel
        end
      end
    end
  end

  # Prints thesaurus entries whose main entry equals the search term. The
  # first two go to the channel, further ones to nick by NOTICE (except in
  # #mathetes). Any error while fetching or parsing is logged and reported
  # as "No synonyms found."
  def synonym_results( nick, channel, unescaped_arg )
    open_url( "http://thesaurus.reference.com/search?q=#{ CGI.escape( unescaped_arg ) }" ) do |html|
      num_printed = 0
      html.read.scan( /Main Entry:(.+?)Source:/m ) do |captures|
        entry = captures[ 0 ].gsub( /<[^>]+?>/, "" ).gsub( /&nbsp;/, " " )
        main_entry = entry[ /([a-zA-Z -]+)/, 1 ].strip
        next unless main_entry.downcase == unescaped_arg.downcase

        definition = entry[ /Definition:(.+)/, 1 ].strip
        syns = entry[ /Synonyms:(.+)/, 1 ].strip
        if num_printed < 2 || channel == "#mathetes"
          dest = "PRIVMSG #{channel}"
        else
          dest = "NOTICE #{nick}"
          if num_printed == 2
            $reby.putserv "PRIVMSG #{channel} :(more results given in private to #{nick})"
          end
        end
        $reby.putserv "#{dest} :[#{unescaped_arg}] #{definition} - #{syns}"
        num_printed += 1
      end
      if num_printed == 0
        $reby.putserv "PRIVMSG #{channel} :[syn #{unescaped_arg}] No synonyms found."
      end
    end
  rescue StandardError => e
    $reby.putserv "PRIVMSG #{channel} :[syn #{unescaped_arg}] No synonyms found."
    $reby.log( e.message + "\n" + e.backtrace.join( "\n\t" ) )
  end

  # Looks the term up on the English Wikipedia. A direct hit prints the
  # article URL; a search page prints up to num_results /wiki/ links,
  # skipping the first two on the page.
  def wikipedia_results( channel, arg, unescaped_arg, num_results )
    open_url( "http://en.wikipedia.org/w/wiki.phtml?search=#{ arg }" ) do |html|
      soup = BeautifulSoup.new( html.read )
      heading_tag = soup.find( 'h1', :attrs => { 'class' => 'firstHeading' } )
      if heading_tag
        title = heading_tag.string
        case title
        when 'Main_Page'
          $reby.putserv "PRIVMSG #{channel} :No wikipedia entries found for '#{unescaped_arg}'."
        when 'Search'
          count = -2
          soup.find_all( 'a', :attrs => { 'href' => %r{^/wiki/} } ).each do |a|
            if count >= 0
              $reby.putserv "PRIVMSG #{channel} :[#{unescaped_arg}] http://en.wikipedia.org#{a['href']}"
            end
            count += 1
            break if count >= num_results
          end
        else
          $reby.putserv "PRIVMSG #{channel} :[#{unescaped_arg}] http://en.wikipedia.org/wiki/#{title}"
        end
      end
    end
  end

  # Prints up to num_results hits from the GeoShell documentation search.
  def geoshell_results( channel, arg, unescaped_arg, num_results )
    open_url( "http://docs.geoshell.org/dosearchsite.action?searchQuery.queryString=#{ arg }" ) do |html|
      counter = 0
      html.read.scan( /<a href="(\/display[^"]+).+?<br\/>.+?(<span.+?)<\/td>/m ) do |url, desc|
        d = desc.gsub( /<[^>]+>/, "" )
        $reby.putserv "PRIVMSG #{channel} :[#{unescaped_arg}] http://docs.geoshell.org#{url} - #{d}"
        counter += 1
        break if counter >= num_results
      end
      if counter == 0
        $reby.putserv "PRIVMSG #{channel} :[#{unescaped_arg}] No results."
      end
    end
  end

  # Prints the index-th (1-based) definition from Google's "define:" page.
  # Prints nothing when the page has fewer definitions than index.
  def glossary_definition( channel, arg, unescaped_arg, index )
    open_url( "http://www.google.com/search?q=define%3A+#{arg}" ) do |html|
      text = html.read
      if text =~ /No definitions were found for/
        $reby.putserv "PRIVMSG #{channel} :No definitions found for #{unescaped_arg}."
      else
        definition_text = text[ /<ul.*?>(.+)<\/ul>/m, 1 ]
        if definition_text
          defn = definition_text.scan( /li>\s*([^<>]+?)</ )[ index - 1 ]
          $reby.putserv "PRIVMSG #{channel} :" + CGI.unescapeHTML( defn[ 0 ] ) if defn
        end
      end
    end
  end
end
# Create the plugin instance. The constructor registers all triggers with
# Reby, and each binding names "$websearch" as its target, so the instance
# must live in this global.
$websearch = WebSearch.new
Jump to Line
Something went wrong with that request. Please try again.