Skip to content

Commit

Permalink
add missing files
Browse files Browse the repository at this point in the history
  • Loading branch information
Johannes Kroll committed Feb 25, 2014
1 parent e28821b commit 7808052
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 0 deletions.
38 changes: 38 additions & 0 deletions filtermodules/geocoords.py
@@ -0,0 +1,38 @@
#!/usr/bin/python
# -*- coding:utf-8 -*-
import time
from tlgflaws import *
from utils import *


##
class FGeotags(FlawFilter):
    """Filter that reports pages having a row in geo_tags.

    Each result's title is an OpenStreetMap link built from the geotag's
    latitude and longitude.
    """
    shortname= 'Geotags'
    label= _('Geotags')
    description= _('Get pages with Geotags')

    # our action class
    class Action(TlgAction):
        """Looks up the geotags for one batch of page IDs."""

        # HTML for the result title; lat/lon each appear in the marker, the map position and the label.
        LINK_TEMPLATE= ('<a target="_blank" href="http://www.openstreetmap.org/'
                        '?mlat=%(lat)s&mlon=%(lon)s#map=14/%(lat)s/%(lon)s">%(lat)s,%(lon)s</a>')

        def execute(self, resultQueue):
            """Query geo_tags for self.pageIDs and put one TlgResult per returned row on resultQueue.

            Only pages in namespace 0 or 6 are considered. Does nothing when
            self.pageIDs is empty.
            """
            # with no IDs the WHERE clause would end in "AND ()", which is not valid SQL
            if not self.pageIDs:
                return

            cur= getCursors()[self.wiki]
            # one placeholder per page ID; the IDs themselves are passed as query parameters
            format_strings = ' OR '.join(['page.page_id=%s'] * len(self.pageIDs))
            cur.execute("""SELECT page.page_id, page.page_namespace, page.page_title, page.page_restrictions, page.page_counter,
                page.page_is_new, page.page_random, page.page_touched, page.page_latest, page.page_len,
                geo_tags.gt_lat, geo_tags.gt_lon
                FROM page
                JOIN geo_tags ON geo_tags.gt_page_id=page.page_id
                WHERE (page.page_namespace=0 OR page.page_namespace=6) AND (%s)""" % format_strings, self.pageIDs)
            res= cur.fetchall()

            for row in res:
                filtertitle= self.LINK_TEMPLATE % {'lat': row['gt_lat'], 'lon': row['gt_lon']}
                resultQueue.put(TlgResult(self.wiki, row, self.parent, filtertitle))

    def getPreferredPagesPerAction(self):
        """Return 50 (presumably the batch size the framework uses per Action -- confirm against FlawFilter)."""
        return 50

    def createActions(self, language, pages, actionQueue):
        """Queue a single Action covering the given pages on actionQueue."""
        actionQueue.put(self.Action(self, language, pages))

FlawFilters.register(FGeotags)
70 changes: 70 additions & 0 deletions filtermodules/pagehits.py
@@ -0,0 +1,70 @@
#!/usr/bin/python
# -*- coding:utf-8 -*-
import time
import datetime
import requests
from tlgflaws import *
from utils import *

# snippet from http://stackoverflow.com/questions/1265665/python-check-if-a-string-represents-an-int-without-using-try-except ...
def isInt_str(v):
    """Return True if str(v), with surrounding whitespace removed, looks like a whole number.

    A leading run of '+'/'-' signs and a trailing zero fraction are tolerated
    ('-5', '1.0' and '5.' are accepted). Anything containing '..' is rejected.

    Note: this is not the same as "int(v) would succeed" -- for example
    '1.0' is accepted here, while '00' and '-0' are rejected.
    """
    text = str(v).strip()
    if text == '0':
        return True
    if '..' in text:
        # left untouched, so the dots make isdigit() fail below
        candidate = text
    else:
        # drop signs, then trailing zeros, then a trailing decimal point
        candidate = text.lstrip('-+').rstrip('0').rstrip('.')
    return candidate.isdigit()

##
class FPagehits(FlawFilter):
    """Filter that annotates each page with its hit count for the previous calendar month.

    Hit counts come from stats.grok.se; each result carries the negated count
    as its sort key.
    """
    shortname= 'Pagehits'
    label= _('Page Hits')
    description= _('Sort articles by hit count. Uses data from stats.grok.se from previous month.')

    @staticmethod
    def makeGrokSession():
        """Create a requests session that identifies this tool in its User-Agent header."""
        grokSession= requests.Session()
        # if they ever need to complain about our requests, they'll know where to look:
        grokSession.headers.update({ 'User-Agent': 'Article List Generator (http://tools.wmflabs.org/render/stools/alg)'})
        return grokSession

    @cache_region('disklongterm')
    def getHitcount(self, year, month, title):
        """Return the summed daily views of `title` for the given year and month.

        Returns an int on success, the string '?' when the server answers with
        a status other than 200, and str(exception) when anything raises.

        NOTE(review): the URL hard-codes the 'de' project regardless of which
        wiki the page came from, and the error strings are returned through
        the cache decorator like any other value -- confirm both are intended.
        """
        try:
            # the requests library is totally thread-safe, except when it isn't.
            # so we need to work around by creating a separate session for each worker thread
            session= CachedThreadValue("grokSession", self.makeGrokSession)
            res= session.get('http://stats.grok.se/json/de/%s%02d/%s' % (year, int(month), title))
            if res.status_code==200:
                stats= res.json()
                return sum(int(views) for views in stats['daily_views'].values())
        except Exception as ex:
            return str(ex)  # ....
        return '?'

    # our action class
    class Action(TlgAction):
        """Fetches hit counts for one batch of page IDs."""

        def execute(self, resultQueue):
            """Look up self.pageIDs and put one TlgResult per non-redirect page on resultQueue.

            Only pages in namespace 0 or 6 are considered. Does nothing when
            self.pageIDs is empty.
            """
            # with no IDs the WHERE clause would end in "AND ()", which is not valid SQL
            if not self.pageIDs:
                return

            cur= getCursors()[self.wiki]
            # one placeholder per page ID; the IDs themselves are passed as query parameters
            format_strings = ' OR '.join(['page_id=%s'] * len(self.pageIDs))
            cur.execute("""SELECT page_id, page_namespace, page_title, page_restrictions, page_counter, page_is_redirect,
                page_is_new, page_random, page_touched, page_latest, page_len
                FROM page WHERE (page_namespace=0 OR page_namespace=6) AND page_is_redirect=0 AND (%s)""" % format_strings, self.pageIDs)
            res= cur.fetchall()

            # previous calendar month: step back one day from the first of the current month
            lastmonth= datetime.date.today().replace(day=1) - datetime.timedelta(days=1)
            statyear= lastmonth.year
            statmonth= lastmonth.month

            for row in res:
                count= self.parent.getHitcount(statyear, statmonth, row['page_title'])
                filtertitle= 'count: %s' % count
                # numeric counts are negated for the sort key; '?' and error strings get the fixed key 1
                sortkey= -int(count) if isInt_str(count) else 1
                resultQueue.put(TlgResult(self.wiki, row, self.parent, filtertitle, sortkey= sortkey))

    def getPreferredPagesPerAction(self):
        """Return 50 (presumably the batch size the framework uses per Action -- confirm against FlawFilter)."""
        return 50

    def createActions(self, language, pages, actionQueue):
        """Queue a single Action covering the given pages on actionQueue."""
        actionQueue.put(self.Action(self, language, pages))

FlawFilters.register(FPagehits)
61 changes: 61 additions & 0 deletions geobbox.py
@@ -0,0 +1,61 @@
# code to create a bounding box from a center geocoord and a 'radius' (half-side of square) in km.
# this is copypasta from SO question at http://stackoverflow.com/questions/3182260/python-geocode-filtering-by-distance
# answered by http://stackoverflow.com/users/84270/john-machin

from math import sin, cos, asin, sqrt, degrees, radians

# Earth radius in kilometres (6371.0 is the commonly quoted mean radius).
Earth_radius_km = 6371.0
# Sphere radius used by distance_between_points() and bounding_box();
# distances passed to or returned by them are in this unit (kilometres here).
RADIUS = Earth_radius_km

def haversine(angle_radians):
    """Return the haversine of an angle given in radians: sin(angle / 2) squared."""
    half_angle = angle_radians / 2.0
    return sin(half_angle) ** 2

def inverse_haversine(h):
    """Return the angle, in radians, whose haversine is h."""
    half_angle = asin(sqrt(h))
    return 2 * half_angle

def distance_between_points(lat1, lon1, lat2, lon2):
    """Return the haversine-formula distance between two points, in the units of RADIUS.

    All four arguments are in degrees.
    WARNING: loss of absolute precision when points are near-antipodal.
    """
    phi1 = radians(lat1)
    phi2 = radians(lat2)
    delta_phi = phi2 - phi1
    delta_lambda = radians(lon2 - lon1)
    h = haversine(delta_phi) + cos(phi1) * cos(phi2) * haversine(delta_lambda)
    central_angle = inverse_haversine(h)
    return RADIUS * central_angle

def bounding_box(lat, lon, distance):
    """Return (dlat, dlon), in degrees, bounding everything within `distance` of (lat, lon).

    Input and output lats/longs are in degrees. `distance` must be in the
    same units as RADIUS. The result is such that no points outside
    lat +/- dlat or outside lon +/- dlon are <= `distance` from the
    (lat, lon) point.
    Derived from: http://janmatuschek.de/LatitudeLongitudeBoundingCoordinates

    If the North or South Pole lies inside the circle of interest, every
    longitude is within reach, so dlon is 180 degrees. In that case
    lat +/- dlat can run past +/-90 and is left for the caller to clamp.

    WARNING: lon +/- dlon is not wrapped, so the caller still has to handle
    the case where the +/-180 degree meridian intersects the circle of
    interest. See the quoted article for how to detect and overcome that.

    Note: the result is independent of the longitude of the central point,
    so the `lon` argument is not used.
    """
    lat_rad = radians(lat)
    dlat = distance / RADIUS

    # Pole inside the circle: sin(dlat) / cos(lat) would leave asin's domain,
    # and the box has to span all longitudes anyway.
    if abs(lat_rad) + dlat >= radians(90.0):
        return degrees(dlat), 180.0

    # min() only guards against the ratio rounding a hair above 1 right at the pole boundary
    dlon = asin(min(1.0, sin(dlat) / cos(lat_rad)))
    return degrees(dlat), degrees(dlon)

if __name__ == "__main__":

    # Examples from Jan Matuschek's article.
    # Every print takes a single pre-formatted string so the module parses and
    # prints the same text under both Python 2 and Python 3.

    def test(lat, lon, dist):
        """Print the bounding box deltas and the resulting lat/lon ranges in radians."""
        print("test bounding box %s %s %s" % (lat, lon, dist))
        dlat, dlon = bounding_box(lat, lon, dist)
        print("dlat, dlon degrees %s %s" % (dlat, dlon))
        print("lat min/max rads %s" % ([radians(lat - dlat), radians(lat + dlat)],))
        print("lon min/max rads %s" % ([radians(lon - dlon), radians(lon + dlon)],))

    print("liberty to eiffel")
    print(distance_between_points(40.6892, -74.0444, 48.8583, 2.2945))  # about 5837 km
    print("")
    print("calc min/max lat/lon")
    degs = [degrees(rad) for rad in (1.3963, -0.6981)]
    test(*degs, dist=1000)
    print("")
    degs = [degrees(rad) for rad in (1.3963, -0.6981, 1.4618, -1.6021)]
    print("%s distance %s" % (degs, distance_between_points(*degs)))  # 872 km

0 comments on commit 7808052

Please sign in to comment.