forked from kroll-j/tlgbackend
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Johannes Kroll
committed
Feb 25, 2014
1 parent
e28821b
commit 7808052
Showing
3 changed files
with
169 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/usr/bin/python | ||
# -*- coding:utf-8 -*- | ||
import time | ||
from tlgflaws import * | ||
from utils import * | ||
|
||
|
||
## | ||
class FGeotags(FlawFilter):
    """Filter that lists pages which have an entry in the geo_tags table.

    Each result is titled with an OpenStreetMap link built from the tag's
    latitude and longitude.
    """
    shortname= 'Geotags'
    label= _('Geotags')
    description= _('Get pages with Geotags')

    # our action class
    class Action(TlgAction):
        """Looks up the geotags for one batch of page IDs."""

        def execute(self, resultQueue):
            """Query geotagged pages among self.pageIDs and queue one TlgResult per row.

            Only pages in namespace 0 or 6 are considered. Nothing is queued
            (and no query is sent) when the batch is empty.
            """
            # An empty batch would render the WHERE clause as "... AND ()",
            # which is a SQL syntax error, so bail out before touching the DB.
            if not self.pageIDs:
                return

            cur= getCursors()[self.wiki]
            # one driver-side placeholder per page ID; the IDs themselves are
            # passed separately so the driver does the quoting
            placeholders= ', '.join(['%s'] * len(self.pageIDs))
            cur.execute("""SELECT page.page_id, page.page_namespace, page.page_title, page.page_restrictions, page.page_counter,
                page.page_is_new, page.page_random, page.page_touched, page.page_latest, page.page_len,
                geo_tags.gt_lat, geo_tags.gt_lon
                FROM page
                JOIN geo_tags ON geo_tags.gt_page_id=page.page_id
                WHERE (page.page_namespace=0 OR page.page_namespace=6) AND page.page_id IN (%s)""" % placeholders, self.pageIDs)

            for row in cur.fetchall():
                lat, lon= row['gt_lat'], row['gt_lon']
                # the link template uses the coordinate pair three times:
                # marker position, map centre and visible link text
                filtertitle= '<a target="_blank" href="http://www.openstreetmap.org/?mlat=%s&mlon=%s#map=14/%s/%s">%s,%s</a>' % ((lat, lon) * 3)
                resultQueue.put(TlgResult(self.wiki, row, self.parent, filtertitle))

    def getPreferredPagesPerAction(self):
        """Return the preferred number of pages per action (50)."""
        return 50

    def createActions(self, language, pages, actionQueue):
        """Queue a single Action covering `pages` for `language`."""
        actionQueue.put(self.Action(self, language, pages))

FlawFilters.register(FGeotags)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#!/usr/bin/python | ||
# -*- coding:utf-8 -*- | ||
import time | ||
import datetime | ||
import requests | ||
from tlgflaws import * | ||
from utils import * | ||
|
||
# snippet from http://stackoverflow.com/questions/1265665/python-check-if-a-string-represents-an-int-without-using-try-except ... | ||
def isInt_str(v):
    """Tell whether the string form of `v` looks like an integer.

    `v` is converted with str() and surrounding whitespace is removed.
    The result is True when the text is exactly '0', or when -- after
    dropping leading '+'/'-' signs, trailing zeros and trailing dots --
    only digits remain. Text containing '..' is tested unmodified.

    NOTE: this accepts text such as '1.0' or '5.' which int() itself
    rejects, and rejects '00' because the trailing-zero strip empties it.
    """
    text = str(v).strip()
    if text == '0':
        return True
    if '..' in text:
        # checked as-is; the dots guarantee this is not digits-only
        return text.isdigit()
    unsigned = text.lstrip('-+')
    trimmed = unsigned.rstrip('0').rstrip('.')
    return trimmed.isdigit()
|
||
## | ||
class FPagehits(FlawFilter):
    """Filter that annotates pages with last month's hit count and sorts by it.

    Hit counts are fetched per page title from stats.grok.se.
    """
    shortname= 'Pagehits'
    label= _('Page Hits')
    description= _('Sort articles by hit count. Uses data from stats.grok.se from previous month.')

    @staticmethod
    def makeGrokSession():
        """Create a requests session that identifies this tool in its User-Agent."""
        grokSession= requests.Session()
        # if they ever need to complain about our requests, they'll know where to look:
        grokSession.headers.update({ 'User-Agent': 'Article List Generator (http://tools.wmflabs.org/render/stools/alg)'})
        return grokSession

    @cache_region('disklongterm')
    def getHitcount(self, year, month, title):
        """Return the summed daily views of `title` for the given year and month.

        Returns an int on success, the exception text if anything raises, or
        '?' when the server answers with a status other than 200.

        NOTE(review): the language segment of the URL is hard-coded to 'de',
        the title is inserted without URL-quoting, and failure values are
        returned through the cache decorator like successes -- presumably
        they get cached as well; confirm whether that is intended.
        """
        try:
            # the requests library is totally thread-safe, except when it isn't.
            # so we need to work around by creating a separate session for each worker thread
            session= CachedThreadValue("grokSession", self.makeGrokSession)
            res= session.get('http://stats.grok.se/json/de/%s%02d/%s' % (year, int(month), title))
            if res.status_code==200:
                views= res.json()['daily_views']
                return sum(int(views[day]) for day in views)
        except Exception as ex:
            # best effort: the caller shows this text instead of a count
            return str(ex)
        return '?'

    # our action class
    class Action(TlgAction):
        """Fetches hit counts for one batch of page IDs."""

        def execute(self, resultQueue):
            """Queue one TlgResult per non-redirect page in self.pageIDs.

            Only namespaces 0 and 6 are considered. Results carry the hit
            count in their title and use the negated count as sort key, so
            larger counts get smaller keys; pages without a numeric count
            get sort key 1.
            """
            # An empty batch would render the WHERE clause as "... AND ()",
            # which is a SQL syntax error, so bail out before touching the DB.
            if not self.pageIDs:
                return

            cur= getCursors()[self.wiki]
            placeholders= ', '.join(['%s'] * len(self.pageIDs))
            cur.execute("""SELECT page_id, page_namespace, page_title, page_restrictions, page_counter, page_is_redirect,
                page_is_new, page_random, page_touched, page_latest, page_len
                FROM page WHERE (page_namespace=0 OR page_namespace=6) AND page_is_redirect=0 AND page_id IN (%s)""" % placeholders, self.pageIDs)
            res= cur.fetchall()

            # Step back from the first day of the current month to land on the
            # last day of the previous month (handles the January -> December
            # rollover), matching what the filter description promises.
            lastmonth= datetime.date.today().replace(day=1) - datetime.timedelta(days=1)
            statyear= lastmonth.year
            statmonth= lastmonth.month

            for row in res:
                count= self.parent.getHitcount(statyear, statmonth, row['page_title'])
                filtertitle= 'count: %s' % count
                sortkey= -int(count) if isInt_str(count) else 1
                resultQueue.put(TlgResult(self.wiki, row, self.parent, filtertitle, sortkey= sortkey))

    def getPreferredPagesPerAction(self):
        """Return the preferred number of pages per action (50)."""
        return 50

    def createActions(self, language, pages, actionQueue):
        """Queue a single Action covering `pages` for `language`."""
        actionQueue.put(self.Action(self, language, pages))

FlawFilters.register(FPagehits)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# code to create a bounding box from a center geocoord and a 'radius' (half-side of square) in km. | ||
# this is copypasta from SO question at http://stackoverflow.com/questions/3182260/python-geocode-filtering-by-distance | ||
# answered by http://stackoverflow.com/users/84270/john-machin | ||
|
||
from math import sin, cos, asin, sqrt, degrees, radians | ||
|
||
# Sphere radius used by the distance helpers; both names refer to the same
# value (Earth's radius in kilometres), so distances come out in km.
RADIUS = Earth_radius_km = 6371.0
|
||
def haversine(angle_radians):
    """Return the haversine of an angle given in radians: sin(angle / 2) squared."""
    half_angle_sine = sin(angle_radians / 2.0)
    return half_angle_sine ** 2
|
||
def inverse_haversine(h):
    """Return the angle, in radians, whose haversine is `h`."""
    root = sqrt(h)
    return 2 * asin(root)
|
||
def distance_between_points(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points (haversine formula).

    All four arguments are in degrees; the result is in the units of RADIUS.
    WARNING: loss of absolute precision when points are near-antipodal.
    """
    lat1 = radians(lat1)
    lat2 = radians(lat2)
    dlat = lat2 - lat1
    dlon = radians(lon2 - lon1)
    h = haversine(dlat) + cos(lat1) * cos(lat2) * haversine(dlon)
    # Mathematically h never exceeds 1, but rounding can push it marginally
    # above for (near-)antipodal points, which would make asin() raise
    # ValueError. The comparison form leaves NaN untouched.
    if h > 1.0:
        h = 1.0
    return RADIUS * inverse_haversine(h)
|
||
def bounding_box(lat, lon, distance):
    """Return (dlat, dlon) half-extents, in degrees, of a bounding box.

    Input and output lats/longs are in degrees.
    Distance arg must be in same units as RADIUS.
    Returns (dlat, dlon) such that no points outside lat +/- dlat or
    outside lon +/- dlon are <= "distance" from the (lat, lon) point.
    Derived from: http://janmatuschek.de/LatitudeLongitudeBoundingCoordinates

    If the North/South Pole lies inside (or on) the circle of interest,
    every longitude is within reach, so dlon is returned as 180.0; the
    caller must still clip lat +/- dlat to the [-90, 90] range.
    WARNING: problems if longitude meridian +/-180 degrees intersects circle
    of interest; the caller has to wrap lon +/- dlon itself. See quoted
    article for how to detect and overcome that.
    Note: the result is independent of the longitude of the central point,
    so the "lon" arg is not used.
    """
    dlat = distance / RADIUS
    dlat_degrees = degrees(dlat)
    if abs(lat) + dlat_degrees >= 90.0:
        # Pole inside the circle: asin() below would get an argument > 1
        # (or, for very large distances, silently yield a too-small dlon).
        return dlat_degrees, 180.0
    ratio = sin(dlat) / cos(radians(lat))
    # guard against rounding overshoot right at the pole-tangent boundary
    if ratio > 1.0:
        ratio = 1.0
    return dlat_degrees, degrees(asin(ratio))
|
||
if __name__ == "__main__":

    # Examples from Jan Matuschek's article.
    # Every print() call takes a single pre-formatted string so the output is
    # the same under the Python 2 print statement and the Python 3 function.

    def test(lat, lon, dist):
        """Print the bounding box of `dist` around (lat, lon), all in degrees."""
        print("test bounding box %s %s %s" % (lat, lon, dist))
        dlat, dlon = bounding_box(lat, lon, dist)
        print("dlat, dlon degrees %s %s" % (dlat, dlon))
        print("lat min/max rads %s" % ([radians(lat - dlat), radians(lat + dlat)],))
        print("lon min/max rads %s" % ([radians(lon - dlon), radians(lon + dlon)],))

    print("liberty to eiffel")
    print(distance_between_points(40.6892, -74.0444, 48.8583, 2.2945)) # about 5837 km
    print("calc min/max lat/lon")
    # explicit lists: map() is lazy on Python 3 and would not print its values
    degs = [degrees(rad) for rad in (1.3963, -0.6981)]
    test(*degs, dist=1000)
    degs = [degrees(rad) for rad in (1.3963, -0.6981, 1.4618, -1.6021)]
    print("%s distance %s" % (degs, distance_between_points(*degs))) # 872 km