-
Notifications
You must be signed in to change notification settings - Fork 73
/
trackers.py
110 lines (83 loc) · 3.65 KB
/
trackers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from collections import defaultdict
from jinja2 import Markup
from whotracksme.website.utils import print_progress
from whotracksme.website.templates import (
get_template,
render_template,
)
from whotracksme.website.plotting.colors import site_category_colors
from whotracksme.website.plotting.trackers import ts_trend
def recent_tracker_reach(reach):
    """Summarise the most recent reach figures of a tracker.

    Args:
        reach: mapping holding a ``'page'`` and a ``'site'`` sequence; only
            the last element of each sequence is read.

    Returns:
        A dict with two keys. ``"pages"`` is the last page value multiplied
        by 100 and then rounded: one decimal from 0.1 upwards, two decimals
        from 0.01 up to 0.1, and the string ``"<0.01"`` for anything
        smaller. ``"sites"`` is the last site value, passed through as is.
    """
    latest_page_pct = reach['page'][-1] * 100

    if latest_page_pct >= 0.1:
        pages = round(latest_page_pct, 1)
    elif latest_page_pct >= 0.01:
        # Anything reaching this branch is already known to be below 0.1.
        pages = round(latest_page_pct, 2)
    else:
        pages = "<0.01"

    return {"pages": pages, "sites": reach['site'][-1]}
def tag_cloud_data(tracker_id, data):
    """Collect the sites a tracker appears on, for the tag-cloud display.

    Args:
        tracker_id: identifier handed to ``data.trackers.iter_sites``.
        data: data source exposing ``trackers.iter_sites``,
            ``sites.site_category`` and ``url_for``.

    Returns:
        A ``(all_sites, sites_by_category)`` tuple: the list of per-site
        dicts, and a ``defaultdict`` grouping those same dicts by their
        ``'category'`` value.
    """
    all_sites = []
    for entry in data.trackers.iter_sites(tracker_id):
        categories = data.sites.site_category
        name = entry.site

        # Sites missing from the category mapping get no link.
        if name in categories:
            link = data.url_for('site', name, path_to_root='..')
        else:
            link = None

        all_sites.append({
            'site': name,
            'frequency': entry.tracker_proportion,
            'url': link,
            'site_freq': entry.site_proportion,
            # Colour falls back to black when the category has no colour.
            'site_cat': site_category_colors.get(
                categories.get(name, None), '#000'
            ),
            'category': categories.get(name, ''),
        })

    # decide whether non-topsite sites should be included: once more than
    # 30 linked sites are available, the unlinked ones are dropped
    linked_sites = [site for site in all_sites if site['url'] is not None]
    if len(linked_sites) > 30:
        all_sites = linked_sites

    sites_by_category = defaultdict(list)
    for site in all_sites:
        sites_by_category[site['category']].append(site)

    return all_sites, sites_by_category
def build_trackers_list(data):
    """Render the tracker list page into ``_site/trackers.html``.

    The template receives the trackers sorted by ``reach``, the trackers
    sorted by ``company_id`` (``descending=False``) and the summary
    statistics of ``data.trackers``.

    Args:
        data: data source passed to ``get_template`` and queried through
            ``data.trackers``.
    """
    with open('_site/trackers.html', 'w') as output:
        page_template = get_template(data, name="trackers.html")
        by_reach = data.trackers.sort_by(metric="reach")
        by_company = data.trackers.sort_by(
            metric="company_id", descending=False
        )
        stats = data.trackers.summary_stats()

        html = render_template(
            template=page_template,
            tracker_list=by_reach,
            trackers_list_company=by_company,
            header_stats=stats,
        )
        output.write(html)

    print_progress(text="Generate tracker list")
def tracker_page(template, tracker_id, tracker, data):
    """Render the page of a single tracker and write it below ``_site/``.

    The output path is ``_site/`` followed by
    ``data.url_for("tracker", tracker_id)``.

    Args:
        template: template handed to ``render_template``.
        tracker_id: identifier used for every ``data.trackers`` lookup and
            for the output path.
        tracker: passed to the template as ``app``.
        data: data source exposing ``trackers``, ``sites`` and ``url_for``.
    """
    # Tracker reach time series
    reach = data.trackers.get_reach(tracker_id)
    timestamps = reach.get('ts')
    site_series = reach.get('site')

    # page_reach trend line
    page_trend = Markup(ts_trend(ts=reach.get('page'), t=timestamps))

    # domain_reach trend line - may not reach all the way back in time, so
    # it is paired with only the trailing timestamps, one per site value.
    # NOTE(review): an empty site series turns this slice into `[-0:]`,
    # which selects every timestamp - confirm that case cannot occur.
    site_timestamps = timestamps[-len(site_series):]
    site_trend = Markup(
        ts_trend(ts=site_series, t=site_timestamps, percent=False)
    )

    # tag cloud data
    all_sites, sites_by_cat = tag_cloud_data(tracker_id, data)

    # for horizontal bar chart in profile
    website_types = data.trackers.get_presence_by_site_category(
        tracker_id, data.sites
    )

    output_path = f'_site/{data.url_for("tracker", tracker_id)}'
    with open(output_path, 'w') as output:
        html = render_template(
            path_to_root='..',
            template=template,
            app=tracker,
            # profile-card hack
            profile=data.trackers.get_tracker(tracker_id),
            reach=recent_tracker_reach(reach),
            tracking_methods=data.trackers.get_tracking_methods(tracker_id),
            website_list=all_sites,
            sites_by_cat=sites_by_cat,
            # only the first five entries are shown
            website_types=website_types[:5],
            similar_trackers=data.trackers.similar_trackers(tracker_id),
            trends={'page': page_trend, 'site': site_trend},
            trackers=data.trackers.summary_stats()['count'],
        )
        output.write(html)
def build_tracker_pages(data):
    """Generate one page for every tracker yielded by ``data.trackers.iter()``.

    The ``tracker-page.html`` template is fetched once and reused for each
    ``tracker_page`` call.

    Args:
        data: data source passed to ``get_template`` and to each
            ``tracker_page`` call.
    """
    page_template = get_template(
        data, name='tracker-page.html', path_to_root='..'
    )

    for ident, details in data.trackers.iter():
        tracker_page(page_template, ident, details, data)

    print_progress(text="Generate tracker pages")