-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathutils.py
154 lines (114 loc) · 4.32 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import csv
import xml.sax
import pytz
import iso8601
from collections import Counter
from tqdm import tqdm
from dateutil import parser, tz
from django.conf import settings
from django.contrib.gis.gdal import DataSource
from django.contrib.gis.geos import Point, GEOSGeometry
from django.utils import timezone
from .models import Ingest, Tree, PropertySet
def parse_point(point_string):
    """Parse an ``"x,y"`` string into a point transformed to SRID 25833.

    The two comma-separated numbers are used as the x and y coordinates of a
    ``Point`` created with SRID 4326, which is then transformed in place to
    SRID 25833.

    Args:
        point_string: Text of the form ``"<x>,<y>"``; may be empty or ``None``.

    Returns:
        The transformed ``Point``, or ``None`` when ``point_string`` is falsy.

    Raises:
        ValueError: If the string is not exactly two comma-separated floats.
    """
    if not point_string:
        return None

    try:
        x, y = (float(n) for n in point_string.split(','))
    except ValueError:
        # Covers both non-numeric parts and a wrong number of parts (the
        # unpacking error is a ValueError as well). ValueError is raised
        # because no ParseError class is imported in this module and there is
        # no ``self`` in a plain function.
        raise ValueError(
            'Invalid geometry string supplied: {0!r}'.format(point_string)
        )

    p = Point(x, y, srid=4326)
    p.transform(25833)
    return p
def get_timestamp(filename):
    """Read the ``timeStamp`` of a WFS feature collection as an aware datetime.

    The file is streamed through a SAX parser; the ``timeStamp`` attribute of
    the ``wfs:FeatureCollection`` element is parsed as ISO 8601 and expressed
    in the time zone named by ``settings.TIME_ZONE``.

    Args:
        filename: Path (or other source accepted by the SAX parser) of the
            XML/GML file.

    Returns:
        A timezone-aware ``datetime`` in ``settings.TIME_ZONE``. A timestamp
        without an offset is interpreted as local time in that zone; one that
        carries an offset is converted to that zone.
    """
    class GMLHandler(xml.sax.ContentHandler):
        """Remember the ``timeStamp`` attribute of ``wfs:FeatureCollection``."""

        timestamp = None

        def startElement(self, name, attrs):
            if name == "wfs:FeatureCollection":
                self.timestamp = attrs['timeStamp']

    handler = GMLHandler()
    # Not called ``parser`` so the module-level ``dateutil.parser`` import is
    # not shadowed inside this function.
    sax_parser = xml.sax.make_parser()
    sax_parser.setContentHandler(handler)
    sax_parser.parse(filename)

    # default_timezone=None yields a naive datetime when the string has no
    # offset, and an aware one when it does.
    timestamp = iso8601.parse_date(handler.timestamp, default_timezone=None)
    zone = pytz.timezone(settings.TIME_ZONE)
    if timestamp.tzinfo is not None:
        # pytz's localize() raises ValueError for datetimes that already have
        # tzinfo, so convert instead of localizing.
        return timestamp.astimezone(zone)
    return zone.localize(timestamp)
def ingest_trees_from_file(dataset, filename):
    """Import the tree features of one file and record them as an ingest.

    An ``Ingest`` row is created for the file. For every feature in the first
    layer of the data source, the matching ``Tree`` (looked up by exact
    location) is fetched or instantiated, and a new ``PropertySet`` is stored
    unless an identical one already exists for that tree.

    Args:
        dataset: Value stored in the ``dataset`` field of the new ``Ingest``.
        filename: Path of the file to read; also stored on the ``Ingest``.

    Returns:
        A ``Counter`` with the number of trees under the keys ``'new'``,
        ``'updated'`` and ``'skipped'``.
    """
    # Optional renaming of source columns; presumably the AttributeError
    # covers a missing COLUMN_NAMES_CSV setting -- confirm.
    try:
        renames = _parse_column_names_csv()
    except AttributeError:
        renames = {}

    # The download time is taken from the file itself via a SAX pass.
    downloaded_at = get_timestamp(filename)

    # The features are read with the GDAL DataSource class.
    data_source = DataSource(filename)

    ingest = Ingest.objects.create(
        dataset=dataset,
        filename=filename,
        downloaded_at=downloaded_at,
        ingested_at=timezone.now()
    )

    tally = Counter()

    # Each feature of the first layer is one tree.
    for feature in tqdm(data_source[0]):
        location = GEOSGeometry(str(feature.geom), srid=25833)

        # Reuse the tree stored at exactly this location, if there is one.
        try:
            tree = Tree.objects.get(location=location)
        except Tree.DoesNotExist:
            tree = Tree(location=location)

        # Field values keyed by the renamed column (or the original name).
        properties = {
            renames.get(field, field): feature[field].value
            for field in feature.fields
        }

        if tree.properties:
            already_stored = any(
                properties == existing.properties
                for existing in tree.propertysets.all()
            )
            if already_stored:
                # An identical property set is already in the history.
                tally['skipped'] += 1
                continue
            outcome = 'updated'
        else:
            # No properties yet: treat as a new tree and save it first so the
            # property set can reference it.
            tree.save()
            outcome = 'new'

        # Append the properties to the history and make them the current set.
        tree.current_propertyset = PropertySet.objects.create(
            tree=tree,
            ingest=ingest,
            properties=properties
        )
        tree.save()
        tally[outcome] += 1

    return tally
def _parse_column_names_csv(path=None):
    """Load the mapping from source column names to output column names.

    Each row of the CSV is read as ``<name>,<alias>[;<alias>...]``: the first
    column is the name to use, the second a semicolon-separated list of source
    column names that are mapped to it.

    Args:
        path: CSV file to read. Defaults to ``settings.COLUMN_NAMES_CSV``.

    Returns:
        A dict mapping every non-empty alias to the name in its row's first
        column. Rows with fewer than two columns (including blank lines) are
        skipped.

    Raises:
        AttributeError: If ``path`` is omitted and the setting is not defined
            (assuming ``settings`` raises it for unknown names -- the caller
            in this module relies on that).
    """
    if path is None:
        path = settings.COLUMN_NAMES_CSV

    column_names = {}
    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''; binary mode makes csv.reader fail on the first row.
    # NOTE(review): UTF-8 is assumed for the file -- confirm.
    with open(path, newline='', encoding='utf-8') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if len(row) < 2:
                # Blank line or a row without an alias column.
                continue
            target = row[0]
            for alias in row[1].split(';'):
                if alias:
                    column_names[alias] = target
    return column_names