-
Notifications
You must be signed in to change notification settings - Fork 233
/
datasets.py
130 lines (110 loc) · 4.96 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import bz2
import gzip
import os
import platform
try:
from urllib import urlretrieve # py2
except ImportError:
from urllib.request import urlretrieve # py3
import numpy as np
# Normalised operating-system name: "osx", "linux" or "windows". Any other
# value reported by platform.system() is kept as-is, lower-cased.
osname = platform.system().lower()
osname = dict(darwin="osx", linux="linux", windows="windows").get(osname, osname)

# Per-user cache directory that downloaded datasets are stored in.
data_dir = os.path.expanduser("~/.ipyvolume/datasets")
try:
    # Deliberately not using exist_ok=True: Python 2 (still supported by the
    # urlretrieve import fallback in this module) rejects that keyword with a
    # TypeError, which used to be swallowed and left the directory uncreated.
    os.makedirs(data_dir)
except OSError:
    # Best effort: the directory already exists, or it cannot be created
    # (e.g. read-only home). Importing this module must not fail because of
    # that; a later download will surface the problem if there is one.
    pass
class UrlCached(object):
    """A remote file that is downloaded once and cached in ``data_dir``.

    The local file name is the last path component of ``url``.
    """

    def __init__(self, url):
        """Remember ``url`` and derive the local cache ``path`` from it."""
        self.url = url
        self.path = os.path.join(data_dir, os.path.split(url)[1])

    def download(self, force=False):
        """Download ``url`` to ``path`` unless a cached copy already exists.

        Tries ``wget`` first, then ``curl``, then ``urlretrieve``. A step
        counts as failed when its command exits non-zero or leaves no file
        behind.

        :param force: when true, discard the cached copy and download again.
        """
        if os.path.exists(self.path):
            if not force:
                return
            # The stale copy has to go first: ``wget -c`` would treat it as an
            # already completed download, and the existence checks below would
            # accept it as a successful result.
            os.remove(self.path)
        print("Downloading %s to %s" % (self.url, self.path))
        code = os.system(self.download_command_wget())
        if code != 0 or not os.path.exists(self.path):
            print("Download failed, exit code was: " + str(code) + " will try with curl")
            code = os.system(self.download_command_curl())
            if code != 0 or not os.path.exists(self.path):
                print("Download failed again, exit code was: " + str(code) + " using urlretrieve")
                self.download_urlretrieve()

    def fetch(self):
        """Make sure the file is cached and return its local path.

        :raises Exception: if the file is still missing after the download.
        """
        self.download()
        if not os.path.exists(self.path):
            raise Exception("file not found and/or download failed")
        return self.path

    def download_command_wget(self):
        """Return the shell command that downloads ``url`` with wget."""
        # Both arguments are double-quoted so that spaces in the cache
        # directory and characters such as '&' or '?' in the URL reach wget
        # intact instead of being interpreted by the shell.
        return 'wget --progress=bar:force -c -P "%s" "%s"' % (data_dir, self.url)

    def download_command_curl(self):
        """Return the shell command that downloads ``url`` with curl."""
        # -f: exit non-zero on HTTP errors instead of saving the error page.
        # -o: write straight to ``path``; avoids the non-portable ``cd dir;``.
        return 'curl -f -L -o "%s" "%s"' % (self.path, self.url)

    def download_urlretrieve(self):
        """Download ``url`` to ``path`` with the standard library."""
        urlretrieve(self.url, self.path)
class Dataset(object):
    """One of the example datasets hosted in the ipyvolume GitHub repository.

    The remote file type follows from the flags:

    * ``density`` and ``raw``: ``<name>.raw.bz2``, raw bytes interpreted with
      ``dtype`` and reshaped to ``shape``;
    * ``density`` without ``raw``: ``<name>.npy.bz2``;
    * not ``density``, ``numpy``: ``<name>.npy.bz2``;
    * otherwise: ``<name>.csv.gz``, a comma separated table whose first line
      holds the column names.
    """

    _BASE_URL = "https://github.com/maartenbreddels/ipyvolume/raw/master/datasets/"

    def __init__(self, name, density=True, numpy=False, raw=False, shape=None, dtype=None):
        """Store the options and derive ``url`` and the local cache ``path``."""
        self.name = name
        self.density = density
        self.numpy = numpy
        self.raw = raw
        self.shape = shape
        self.dtype = dtype
        if density:
            extension = ".raw.bz2" if raw else ".npy.bz2"
        else:
            extension = ".npy.bz2" if numpy else ".csv.gz"
        filename = name + extension
        self.url = self._BASE_URL + filename
        self.path = os.path.join(data_dir, filename)

    def download(self, force=False):
        """Download ``url`` to ``path`` unless a cached copy already exists.

        Tries ``wget`` first, then ``curl``, then ``urlretrieve``. A step
        counts as failed when its command exits non-zero or leaves no file
        behind.

        :param force: when true, discard the cached copy and download again.
        """
        if os.path.exists(self.path):
            if not force:
                return
            # The stale copy has to go first: ``wget -c`` would treat it as an
            # already completed download, and the existence checks below would
            # accept it as a successful result.
            os.remove(self.path)
        print("Downloading %s to %s" % (self.url, self.path))
        code = os.system(self.download_command_wget())
        if code != 0 or not os.path.exists(self.path):
            print("Download failed, exit code was: " + str(code) + " will try with curl")
            code = os.system(self.download_command_curl())
            if code != 0 or not os.path.exists(self.path):
                print("Download failed again, exit code was: " + str(code) + " using urlretrieve")
                self.download_urlretrieve()

    def fetch(self):
        """Download the file if needed, load it, and return ``self``.

        Array datasets are stored on ``self.data``. For the csv variant every
        column is stored as an attribute named after its header entry.

        :raises Exception: if the file is still missing after the download.
        """
        self.download()
        if not os.path.exists(self.path):
            raise Exception("file not found and/or download failed")
        if self.density and self.raw:
            with bz2.BZ2File(self.path) as f:
                self.data = np.frombuffer(f.read(), self.dtype).reshape(self.shape)
        elif self.density or self.numpy:
            with bz2.BZ2File(self.path) as f:
                self.data = np.load(f)
        else:
            # The context manager closes the gzip handle, which used to leak.
            with gzip.GzipFile(self.path) as f:
                # First line is the header; its first character (a comment
                # marker) is dropped before splitting into column names.
                header = f.readline().decode("utf-8")[1:].strip()
                # unpack=True yields one row per *column* of the table, which
                # is what the per-name indexing below needs. ndmin=2 keeps
                # that layout for files with a single row or single column.
                columns = np.loadtxt(f, delimiter=",", unpack=True, ndmin=2)
            for i, column_name in enumerate(header.split(",")):
                setattr(self, column_name.strip(), columns[i])
        return self

    def download_command_wget(self):
        """Return the shell command that downloads ``url`` with wget."""
        # Both arguments are double-quoted so that spaces in the cache
        # directory reach wget intact instead of splitting the command.
        return 'wget --progress=bar:force -c -P "%s" "%s"' % (data_dir, self.url)

    def download_command_curl(self):
        """Return the shell command that downloads ``url`` with curl."""
        # -f: exit non-zero on HTTP errors instead of saving the error page.
        # -o: write straight to ``path``; avoids the non-portable ``cd dir;``.
        return 'curl -f -L -o "%s" "%s"' % (self.path, self.url)

    def download_urlretrieve(self):
        """Download ``url`` to ``path`` with the standard library."""
        urlretrieve(self.url, self.path)
# Ready-made handles for the example datasets. Constructing a Dataset only
# records its URL and cache path; data is downloaded and loaded by .fetch().

# Density volumes stored as bz2-compressed .npy files.
hdz2000 = Dataset(name="hdz2000")
aquariusA2 = Dataset(name="aquarius-A2")
egpbosLCDM = Dataset(name="egpbos-LCDM")

# Non-density data: a gzipped csv table and a bz2-compressed .npy file.
zeldovich = Dataset(name="zeldovich", density=False)
animated_stream = Dataset(name="stream-animation", density=False, numpy=True)

# Raw uint8 density volume, reshaped to 256 x 256 x 128 on load.
head = Dataset(name="male", raw=True, shape=(256, 256, 128), dtype=np.uint8)
# low poly cat from: https://sketchfab.com/models/1e7143dfafd04ff4891efcb06949a0b4#