forked from EmilAvalon/gdgwarsaw2019
-
Notifications
You must be signed in to change notification settings - Fork 0
/
manual.py
80 lines (68 loc) · 2.1 KB
/
manual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from google.cloud import storage
from tornado.escape import url_unescape
import random
import argparse
import sys
def addtype(data, type):
    """Prefix every row of ``data`` with the dataset split name ``type``.

    Each row is replaced in place by ``[type, row[0], row[1]]`` and the new
    row is printed as it is produced.

    Args:
        data: Mutable sequence of rows, each with at least two items.
        type: Split label placed first in every row.

    Returns:
        The same ``data`` object, now holding the three-item rows.
    """
    for position, row in enumerate(data):
        labelled = [type, row[0], row[1]]
        data[position] = labelled
        print(labelled)
    return data
def createData(projectId):
    """Build the CSV text labelling the cat/dog JPEGs in the project's bucket.

    Lists the objects of the ``<projectId>-vcm`` bucket, keeps those whose
    file name contains "jpg" and one of the tags "cat" or "dog", shuffles
    them with a fixed seed and splits them 80% / 10% / 10% into TRAIN,
    VALIDATION and TEST rows.

    Args:
        projectId: Project id string; "-vcm" is appended to form the
            bucket name.

    Returns:
        A string with one ``SPLIT,gs://bucket/name,label`` line per kept
        image, each line terminated by a newline.
    """
    tags = ("cat", "dog")  # checked in this order, so "cat" wins if both match
    storage_client = storage.Client()
    inputbucket = projectId + "-vcm"
    random.seed(273)  # fixed seed keeps the shuffle, and so the split, repeatable
    bucket = storage_client.get_bucket(inputbucket)

    data = []
    for blob in bucket.list_blobs():
        # The object path is URL-unescaped and only its last component kept.
        name = url_unescape(blob.path).rsplit("/", 1)[-1]
        if "jpg" not in name:
            continue
        label = next((tag for tag in tags if tag in name), None)
        if label is None:
            # A JPEG matching neither tag used to reach the append below with
            # the label unbound (crash) or left over from the previous file
            # (wrong label); such files are now skipped.
            continue
        data.append(["gs://" + inputbucket + "/" + str(name), label])

    random.shuffle(data)
    total = len(data)
    train_end = int(total * .8)
    valid_end = int(total * .9)
    full_data = (
        addtype(data[:train_end], 'TRAIN')
        + addtype(data[train_end:valid_end], 'VALIDATION')
        + addtype(data[valid_end:], 'TEST')
    )
    # join builds the text in one pass instead of repeated concatenation.
    return "".join(
        str(row[0]) + "," + str(row[1]) + "," + str(row[2]) + "\n"
        for row in full_data
    )
def upload_data_to_gcs(projectId, data):
    """Upload ``data`` as the object ``cats-dogs.csv`` in ``<projectId>-vcm``.

    Any exception raised while creating the client or uploading is caught
    and printed; nothing is returned.

    Args:
        projectId: Project id string; "-vcm" is appended to form the
            bucket name.
        data: Content handed to ``upload_from_string``.
    """
    object_name = "cats-dogs.csv"
    target_bucket = projectId + "-vcm"
    try:
        gcs = storage.Client()
        csv_blob = gcs.bucket(target_bucket).blob(object_name)
        csv_blob.upload_from_string(data)
    except Exception as err:
        print(err)
def getValues():
    """Return the project id given as the first command-line argument.

    Returns:
        ``sys.argv[1]``, or ``None`` when no argument was supplied; in that
        case the exception type is printed first.
    """
    try:
        return sys.argv[1]
    except IndexError:
        # Parenthesised print runs on both Python 2 and Python 3; the former
        # ``print e`` statement is a syntax error on Python 3.
        print(sys.exc_info()[0])
        return None
def startCreate():
    """Run the pipeline: read the project id, build the CSV, upload it."""
    project = getValues()
    csv_text = createData(project)
    upload_data_to_gcs(project, csv_text)
# Entry point: the pipeline runs as soon as this file is executed or imported.
startCreate()