-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathspark-command-line-wrapper.py
111 lines (80 loc) · 3.02 KB
/
spark-command-line-wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# execute spark-shell with file,
# file contains variables
# variable will be readed from command line during execution
import sys
import subprocess
from string import Template
import tempfile
def __clear_binary_line__(b_line):
next_line = b_line.decode("utf-8")
if next_line[len(next_line)-1] == "\n":
next_line = next_line[:-1]
return next_line.strip()
def replace_variables(file_name, variables):
with open(file_name, "r") as input:
file_data = input.read()
temp_file = tempfile.NamedTemporaryFile(delete=False)
temp_file.write(bytes(Template(file_data).substitute(variables), 'utf-8'))
temp_file.flush()
temp_file.close()
return temp_file.name
def execute_spark_script(argument_holder):
file_name = replace_variables(argument_holder[ArgumentHolder.__SCRIPT_FILE_NAME__], argument_holder)
process = subprocess.Popen(["spark2-shell","--deploy-mode","client","-i",file_name], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = process.communicate()
# process.stdin.write(":quit".encode("utf-8"))
# process.stdin.close()
return __clear_binary_line__(stdout)
class ArgumentHolder(dict):
__SCRIPT_FILE_NAME__="__script_file_name__"
def __setitem__(self, key, item):
self.__dict__[key] = item
def __getitem__(self, key):
return self.__dict__[key]
def __repr__(self):
return repr(self.__dict__)
def __len__(self):
return len(self.__dict__)
def __delitem__(self, key):
del self.__dict__[key]
def clear(self):
return self.__dict__.clear()
def copy(self):
return self.__dict__.copy()
def has_key(self, k):
return k in self.__dict__
def update(self, *args, **kwargs):
return self.__dict__.update(*args, **kwargs)
def keys(self):
return self.__dict__.keys()
def values(self):
return self.__dict__.values()
def items(self):
return self.__dict__.items()
def pop(self, *args):
return self.__dict__.pop(*args)
def __cmp__(self, dict_):
return self.__cmp__(self.__dict__, dict_)
def __contains__(self, item):
return item in self.__dict__
def __iter__(self):
return iter(self.__dict__)
def __unicode__(self):
return unicode(repr(self.__dict__))
@staticmethod
def parse_arguments(arguments):
instance = ArgumentHolder()
if len(sys.argv)<2:
print("at least filename should be specified")
sys.exit(1)
instance[ArgumentHolder.__SCRIPT_FILE_NAME__]=sys.argv[1]
for index in range(0, int((len(sys.argv)-1)/2) ):
instance[sys.argv[2+index*2].strip("--")]=sys.argv[2+index*2+1]
return instance
def get_first_prefix(data, prefix):
for each_line in data.split("\n"):
if each_line.startswith(prefix):
return each_line[len(prefix):]
if __name__=='__main__':
output = execute_spark_script(ArgumentHolder.parse_arguments(sys.argv))
print(get_first_prefix(output, "result: "))