In [91]:
import os
import sys
import json
import time
import re
from shutil import copy
from subprocess import check_output
import pandas
from pandas import DataFrame


from execo import Process, SshProcess, Put, Remote
from execo_engine import Engine, logger, ParamSweeper, sweep
from execo_g5k import oarsub, oardel, OarSubmission, get_current_oar_jobs, get_oar_job_subnets, get_oar_job_nodes, get_oar_job_info, \
Deployment, deploy

In [92]:
jobname="fl-measure"
nodecount=7
walltime="02:00:00"
resources_selection="-t exotic -p estats"
site="toulouse"
force_redeploy=False
environment_dsc_file='./fl_jetson_image.yaml'

In [93]:
jobs = get_current_oar_jobs()
jobid = None
waiting_jobs = []
while jobs:
    j, site = jobs.pop()
    info = get_oar_job_info(j, site)
    if info['name'] == jobname:
        if info['state'] == 'Running':
            jobid = j
            print("A {} job is already running, using it. jobid is {}".format(jobname, jobid))
            break
        else:
            waiting_jobs.append(j)
if not jobid and not waiting_jobs:
    jobspec = OarSubmission(resources="/cluster=1/nodes={}".format(nodecount), walltime=walltime,
                            additional_options=resources_selection, job_type="deploy", name=jobname,
                            queue='testing')
    jobid, _ = oarsub([(jobspec, site)]).pop()
    print("New job submitted, jobid is {}".format(jobid))
elif not jobid:
    print("One or more {} jobs exist ({}) but are not running.\n"
          " Connect to the frontend to see what is happening, and/or run the cell again.".format(
          jobname, ", ".join([str(j) for j in waiting_jobs])))

New job submitted, jobid is 448296


In [96]:
nodes = get_oar_job_nodes(jobid, site)
nodes.sort(key=lambda n: n.address)
nodes

[Host('estats-1.toulouse.grid5000.fr'),
 Host('estats-10.toulouse.grid5000.fr'),
 Host('estats-3.toulouse.grid5000.fr'),
 Host('estats-5.toulouse.grid5000.fr'),
 Host('estats-6.toulouse.grid5000.fr'),
 Host('estats-7.toulouse.grid5000.fr'),
 Host('estats-9.toulouse.grid5000.fr')]

In [97]:
server=nodes[0]
clients=nodes[1:]
print("Server:{} \n Clients: {}".format(server,clients))

Server:Host('estats-1.toulouse.grid5000.fr') 
 Clients: [Host('estats-10.toulouse.grid5000.fr'), Host('estats-3.toulouse.grid5000.fr'), Host('estats-5.toulouse.grid5000.fr'), Host('estats-6.toulouse.grid5000.fr'), Host('estats-7.toulouse.grid5000.fr'), Host('estats-9.toulouse.grid5000.fr')]


In [98]:
deployment = Deployment(hosts=nodes, env_file=os.path.abspath(environment_dsc_file))
deploy_ok, deploy_failed = deploy(deployment, check_deployed_command=not force_redeploy,
                              stdout_handlers=[sys.stdout],
                              stderr_handlers=[sys.stderr])
print("Deployement status:\n* ok: {}\n* failed: {}".format(deploy_ok, deploy_failed))

Deployement status:
* ok: {'estats-1.toulouse.grid5000.fr', 'estats-7.toulouse.grid5000.fr', 'estats-9.toulouse.grid5000.fr', 'estats-6.toulouse.grid5000.fr', 'estats-10.toulouse.grid5000.fr', 'estats-3.toulouse.grid5000.fr', 'estats-5.toulouse.grid5000.fr'}
* failed: set()


# RUN EXPERIMENT

In [161]:
def get_host_ip(hostname):
    command = f"hostname -I"
    process = SshProcess(command, host=hostname)
    process.run()
    if process.ok:
        ip_address = process.stdout.strip()
        process.kill()
        return ip_address
    else:
        process.kill()
        return f"Failed to get IP for {hostname}"

# Example usage
hostname = "your_host"
ip_address = get_host_ip(server)
print(f"IP address for {hostname} is {ip_address}")

IP address for your_host is 172.16.121.1 172.17.0.1 2001:660:4406:900:a::1


In [162]:
run_server=SshProcess("cd /home/tunguyen/jetson-test/; bash run_server.sh",server, connection_params={'user':'root'})
clients_procs = []
for (host,cid) in zip(clients,range(len(clients))):
    run_client=SshProcess("cd /home/tunguyen/jetson-test/; bash run_client.sh {}".format(cid),host, connection_params={'user':'root'})
    clients_procs.append(run_client)

In [163]:
run_server.start()
#print(f"Output from {server}:\n{run_server.stdout}")

SshProcess('cd /home/tunguyen/jetson-test/; bash run_server.sh', Host('estats-1.toulouse.grid5000.fr'), connection_params={'user': 'root'})

In [164]:
for run_client in clients_procs:
    run_client.start()
    #print(f"Output from {run_client.host}:\n{run_client.stdout}")

In [None]:
print(f"Output from server {server}:\n{run_server.stdout}")
print(f"Output from client {clients_procs[0]}:\n{clients_procs[0].stdout}")

In [160]:
run_server.kill()
[run_client.kill() for run_client in clients_procs]

[SshProcess('cd /home/tunguyen/jetson-test/; bash run_client.sh 0', Host('estats-10.toulouse.grid5000.fr'), connection_params={'user': 'root'}),
 SshProcess('cd /home/tunguyen/jetson-test/; bash run_client.sh 1', Host('estats-3.toulouse.grid5000.fr'), connection_params={'user': 'root'}),
 SshProcess('cd /home/tunguyen/jetson-test/; bash run_client.sh 2', Host('estats-5.toulouse.grid5000.fr'), connection_params={'user': 'root'}),
 SshProcess('cd /home/tunguyen/jetson-test/; bash run_client.sh 3', Host('estats-6.toulouse.grid5000.fr'), connection_params={'user': 'root'}),
 SshProcess('cd /home/tunguyen/jetson-test/; bash run_client.sh 4', Host('estats-7.toulouse.grid5000.fr'), connection_params={'user': 'root'}),
 SshProcess('cd /home/tunguyen/jetson-test/; bash run_client.sh 5', Host('estats-9.toulouse.grid5000.fr'), connection_params={'user': 'root'})]

In [86]:
oardel([(jobid,site)])

NameError: name 'jobid' is not defined