In [1]:
import json
import optuna
import subprocess
import time
from deploy_target import TargetDeployer
import requests
PROMETHEUS='http://prometheus.local'

In [2]:
# Create the deployer handle that objective() later uses to change limits,
# and apply a starting pair of limits to the deployment.
# NOTE(review): objective() passes (cpus, memory) in this order; the units of
# the two 120s are defined by TargetDeployer — confirm there.
target = TargetDeployer()
target.update_deployment_limits(120, 120)


In [3]:
def wait_for_experiment_state_change(poll_interval=1.0):
    """Block until Prometheus reports no ``k6_checks`` samples for the last 5 seconds.

    Repeatedly runs the instant query ``max_over_time(k6_checks[5s])`` and
    returns once its result vector is empty, printing ``experiment ended``.

    Args:
        poll_interval: Seconds to sleep between two queries. Without a pause
            the loop re-queries as fast as the network allows, which pins a
            CPU core and floods Prometheus while waiting.
    """
    while True:
        response = requests.get(PROMETHEUS + '/api/v1/query', params={'query': 'max_over_time(k6_checks[5s])'})
        series = response.json().get('data').get('result')
        if len(series) == 0:
            print('experiment ended')
            break
        # Samples are still arriving: back off briefly before asking again.
        time.sleep(poll_interval)

In [30]:
def wait_for_failure(poll_interval=1.0):
    """Poll Prometheus until the run's outcome can be decided; report whether it failed.

    Each round queries ``max_over_time(k6_http_req_failed[5s])`` and collects
    the ``status`` label of every returned series. As soon as a status other
    than ``'200'`` shows up, or the number of series is not exactly one, a
    confirmation query ``sum_over_time(k6_http_req_failed[30s])`` is run and
    the function returns based on that second result.

    Args:
        poll_interval: Seconds to sleep between polling rounds. Without a
            pause the loop re-queries as fast as the network allows.

    Returns:
        ``True`` (after printing ``run failed``) if the 30s query contains a
        series whose ``status`` label is not ``'200'``; ``False`` (after
        printing ``run success``) otherwise.
    """
    def _statuses(query):
        # Run one instant query; return (all status labels, the non-'200' ones).
        response = requests.get(PROMETHEUS + '/api/v1/query', params={'query': query})
        result = response.json().get('data').get('result')
        statuses = [series.get('metric').get('status') for series in result]
        return statuses, [status for status in statuses if status != '200']

    while True:
        statuses, non200s = _statuses('max_over_time(k6_http_req_failed[5s])')
        # NOTE(review): an empty 5s result also lands here (0 != 1) — presumably
        # that is how the end of a run is detected; confirm a run that has not
        # emitted metrics yet cannot be mistaken for a success.
        if len(non200s) > 0 or len(statuses) != 1:
            _, non200s = _statuses('sum_over_time(k6_http_req_failed[30s])')
            if len(non200s) > 0:
                print('run failed')
                return True
            print('run success')
            return False
        # Exactly one series and it is status 200: keep waiting.
        time.sleep(poll_interval)

In [31]:
def reset_experiment():
    """Call the tester's ``/reset`` endpoint and print the response body."""
    print(requests.get('http://tester.local/reset').text)

In [32]:
def start_experiment():
    """Call the tester's ``/start`` endpoint and print the response body."""
    print(requests.get('http://tester.local/start').text)

In [33]:
def get_avg_latency():
    """Query Prometheus for ``avg_over_time(k6_custom_target_highmem_duration_avg[3m])``.

    Prints the decoded response and the extracted result list, as before.

    Returns:
        The value component of the first series in the result, exactly as
        Prometheus returns it (a string such as ``'475.98'``), or ``None``
        when the query yields no series.
    """
    response = requests.get(
        PROMETHEUS + '/api/v1/query',
        params={'query': 'avg_over_time(k6_custom_target_highmem_duration_avg[3m])'},
    )
    # Decode the body once and reuse it instead of parsing the JSON twice.
    payload = response.json()
    print(payload)
    series = payload.get('data').get('result')
    print(series)
    if not series:
        return None
    # A sample is a [timestamp, value] pair; index 1 is the value.
    return series[0].get('value')[1]

In [34]:
def objective(trial):
    """Optuna objective: run one experiment at the sampled limits and return its latency.

    Samples integer ``cpus`` and ``memory`` values in [150, 300], applies them
    through ``target.update_deployment_limits``, resets and starts the
    experiment, waits for its outcome, reads the average latency and finally
    resets the experiment again.

    Args:
        trial: The Optuna trial used to sample ``cpus`` and ``memory``.

    Returns:
        The latency reported by ``get_avg_latency`` converted to ``float``.

    Raises:
        optuna.TrialPruned: If ``wait_for_failure`` reports a failed run, or
            if no latency sample is available for the run.
    """
    cpus = trial.suggest_int("cpus", 150, 300, step=1)
    memory = trial.suggest_int("memory", 150, 300, step=1)
    print("Running study with cpu: {} and memory: {}".format(cpus, memory))
    target.update_deployment_limits(cpus, memory)
    # NOTE(review): fixed pause after changing the limits, presumably to let
    # the change take effect — confirm 2s is long enough.
    time.sleep(2)

    # Stop whatever is still running before starting a fresh experiment.
    reset_experiment()
    wait_for_experiment_state_change()
    print("no experiment running")

    start_experiment()
    if wait_for_failure():
        print("experiment failed")
        raise optuna.TrialPruned()

    latency = get_avg_latency()
    print("latency: {}".format(latency))

    reset_experiment()
    wait_for_experiment_state_change()
    print("experiment stopped")

    # get_avg_latency() returns None when Prometheus has no sample; float(None)
    # would raise TypeError and abort the study, so prune the trial instead.
    if latency is None:
        raise optuna.TrialPruned()
    return float(latency)

In [20]:
# Create a study that minimises the value returned by objective() (the
# measured latency), sampling parameters with CMA-ES, and run a single trial.
study = optuna.create_study(sampler=optuna.samplers.CmaEsSampler(), directions=["minimize"])
study.optimize(objective, n_trials=1)

[32m[I 2022-07-21 23:24:00,985][0m A new study created in memory with name: no-name-3f8e2fa5-800a-4b3a-ac07-4b44caf11500[0m


Running study with cpu: 270 and memory: 300
Reset done
experiment ended
no experiment running
Started
run success
{'status': 'success', 'data': {'resultType': 'vector', 'result': [{'metric': {'run': '2022-07-21_17-54-18', 'scenario': 'target_highmem', 'tag': 'highmem_duration'}, 'value': [1658426073.249, '475.9873601903472']}]}}
[{'metric': {'run': '2022-07-21_17-54-18', 'scenario': 'target_highmem', 'tag': 'highmem_duration'}, 'value': [1658426073.249, '475.9873601903472']}]
latency: 475.9873601903472
Reset done


[32m[I 2022-07-21 23:24:43,296][0m Trial 0 finished with value: 475.9873601903472 and parameters: {'cpus': 270, 'memory': 300}. Best is trial 0 with value: 475.9873601903472.[0m


experiment ended
experiment stopped


In [21]:
# Show the parameter values of the best trial the study has recorded so far.
best_params = study.best_params
print(best_params)

{'cpus': 270, 'memory': 300}


In [22]:
def get_latency(trial):
    """Return the first objective value stored on ``trial``."""
    objective_values = trial.values
    return objective_values[0]
# Contour plot of the first objective value ("Latency") over cpus x memory.
# NOTE(review): the output recorded for this cell, produced when the study held
# a single trial, shows a warning and a ValueError — re-run once more trials exist.
fig = optuna.visualization.plot_contour(study, params=["cpus", "memory"], target=get_latency, target_name="Latency")
fig.show()
# def get_throughput(trial):
#     return trial.values[1]
# fig1 = optuna.visualization.plot_contour(study, params=["cpus", "memory"], target=get_throughput, target_name="Throughput")
# fig1.show()

[33m[W 2022-07-21 23:24:43,322][0m Param cpus unique value length is less than 2.[0m


ValueError: Cannot evaluate parameter importances with only a single trial.

In [None]:
from IPython.display import clear_output


def get_latency(trial):
    """Return the first objective value of ``trial`` (the contour plot target)."""
    return trial.values[0]


# Run 50 more trials one at a time, redrawing the contour plot after each one.
# get_latency is defined once up front instead of being re-created per iteration.
for i in range(50):
    print(i)
    study.optimize(objective, n_trials=1)
    # Replace the previous iteration's output with the fresh plot.
    clear_output(wait=True)
    fig = optuna.visualization.plot_contour(study, params=["cpus", "memory"], target=get_latency, target_name="Latency")
    fig.show()

0
Running study with cpu: 200 and memory: 180


[33m[W 2022-07-22 07:35:33,433][0m Trial 71 failed because of the following error: MaxRetryError("HTTPSConnectionPool(host='127.0.0.1', port=53171): Max retries exceeded with url: /apis/apps/v1/namespaces/target/deployments/target-deployment (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1129)')))")[0m
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 386, in _make_request
    self._validate_conn(conn)
  File "/usr/local/lib/python3.9/site-packages/urllib3/connectionpool.py", line 1040, in _validate_conn
    conn.connect()
  File "/usr/local/lib/python3.9/site-packages/urllib3/connection.py", line 414, in connect
    self.sock = ssl_wrap_socket(
  File "/usr/local/lib/python3.9/site-packages/urllib3/util/ssl_.py", line 453, in ssl_wrap_socket
    s

In [28]:
import joblib
# Serialise the study object to a local file so it can be restored later
# with joblib.load.
joblib.dump(study, 'CmaEsSampler_study.pkl')

['CmaEsSampler_study.pkl']

In [None]:
import joblib
# Restore a previously dumped study, replacing the current `study` variable.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load a .pkl that you produced yourself.
study = joblib.load('CmaEsSampler_study.pkl')