In [105]:
import librosa
# import numpy as np
import requests
import urllib3
import jsonpickle
import time
from typing import List, Dict, Any, Tuple
import io
import soundfile
import numpy as np

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


save_path = "data"
sample_rate = 16000
debug = True
session_id = "test_session"
timestamp = 0
source_language = "cs"
transcript_language = "en"

In [128]:
# load audio file
audio_en, sr = librosa.load(f"{save_path}/test_en.wav", sr=sample_rate)
assert sr == sample_rate
audio_cs, sr = librosa.load(f"{save_path}/test_cs.wav", sr=sample_rate)
assert sr == sample_rate

if debug:
    print(type(audio_en), audio_en.shape, audio_en.dtype)
    print(type(audio_cs), audio_cs.shape, audio_cs.dtype)

<class 'numpy.ndarray'> (3270529,) float32
<class 'numpy.ndarray'> (3956943,) float32


In [92]:
def crate_session():
    try:
        response = requests.get(
            f"https://slt.ufal.mff.cuni.cz:5003/create_session?session_id={session_id}",
            verify=False,
        )
        print(response.text)
    except Exception as e:
        print(e)


def end_session():
    try:
        response = requests.get(
            f"https://slt.ufal.mff.cuni.cz:5003/end_session?session_id={session_id}", verify=False
        )
        print(response.text)
    except Exception as e:
        print(e)


def get_active_sessions():
    try:
        response = requests.get(
            "https://slt.ufal.mff.cuni.cz:5003/get_active_sessions", verify=False
        )
        print(response.text)
    except Exception as e:
        print(e)


def audio_to_dict(audio):
    return {str(index): value for index, value in enumerate(audio.tolist())}


def submit_audio():
    try:
        response = requests.post(
            f"https://slt.ufal.mff.cuni.cz:5003/submit_audio_chunk?session_id={session_id}",
            verify=False,
            json={
                "timestamp": timestamp,
                "chunk": audio_to_dict(audio=audio_en[:10]),
            },  # 10 seconds
        )
        print(response.text)
    except Exception as e:
        print(e)


def offload_computation_get():
    try:
        response = requests.get(
            "https://slt.ufal.mff.cuni.cz:5003/offload_ASR",
            verify=False,
        )
        print(response.text)
    except Exception as e:
        print(e)


def offload_computation_post():
    try:
        response = requests.post(
            "https://slt.ufal.mff.cuni.cz:5003/offload_ASR",
            verify=False,
            json={
                "session_id": session_id,
                "timestamp": timestamp,
                "tsw": [
                    (29.0, 29.16, " That's"),
                    (29.16, 29.28, " about"),
                    (29.28, 29.5, " four"),
                    (29.5, 29.9, " seconds."),
                ],
                "ends": [5.44, 9.16, 11.78, 17.26, 23.9, 26.2, 29.9],
                "language": "en",
            },
        )
        print(response.text)
    except Exception as e:
        print(e)


def get_latest_text_chunk_versions(language="en"):
    try:
        response = requests.get(
            f"https://slt.ufal.mff.cuni.cz:5003/get_latest_text_chunk_versions?session_id={session_id}&language={language}",
            verify=False,
        )
        print(response.text)
    except Exception as e:
        print(e)


def break_str_into_lines(text: str, max_line_length: int = 50):
    words = text.split(" ")
    lines = []
    line = ""
    for word in words:
        if len(line) + len(word) + 1 <= max_line_length:
            line += f" {word}"
        else:
            lines.append(line)
            line = word
    lines.append(line)
    return lines


def format_text_chunk_texts(text_chunks_texts: List[str]):
    print("Previous text:")
    for line in break_str_into_lines("\n".join(text_chunks_texts[:-1])):
        print(line)
    print("Current text:")
    print(text_chunks_texts[-1])
    print()


def post_latest_text_chunks(session_id="", versions={}, language="en"):
    try:
        response = requests.post(
            f"https://slt.ufal.mff.cuni.cz:5003/get_latest_text_chunks?session_id={session_id}&language={language}",
            verify=False,
            json={"versions": versions},
        )
        # print(response.text)
        # response text is a json string
        json_response = response.json()
        text_chunks_texts = [text_chunk["text"] for text_chunk in json_response["text_chunks"]]
        format_text_chunk_texts(text_chunks_texts)
        
    except Exception as e:
        print(e)


def edit_asr_chunk(timestamp=0, version=0, new_text: str = ""):
    # request_data = request.get_json()
    # session_id = request.args.get("session_id", default=None, type=str)
    # language = request.args.get("language", default=None, type=str)
    # timestamp = request_data["timestamp"]
    # version = request_data["version"]
    # text = request_data["text"]

    try:
        response = requests.post(
            f"https://slt.ufal.mff.cuni.cz:5003/edit_asr_chunk?session_id={session_id}&language=en",
            verify=False,
            json={"timestamp": timestamp, "version": version, "text": new_text},
        )
        print(response.text)
    except Exception as e:
        print(e)


def rate_text_chunk(timestamp=0, version=0, rating_update=1):
    # session_id = request.args.get("session_id", default=None, type=str)
    # language = request.args.get("language", default=None, type=str)

    # timestamp = request_data["timestamp"]
    # version = request_data["version"]
    # rating_update = request_data["rating_update"]
    try:
        response = requests.post(
            f"https://slt.ufal.mff.cuni.cz:5003/rate_text_chunk?session_id={session_id}&language=en",
            verify=False,
            json={"timestamp": timestamp, "version": version, "rating_update": rating_update},
        )
        print(response.text)
    except Exception as e:
        print(e)


def get_correction_rules():
    try:
        response = requests.get(
            f"https://slt.ufal.mff.cuni.cz:5003/get_correction_rules?session_id={session_id}&language=en",
            verify=False,
        )
        print(response.text)
    except Exception as e:
        print(e)


def submit_correction_rules():
    # rules have the following general structure:
    # [
    #     {
    #         "source_strings": [
    #             {
    #                 "string": "str",
    #                 "active": "bool"
    #             },
    #         ],
    #         "to": "str",
    #         "version": "int",
    #     },
    # ]
    rules = [
        {
            "source_strings": [
                {
                    "string": "four",
                    "active": True,
                },
            ],
            "to": "five",
            "version": 0,
        },
        {
            "source_strings": [
                {
                    "string": "five",
                    "active": True,
                },
            ],
            "to": "six",
            "version": 0,
        },
    ]
    try:
        response = requests.post(
            f"https://slt.ufal.mff.cuni.cz:5003/submit_correction_rules?session_id={session_id}&language=en",
            verify=False,
            json=rules,
        )
        print(response.text)
    except Exception as e:
        print(e)


def switch_source_language(language: str = "en"):
    # request_data = request.get_json()
    # session_id = request.args.get("session_id", default=None, type=str)
    # language = request_data["language"]
    try:
        response = requests.post(
            f"https://slt.ufal.mff.cuni.cz:5003/switch_source_language?session_id={session_id}",
            verify=False,
            json={"language": language},
        )
        print(response.text)
    except Exception as e:
        print(e)


def switch_transcript_language(language: str = "en"):
    # request_data = request.get_json()
    # session_id = request.args.get("session_id", default=None, type=str)
    # language = request_data["language"]
    try:
        response = requests.post(
            f"https://slt.ufal.mff.cuni.cz:5003/switch_transcript_language?session_id={session_id}",
            verify=False,
            json={"language": language},
        )
        print(response.text)
    except Exception as e:
        print(e)

In [43]:
#  TEST session creation and deletion
end_session()


{"success": true, "message": "Successfully ended session test_session"}


In [62]:
crate_session()
get_active_sessions()
timestamp = 0

{"success": true, "message": "Successfully created session test_session"}
{"active_sessions": ["test_session"]}


{"success": true, "session_id": "test_session"}


In [64]:
submit_audio()
offload_computation_get()
offload_computation_post()
timestamp += 1

{"success": true, "session_id": "test_session"}
{
    "session_id": "test_session",
    "timestamp": 1,
    "source_language": "cs",
    "transcript_language": "cs",
    "prompt": "",
    "audio": [
        8.939230156101985e-07,
        1.730002509248152e-06,
        -3.0721382699994138e-06,
        -1.2614202660188312e-07,
        -1.0721962553361664e-06,
        -2.666301725184894e-06,
        3.994047801825218e-06,
        -6.392704108293401e-07,
        9.277324579670676e-07,
        5.490992407430895e-06,
        8.939230156101985e-07,
        1.730002509248152e-06,
        -3.0721382699994138e-06,
        -1.2614202660188312e-07,
        -1.0721962553361664e-06,
        -2.666301725184894e-06,
        3.994047801825218e-06,
        -6.392704108293401e-07,
        9.277324579670676e-07,
        5.490992407430895e-06
    ]
}
{
    "success": true
}


In [41]:
get_latest_text_chunk_versions()
post_latest_text_chunks(session_id=session_id, versions={})

{"success": true, "session_id": "test_session", "versions": {"0": 0}}
[{'timestamp': 0, 'version': 0, 'text': " That's about four seconds."}]
Previous text:
 
Current text:
 That's about four seconds.


In [284]:
edit_asr_chunk(timestamp=0, version=0, new_text="This is a four edit.")

{"success": true, "session_id": "test_session", "text": "This is a five edit.", "timestamp": 0, "version": 1}


In [285]:
edit_asr_chunk(timestamp=0, version=0, new_text="This is five edit.")

{"success": true, "session_id": "test_session", "text": "This is six edit.", "timestamp": 0, "version": 2}


In [246]:
rate_text_chunk(timestamp=0,version=0,rating_update=1)

{"success": true, "message": "Successfully updated rating for test_session, language en, chunk_id 0, chunk_version 0, rating_update 1, new_rating 2"}


In [288]:
submit_correction_rules()
get_correction_rules()

{"success": true, "message": "Successfully uploaded rules for session test_session, language en"}
[{"source_strings": [{"string": "four", "active": true}], "to": "five", "version": 0}, {"source_strings": [{"string": "five", "active": true}], "to": "six", "version": 0}]


In [295]:
switch_source_language("cs")
switch_transcript_language("cs")

{"success": true, "session_id": "test_session"}
{"success": true, "session_id": "test_session"}


In [65]:
def integration_test():
    crate_session()

    # switch_source_language(source_language)
    # switch_transcript_language(transcript_language)

    def submit_audio_chunk(timestamp, audio):
        try:
            response = requests.post(
                f"https://slt.ufal.mff.cuni.cz:5003/submit_audio_chunk?session_id={session_id}",
                verify=False,
                json={
                    "timestamp": timestamp,
                    "chunk": audio_to_dict(
                        audio=audio[timestamp * sample_rate : (timestamp + 1) * sample_rate]
                    ),
                },  # 10 seconds
            )
            print(response.text)
        except Exception as e:
            print(e)

    for timestamp in range(0, 60):
        submit_audio_chunk(timestamp, audio_cs)
        get_latest_text_chunk_versions(language=transcript_language)
        post_latest_text_chunks(session_id=session_id, versions={}, language=transcript_language)
        time.sleep(1)
    
    # end_session()

In [71]:
integration_test()

{"success": true, "message": "Successfully created session test_session"}
{"success": true, "session_id": "test_session"}
{"success": true, "session_id": "test_session", "versions": {}}
Previous text:
 
Current text:
list index out of range
{"success": true, "session_id": "test_session"}
{"success": true, "session_id": "test_session", "versions": {}}
Previous text:
 
Current text:
list index out of range
{"success": true, "session_id": "test_session"}
{"success": true, "session_id": "test_session", "versions": {}}
Previous text:
 
Current text:
list index out of range
{"success": true, "session_id": "test_session"}
{"success": true, "session_id": "test_session", "versions": {}}
Previous text:
 
Current text:
list index out of range
{"success": true, "session_id": "test_session"}
{"success": true, "session_id": "test_session", "versions": {}}
Previous text:
 
Current text:
list index out of range
{"success": true, "session_id": "test_session"}
{"success": true, "session_id": "test_sessi

In [335]:
end_session()

{"success": true, "message": "Successfully ended session test_session"}


In [50]:
from flask import Flask, Response, make_response, request  # noqa: E402

app = Flask(__name__)

@app.route("/offload_ASR", methods=["POST", "GET"])
def offload_computation():

    global CONFIG, sessions, processing_queue
    if request.method == "POST":
        request_data = request.get_json()
        
        print(request_data)

        response_data = {
            "success": True,
        }

        response = make_response(jsonpickle.encode(response_data, unpicklable=True, indent=4))
        response.headers["Content-Type"] = "application/json"
        return response, 200

    elif request.method == "GET":
        response_data = {
            "timestamp": 0,
            "audio": audio_cs,
            "prompt": "",
            "session_id": session_id,
            "source_language": source_language,
            "transcript_language": transcript_language,
        }
        response = make_response(jsonpickle.encode(response_data, unpicklable=True, indent=4))
        response.headers["Content-Type"] = "application/json"
        return response, 200

    response_data = {"success": False, "message": "Method not allowed"}
    response = make_response(jsonpickle.encode(response_data, unpicklable=True, indent=4))
    response.headers["Content-Type"] = "application/json"
    return response, 405

In [57]:
app.run(
    port=5003,
    host="slt.ufal.mff.cuni.cz",
)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://slt.ufal.mff.cuni.cz:5003
[33mPress CTRL+C to quit[0m
195.113.21.6 - - [13/Dec/2023 19:57:57] "GET /offload_ASR HTTP/1.1" 200 -
195.113.21.6 - - [13/Dec/2023 19:58:15] "POST /offload_ASR HTTP/1.1" 200 -
195.113.21.6 - - [13/Dec/2023 19:58:15] "GET /offload_ASR HTTP/1.1" 200 -


{'session_id': 'test_session', 'timestamp': 0, 'tsw': [[7.82, 8.86, ' Hi,'], [9.66, 10.2, ' in'], [10.2, 10.56, ' this'], [10.56, 10.98, ' video'], [10.98, 11.3, ' I'], [11.3, 11.54, ' will'], [11.54, 12.02, ' show'], [12.02, 12.1, ' you'], [12.1, 12.32, ' how'], [12.32, 12.32, ' to'], [12.32, 12.7, ' create'], [12.7, 13.42, ' raster'], [13.42, 13.42, ' images.'], [13.42, 13.42, ' Hi,'], [13.42, 13.42, ' in'], [13.42, 13.42, ' this'], [13.42, 13.42, ' video'], [13.42, 13.42, ' I'], [13.42, 13.42, ' will'], [13.42, 13.42, ' show'], [13.42, 13.42, ' you'], [13.42, 13.42, ' how'], [13.42, 13.42, ' to'], [13.42, 13.42, ' create'], [13.42, 13.42, ' raster'], [13.42, 14.32, ' images.'], [14.92, 15.4, ' First'], [15.4, 15.7, ' one'], [15.7, 16.86, ' is'], [16.86, 17.48, ' called'], [17.48, 18.0, ' HDR'], [18.0, 18.64, ' image.'], [18.64, 18.64, ' In'], [18.64, 19.14, ' English'], [19.14, 19.42, ' it'], [19.42, 19.42, ' is'], [19.42, 19.66, ' called'], [19.66, 20.06, ' High'], [20.06, 20.5, ' 

In [191]:
# set first 7.5 seconds of audio_cs to 0
audio_cs_modified = audio_cs.copy()
audio_cs_modified[:int(7.5 * sample_rate)] = 0
# prepend 2.5 seconds of 0 to audio_cs_modified
print(audio_cs_modified.shape)
# audio_cs_modified = np.concatenate(
#     [np.zeros(int(2.5 * sample_rate)), audio_cs_modified]
# )
print(audio_cs_modified.shape)
import scipy.io.wavfile   # noqa: E402


# scipy.io.wavfile.write("aaaaa.wav", sample_rate, audio_cs_modified)
soundfile.write("aaaaa.wav", audio_cs_modified, sample_rate, format="WAV")


(3956943,)
(3956943,)


In [194]:
tmp_audio_file = io.BytesIO(open("aaaaa.wav", "rb").read())

In [192]:
import wave  # noqa: E402

def check_wave_format(bytes_io):
    with wave.open(bytes_io, 'rb') as wf:
        print(f'Channels: {wf.getnchannels()}')
        print(f'Sample width (bytes): {wf.getsampwidth()}')
        print(f'Frame rate: {wf.getframerate()}')
        print(f'Number of frames: {wf.getnframes()}')
        print(f'Compression type: {wf.getcomptype()}')
        print(f'Compression name: {wf.getcompname()}')

check_wave_format(io.BytesIO(open(f"{save_path}/test_cs.wav", "rb").read()))
check_wave_format(io.BytesIO(open("aaaaa.wav", "rb").read()))

Channels: 1
Sample width (bytes): 2
Frame rate: 16000
Number of frames: 3956943
Compression type: NONE
Compression name: not compressed
Channels: 1
Sample width (bytes): 2
Frame rate: 16000
Number of frames: 3956943
Compression type: NONE
Compression name: not compressed


In [152]:
io.BytesIO(open(f"{save_path}/test_cs.wav", "rb").read()).getvalue() == tmp_audio_file.getvalue()

True

In [186]:
tmp, sm = soundfile.read(io.BytesIO(open(f"{save_path}/test_cs.wav", "rb").read()))
print(tmp.shape, sm)

(3956943,) 16000


In [195]:
tmp, sm = soundfile.read(tmp_audio_file)

In [203]:
# submit audio file located at `data/test_cs.wav` to `https://slt.ufal.mff.cuni.cz:5003/submit_audio_file`
def submit_audio_file():
    try:
        response = requests.post(
            "https://slt.ufal.mff.cuni.cz:5003/submit_audio_file",
            verify=False,
            files={
                # "file": open(f"{save_path}/test_cs.wav", "rb"),
                "file": open("aaaaa.wav", "rb"),
            },
        )
        print(response.text)

    except Exception as e:
        print(e)

submit_audio_file()

{"success": true, "session_id": "WQQslPhKDHxLNiQMCUOXfiQRehPbsjxR"}


In [83]:
print(audio_cs.shape)

(3956943,)


In [88]:
get_active_sessions()

{"active_sessions": ["EMyCWNVeVaLlOuggTwFOdwZHrekrtdrx"]}


In [204]:
post_latest_text_chunks(session_id="WQQslPhKDHxLNiQMCUOXfiQRehPbsjxR", versions={})

Previous text:
  Hi, in this video I will show you how
 to create
raster images. Hi, in this
 video I will show you
how to create
 raster images. First one is called

HDR image. In English it is called
 High Dynamic
Range. In English it is
 called High Dynamic
Range. In short,
 we will try to solve the
problem, that
 real world and our eyes as
adaptive
 sensors are able to work with data
 with
high dynamic range. Digital camera
 or computer
cannot do it. I will start
 with an example of
photography. If
 you use digital camera or analog
camera,
 you have a problem, that dynamic range

of film or sensor is much smaller than
 often
dynamic range of real scene.
 Here is an example
taken at sunset,
 where there is a big dynamic
range,
 big contrast between places where the
 sun
shines behind the clouds and here
 in the shade
under the trees. Either
 you set the camera so
that the light
 parts are well exposed, that is,
a
 short exposure, but you have a great
 cloud,
clouds, reds, everything