Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve security vulnerability - CWE-23 #178

Merged
merged 1 commit into from
Mar 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ app/transcribe/logs/Transcribe.log
app/transcribe/logs/response.txt
app/transcribe/logs/ffmpeg.txt
app/transcribe/logs/whisper.cpp.txt
app/transcribe/mic.wav.bak
app/transcribe/speaker.wav.bak
9 changes: 7 additions & 2 deletions app/transcribe/args.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import os
import re
import argparse
from argparse import RawTextHelpFormatter
import yaml
Expand Down Expand Up @@ -104,10 +105,11 @@ def handle_args_batch_tasks(args: argparse.Namespace, global_vars: Transcription
if args.transcribe is not None:
with duration.Duration(name='Transcription', log=False, screen=True):
output_file = args.output_file if args.output_file is not None else "transcription.txt"
safe_filename = re.sub('[^0-9a-zA-Z\.]+', '_', output_file)
print(f'Converting the audio file {args.transcribe} to text.')
print(f'{args.transcribe} file size '
f'{utilities.naturalsize(os.path.getsize(args.transcribe))}.')
print(f'Text output will be produced in {output_file}.')
print(f'Text output will be produced in {safe_filename}.')
# For whisper.cpp STT, convert the file to 16 kHz
file_path = args.transcribe
if args.speech_to_text == 'whisper.cpp':
Expand All @@ -117,7 +119,7 @@ def handle_args_batch_tasks(args: argparse.Namespace, global_vars: Transcription
# process_response can be improved to make the output more palatable to human reading
text = global_vars.transcriber.stt_model.process_response(results)
if results is not None and len(text) > 0:
with open(output_file, encoding='utf-8', mode='w') as f:
with open(safe_filename, encoding='utf-8', mode='w') as f:
f.write(f"{text}\n")
print('Complete!')
else:
Expand All @@ -128,6 +130,9 @@ def handle_args_batch_tasks(args: argparse.Namespace, global_vars: Transcription


def update_args_config(args: argparse.Namespace, config: dict):
"""Update internal configuration with any overrides specified as
arguments
"""
# Command line arg for api_key takes precedence over api_key specified in the YAML file
# TODO: We should be able to set deepgram API key from command line as well
if args.api_key is not None:
Expand Down