Skip to content

Commit

Permalink
fix exception that happens because of PT language
Browse files Browse the repository at this point in the history
  • Loading branch information
zackees committed May 10, 2023
1 parent f8cd3d4 commit 9aa7f12
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 147 deletions.
167 changes: 37 additions & 130 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,144 +66,51 @@ It should now be installed.

# Language Reference

#### Short hand language codes
### Language Inputs

```
af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,
de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,
hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,
ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,
no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,
sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,
yi,yo,zh
```
We use OpenAI Whisper for language input. See the Whisper documentation for the full list of supported languages.

#### Full Language codes will also work

```
Afrikaans,Albanian,Amharic,Arabic,Armenian,Assamese,Azerbaijani,
Bashkir,Basque,Belarusian,Bengali,Bosnian,Breton,Bulgarian,Burmese,
Castilian,Catalan,Chinese,Croatian,Czech,Danish,Dutch,English,
Estonian,Faroese,Finnish,Flemish,French,Galician,Georgian,German,
Greek,Gujarati,Haitian,Haitian Creole,Hausa,Hawaiian,Hebrew,Hindi,
Hungarian,Icelandic,Indonesian,Italian,Japanese,Javanese,Kannada,
Kazakh,Khmer,Korean,Lao,Latin,Latvian,Letzeburgesch,Lingala,
Lithuanian,Luxembourgish,Macedonian,Malagasy,Malay,Malayalam,
Maltese,Maori,Marathi,Moldavian,Moldovan,Mongolian,Myanmar,Nepali,
Norwegian,Nynorsk,Occitan,Panjabi,Pashto,Persian,Polish,Portuguese,
Punjabi,Pushto,Romanian,Russian,Sanskrit,Serbian,Shona,Sindhi,
Sinhala,Sinhalese,Slovak,Slovenian,Somali,Spanish,Sundanese,Swahili,
Swedish,Tagalog,Tajik,Tamil,Tatar,Telugu,Thai,Tibetan,Turkish,
Turkmen,Ukrainian,Urdu,Uzbek,Valencian,Vietnamese,Welsh,Yiddish,
Yoruba
```
### Language outputs

# Language code -> canonical name
We use DeepL for translation. The supported output languages are as follows:

```
{
"af": "Afrikaans",
"am": "Amharic",
"ar": "Arabic",
"as": "Assamese",
"az": "Azerbaijani",
"ba": "Bashkir",
"be": "Belarusian",
"bg": "Bulgarian",
"bn": "Bengali",
"bo": "Tibetan",
"br": "Breton",
"bs": "Bosnian",
"ca": "Catalan",
"cs": "Czech",
"cy": "Welsh",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"es": "Spanish",
"et": "Estonian",
"eu": "Basque",
"fa": "Persian",
"fi": "Finnish",
"fo": "Faroese",
"fr": "French",
"gl": "Galician",
"gu": "Gujarati",
"ha": "Hausa",
"haw": "Hawaiian",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
"ht": "Haitian Creole",
"hu": "Hungarian",
"hy": "Armenian",
"id": "Indonesian",
"is": "Icelandic",
"it": "Italian",
"ja": "Japanese",
"jw": "Javanese",
"ka": "Georgian",
"kk": "Kazakh",
"km": "Khmer",
"kn": "Kannada",
"ko": "Korean",
"la": "Latin",
"lb": "Luxembourgish",
"ln": "Lingala",
"lo": "Lao",
"lt": "Lithuanian",
"lv": "Latvian",
"mg": "Malagasy",
"mi": "Maori",
"mk": "Macedonian",
"ml": "Malayalam",
"mn": "Mongolian",
"mr": "Marathi",
"ms": "Malay",
"mt": "Maltese",
"my": "Burmese",
"ne": "Nepali",
"nl": "Dutch",
"nn": "Norwegian Nynorsk",
"no": "Norwegian",
"oc": "Occitan",
"pa": "Punjabi",
"pl": "Polish",
"ps": "Pashto",
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
"sa": "Sanskrit",
"sd": "Sindhi",
"si": "Sinhalese",
"sk": "Slovak",
"sl": "Slovene",
"sn": "Shona",
"so": "Somali",
"sq": "Albanian",
"sr": "Serbian",
"su": "Sundanese",
"sv": "Swedish",
"sw": "Swahili",
"ta": "Tamil",
"te": "Telugu",
"tg": "Tajik",
"th": "Thai",
"tk": "Turkmen",
"tl": "Tagalog",
"tr": "Turkish",
"tt": "Tatar",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",
"vi": "Vietnamese",
"yi": "Yiddish",
"yo": "Yoruba",
"zh": "Chinese",
}
BG - Bulgarian
CS - Czech
DA - Danish
DE - German
EL - Greek
EN - English (unspecified variant for backward compatibility; please select EN-GB or EN-US instead)
EN-GB - English (British)
EN-US - English (American)
ES - Spanish
ET - Estonian
FI - Finnish
FR - French
HU - Hungarian
ID - Indonesian
IT - Italian
JA - Japanese
KO - Korean
LT - Lithuanian
LV - Latvian
NB - Norwegian (Bokmål)
NL - Dutch
PL - Polish
PT-BR - Portuguese (Brazilian)
PT-PT - Portuguese (all Portuguese varieties excluding Brazilian Portuguese)
RO - Romanian
RU - Russian
SK - Slovak
SL - Slovenian
SV - Swedish
TR - Turkish
UK - Ukrainian
ZH - Chinese (simplified)
```

Please see [https://www.deepl.com/docs-api/translate-text/](https://www.deepl.com/docs-api/translate-text/) for more information

# Windows

Expand Down
11 changes: 1 addition & 10 deletions src/video_subtitles/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
from video_subtitles.say import say
from video_subtitles.settings import Settings
from video_subtitles.util import (
LANGUAGE_CODES,
MODELS,
GraphicsInfo,
ensure_transcribe_anything_installed,
parse_languages,
query_cuda_video_cards,
)

Expand All @@ -24,15 +24,6 @@
settings = Settings()


def parse_languages(languages_str: str) -> list[str]:
    """Parse a comma-separated list of languages and return a list of language codes.

    Each entry is stripped of surrounding whitespace and lower-cased before it
    is validated, so inputs such as ``"EN, fr"`` are accepted.

    Args:
        languages_str: Comma-separated language codes, e.g. ``"en,fr"``.

    Returns:
        The normalized (stripped, lower-case) codes in input order.

    Raises:
        argparse.ArgumentTypeError: If any entry is not a key of
            ``LANGUAGE_CODES``.
    """
    # Normalize first: the keys of LANGUAGE_CODES are lower-case, so raw
    # tokens with spaces or upper-case letters would otherwise be rejected.
    languages = [language.strip().lower() for language in languages_str.split(",")]
    for language in languages:
        if language not in LANGUAGE_CODES:
            raise argparse.ArgumentTypeError(f"Invalid language code: {language}")
    return languages


def ensure_dependencies() -> list[GraphicsInfo]:
"""Ensure that dependencies are installed."""
cuda_cards = query_cuda_video_cards()
Expand Down
10 changes: 8 additions & 2 deletions src/video_subtitles/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
QLabel,
QLineEdit,
QMainWindow,
QMessageBox,
QVBoxLayout,
QWidget,
)
Expand All @@ -26,7 +27,7 @@
from video_subtitles.say import say
from video_subtitles.settings import Settings
from video_subtitles.thread_processor import ThreadProcessor
from video_subtitles.util import MODELS
from video_subtitles.util import MODELS, parse_languages

settings = Settings()

Expand Down Expand Up @@ -152,7 +153,12 @@ def dropEvent(self, event):
files = [u.toLocalFile() for u in event.mimeData().urls()]
deepl_api_key = self.deepl_input.text().strip() # get api key from input field
model = self.model_select.currentText().strip()
languages = self.output_text.text().strip().split(",")
try:
languages = parse_languages(self.output_text.text())
except ValueError as ve:
QMessageBox.critical(self, "Error", str(ve))
return

languages = [lang.strip() for lang in languages]
convert_to_webvtt = self.webvtt_select.currentText().strip() == "WEBVTT"
for f in files:
Expand Down
2 changes: 1 addition & 1 deletion src/video_subtitles/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def run( # pylint: disable=too-many-locals,too-many-branches,too-many-statement
api_key=deepl_api_key,
in_srt=src_srt_file,
out_srt=out_file,
from_lang="en",
from_lang="en-us", # whisper ai is always en-us
to_lang=language,
)
assert os.path.exists(
Expand Down
44 changes: 40 additions & 4 deletions src/video_subtitles/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Utilities for video_subtitles."""

# pylint: disable=line-too-long

import os
import subprocess
import tempfile
Expand Down Expand Up @@ -71,15 +73,49 @@ def ensure_transcribe_anything_installed() -> None:
)


def parse_languages(languages_str: str) -> list[str]:
    """Split a comma-separated language string into validated language codes.

    Every entry is stripped of surrounding whitespace and lower-cased, then
    checked against ``LANGUAGE_CODES``.

    Args:
        languages_str: Comma-separated language codes, e.g. ``"en-us, de"``.

    Returns:
        The normalized codes, in the order they appeared in the input.

    Raises:
        ValueError: On the first entry that is not a key of ``LANGUAGE_CODES``.
    """
    codes: list[str] = []
    for raw_entry in languages_str.split(","):
        code = raw_entry.strip().lower()
        if code not in LANGUAGE_CODES:
            raise ValueError(f"Unknown language: {code}")
        codes.append(code)
    return codes


# Lower-case language code -> human-readable language name.
# parse_languages() validates user input against the keys of this mapping.
# Each key appears exactly once: duplicate keys in a dict literal silently
# shadow earlier values. A bare "pt" is deliberately not listed; only the
# "pt-br" and "pt-pt" variants are offered.
LANGUAGE_CODES: dict[str, str] = {
    "bg": "Bulgarian",
    "cs": "Czech",
    "da": "Danish",
    "de": "German",
    "el": "Greek",
    "en": "English (unspecified variant for backward compatibility; please select EN-GB or EN-US instead)",
    "en-gb": "English (British)",
    "en-us": "English (American)",
    "es": "Spanish",
    "et": "Estonian",
    "fi": "Finnish",
    "fr": "French",
    "hu": "Hungarian",
    "id": "Indonesian",
    "it": "Italian",
    "ja": "Japanese",
    "ko": "Korean",
    "lt": "Lithuanian",
    "lv": "Latvian",
    "nb": "Norwegian (Bokmål)",
    "nl": "Dutch",
    "pl": "Polish",
    "pt-br": "Portuguese (Brazilian)",
    "pt-pt": "Portuguese (all Portuguese varieties excluding Brazilian Portuguese)",
    "ro": "Romanian",
    "ru": "Russian",
    "sk": "Slovak",
    "sl": "Slovenian",
    "sv": "Swedish",
    "tr": "Turkish",
    "uk": "Ukrainian",
    "zh": "Chinese (simplified)",
}

MODELS = {
Expand Down

0 comments on commit 9aa7f12

Please sign in to comment.