Usage of oobabooga's [text-generation-webui](https://github.com/oobabooga/text-generation-webui), a Gradio web UI for accessing and using LLMs, GPTQ capabilities, ggml for fast CPU inference if need be, and more.\

This notebook is derived from an example [Colab notebook](https://github.com/oobabooga/text-generation-webui/wiki/Google-Colab-notebook/f5157d3b2168996980007877003565daa17cb70f) by oobabooga, which is itself derived from an original notebook by the GitHub user @81300 that I cannot retrieve.

In [1]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

Note: in the `requirements.txt` file of text-generation-webui, safetensors was changed from 0.3.0 to 0.3.1 in order to successfully download transformers. accelerate was also changed from 0.17.1 to 0.20.3.

In [9]:
#@title 2. Installation of the web UI

# This option allows you to save chat logs, characters, and softprompts
# to Google Grive automatically, so that they can persist across sessions.
save_logs_to_google_drive = True #@param {type:"boolean"}

# If True:
if save_logs_to_google_drive:
  # Importation of os, and shutil, which is useful for file copying.
  import os
  import shutil
  # Importation of the drive module in google.colab for interaction of GoogleDrive in Colab.
  from google.colab import drive
  # Mounting of the Google Drive to the Colab env.
  drive.mount('/content/drive')
  # Setting of a base folder for navigation and saving to folders inside Google Drive.
  base_folder = '/content/drive/MyDrive'

%cd /content
# Cloning of the text-generation-webui repository.
!git clone https://github.com/oobabooga/text-generation-webui
# If True:
if save_logs_to_google_drive:
  # If there is no paths that exists for the folders specified, create them.
  if not os.path.exists(f"{base_folder}/oobabooga-data"):
    os.mkdir(f"{base_folder}/oobabooga-data")
  if not os.path.exists(f"{base_folder}/oobabooga-data/logs"):
    os.mkdir(f"{base_folder}/oobabooga-data/logs")
  if not os.path.exists(f"{base_folder}/oobabooga-data/softprompts"):
    os.mkdir(f"{base_folder}/oobabooga-data/softprompts")
  if not os.path.exists(f"{base_folder}/oobabooga-data/characters"):
    # Moving of the characters folder from webui to a newly created folder in oobabooga-data.
    shutil.move("text-generation-webui/characters", f"{base_folder}/oobabooga-data/characters")
  else:
    # If the characters folder already exists, then delete the cloned repository characters directory.
    !rm -r "text-generation-webui/characters"

  # Recursively delete the softprompts directory from webui.
  !rm -r "text-generation-webui/softprompts"
  # Creation of symlinks inside the webui directories that points to the oobabooga directories.
  !ln -s "$base_folder/oobabooga-data/logs" "text-generation-webui/logs"
  !ln -s "$base_folder/oobabooga-data/softprompts" "text-generation-webui/softprompts"
  !ln -s "$base_folder/oobabooga-data/characters" "text-generation-webui/characters"

else:
  # If save_logs_to_google_drive is False, create a directory
  !mkdir text-generation-webui/logs

# Creation of symlinks that points to specific directories inside the text-generation-webui directory.
!ln -s text-generation-webui/logs .
!ln -s text-generation-webui/characters .
!ln -s text-generation-webui/models .

# The recursive removal of the sample_data directory in Colab.
%rm -r sample_data

# Go into the text-generation-webui directory and switch the current state of the repository to the state it was in
# at the given commit.
%cd text-generation-webui
!git checkout a04b7cf2643a5950fc215deac7f76b4b31336a81

# The downloading of the .json under specified name.
!wget https://oobabooga.github.io/settings-colab.json -O settings-colab-template.json

# Installation of requirements.
!pip install -r requirements.txt
!pip install -r extensions.google_translate.requirements.txt
!pip install -r extensions.silero_tts/requirements.txt
print(f"\033[1;32;1m\n --> If you see a warning about \"pydevd_plugins\", just ignore it and move on to Step 3. There is no need to restart the runtime.\n\033[0;37;0m")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content
fatal: destination path 'text-generation-webui' already exists and is not an empty directory.
ln: failed to create symbolic link 'text-generation-webui/logs/logs': File exists
ln: failed to create symbolic link './logs': File exists
ln: failed to create symbolic link './characters': File exists
ln: failed to create symbolic link './models': File exists
rm: cannot remove 'sample_data': No such file or directory
/content/text-generation-webui
D	characters/Example.json
D	characters/Example.png
M	requirements.txt
D	softprompts/place-your-softprompts-here.txt
HEAD is now at a04b7cf Merge pull request #585 from fkusche/also-download-markdown
--2023-08-27 18:42:40--  https://oobabooga.github.io/settings-colab.json
Resolving oobabooga.github.io (oobabooga.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to oobabooga.github.io (

In [14]:
#@title 3. Launch

import json


"""Parameters.

   Allowance of options for use in interaction with the bot. Can control text streaming, loading of model in 8bit, and more.
"""
model = "Pygmalion 6B original (sharded, rehosted)" #@param ["Pygmalion 6B original (sharded, rehosted)", "Pygmalion 6B main (sharded, rehosted)", "Pygmalion 6B dev (sharded, rehosted)", "GALACTICA 125m (for debugging)"] {allow-input: false}
# Streams the text output in real time.
text_streaming = True #@param {type:"boolean"}
# Loads the model with 8-bit precision: a small hit in accuracy and speed in exchange for memory conservation.
load_in_8bit = False #@param {type: "boolean"}
# Responses will be audio instead of text. 118 voices available.
activate_silero_text_to_speech = False #@param {type: "boolean"}
# Adds a menu for sending pictures to the bot, which are automatically captioned using BLIP.
activate_sending_pictures = False #@param {type: "boolean"}
# Adds a user-defined, hidden string at the beginning of the bot's reply, with the goal of biasing the rest of the response.
activate_character_bias = False #@param {type: "boolean"}
# If a language other than English is selected, automatic translation through Google Translate is activated, allowing communication with the bot in that language.
chat_language = "English" #@param ['Afrikaans', 'Albanian', 'Amharic', 'Arabic', 'Armenian', 'Azerbaijani', 'Basque', 'Belarusian', 'Bengali', 'Bosnian', 'Bulgarian', 'Catalan', 'Cebuano', 'Chinese (Simplified)', 'Chinese (Traditional)', 'Corsican', 'Croatian', 'Czech', 'Danish', 'Dutch', 'English', 'Esperanto', 'Estonian', 'Finnish', 'French', 'Frisian', 'Galician', 'Georgian', 'German', 'Greek', 'Gujarati', 'Haitian Creole', 'Hausa', 'Hawaiian', 'Hebrew', 'Hindi', 'Hmong', 'Hungarian', 'Icelandic', 'Igbo', 'Indonesian', 'Irish', 'Italian', 'Japanese', 'Javanese', 'Kannada', 'Kazakh', 'Khmer', 'Korean', 'Kurdish', 'Kyrgyz', 'Lao', 'Latin', 'Latvian', 'Lithuanian', 'Luxembourgish', 'Macedonian', 'Malagasy', 'Malay', 'Malayalam', 'Maltese', 'Maori', 'Marathi', 'Mongolian', 'Myanmar (Burmese)', 'Nepali', 'Norwegian', 'Nyanja (Chichewa)', 'Pashto', 'Persian', 'Polish', 'Portuguese (Portugal, Brazil)', 'Punjabi', 'Romanian', 'Russian', 'Samoan', 'Scots Gaelic', 'Serbian', 'Sesotho', 'Shona', 'Sindhi', 'Sinhala (Sinhalese)', 'Slovak', 'Slovenian', 'Somali', 'Spanish', 'Sundanese', 'Swahili', 'Swedish', 'Tagalog (Filipino)', 'Tajik', 'Tamil', 'Telugu', 'Thai', 'Turkish', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Welsh', 'Xhosa', 'Yiddish', 'Yoruba', 'Zulu']

# True whenever the chat language is not English, so that the google_translate
# extension is enabled further down.
activate_google_translate = (chat_language != "English")

# Data.
# Maps each option of the `model` dropdown to a tuple of
# (Hugging Face organisation, repository, branch, local folder name under models/).
# The keys must match the dropdown options exactly (including letter case),
# otherwise models[model] raises a KeyError for that option.
models = {
    "Pygmalion 6B original (sharded, rehosted)": ("waifu-workshop", "pygmalion-6b", "original-sharded", "pygmalion-6b_original-sharded"),
    "Pygmalion 6B main (sharded, rehosted)": ("waifu-workshop", "pygmalion-6b", "sharded", "pygmalion-6b_sharded"),
    "Pygmalion 6B dev (sharded, rehosted)": ("waifu-workshop", "pygmalion-6b", "dev-sharded", "pygmalion-6b_dev-sharded"),
    "GALACTICA 125m (for debugging)": ("facebook", "galactica-125m", "main", "galactica-125m"),
}

language_codes = {'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chinese (Simplified)': 'zh-CN', 'Chinese (Traditional)': 'zh-TW', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw', 'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km', 'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt', 'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi', 'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Nyanja (Chichewa)': 'ny', 'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese (Portugal, Brazil)': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru', 'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala (Sinhalese)': 'si', 'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv', 'Tagalog (Filipino)': 'tl', 'Tajik': 'tg', 'Tamil': 'ta', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo', 'Zulu': 'zu'}

# Download the model (if it hasn't been downloaded already).
huggingface_org, huggingface_repo, huggingface_branch, model_name = models[model]
![[ ! -f models/$model_name/config.json ]] && python download-model.py $huggingface_org/$huggingface_repo --branch $huggingface_branch

# Applying the selected language and setting of the prompt size to 2048.
# If 8bit mode is selected.
j = json.loads(open('settings-colab-template.json', 'r').read())
j["google_translate-language string"] = language_codes[chat_language]
if load_in_8bit:
  j["chat_prompt_size"] = 2048

# Open the settings-colab.json file, and the modified .json 'j' is converted into string format using json.dumps.
# indent=4 makes sure that the resulting .json string is pretty-printed with and indentation of 4 spaces for better
# readability. this string is then written to the settings-colab.json file.
with open('settings-colab.json', 'w') as f:
  f.write(json.dumps(j, indent=4))

params = set(['--chat'])

# Add load-in-8bit to the params set.
if load_in_8bit:
  params.add('--load-in-8bit')

# If the above parameters are set to True, we will append the given strings to our newly created active_extensions list.
active_extensions = []
if activate_sending_pictures:
  active_extensions.append('send_pictures')
if activate_character_bias:
  active_extensions.append('character_bias')
if activate_google_translate:
  active_extensions.append('google_translate')
if activate_silero_text_to_speech:
  active_extensions.append('silero_tts')
active_extensions.append('gallery')

# If anything have been appended to the active_extensions list, then add those strings to the params set.
if len(active_extensions) > 0:
  params.add(f'--extensions {" ".join(active_extensions)}')

if not text_streaming or activate_google_translate or activate_silero_text_to_speech:
  params.add("--no-stream")
if activate_character_bias:
  params.add("--verbose")

# Starting the webui.
cmd = f"python server.py --share --model {model_name} --settings settings-colab.json {' '.join(params)}"
print(cmd)
!$cmd


Traceback (most recent call last):
  File "/content/text-generation-webui/download-model.py", line 169, in <module>
    links, is_lora = get_download_links_from_huggingface(model, branch)
  File "/content/text-generation-webui/download-model.py", line 113, in get_download_links_from_huggingface
    fname = dict[i]['path']
KeyError: 0
python server.py --share --model pygmalion-6b_original-sharded --settings settings-colab.json --extensions gallery --chat
Loading settings from settings-colab.json...
Loading pygmalion-6b_original-sharded...
[31m╭─[0m[31m────────────────────[0m[31m [0m[1;31mTraceback [0m[1;2;31m(most recent call last)[0m[31m [0m[31m─────────────────────[0m[31m─╮[0m
[31m│[0m [2;33m/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/[0m[1;33m_errors.py[0m:[94m261[0m [31m│[0m
[31m│[0m in [92mhf_raise_for_status[0m                                                       [31m│[0m
[31m│[0m                                                 