[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yezhengkai/LawGPT/blob/main/notebooks/demo.ipynb)

In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so that
# imported modules are reloaded before each cell runs and edits to the
# project sources take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Mount Google Drive and move into the project directory

In [None]:
# Mount Google Drive at /content/drive; the project directory used by the
# following cells lives under this mount point.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
# Create the project directory on the mounted drive if it does not exist yet,
# then make it the working directory for every cell that follows.
# NOTE(review): nothing in this notebook clones the repository, so the LawGPT
# sources (pyproject.toml, src/, data/) are presumably expected to already be
# in this directory - confirm.
%mkdir -p /content/drive/MyDrive/side-project/LawGPT
%cd /content/drive/MyDrive/side-project/LawGPT

# Set up Poetry to manage dependencies in a virtual environment and install the project

In [None]:
# Install Poetry into the Colab runtime, then let Poetry install the project
# from the current directory (dev dependency group skipped, "app" extra
# included).
# NOTE(review): Poetry is installed without a version pin, so a fresh runtime
# may pick up a different Poetry release than the one this was written for.
# ref:
# - https://stackoverflow.com/questions/75245758/how-to-use-poetry-in-google-colab
# - https://github.com/elise-chin/poetry-and-colab/blob/main/Using_python_poetry_in_Google_Colab.ipynb
!pip install -qqq --progress-bar off poetry # install poetry
!poetry install --no-ansi --without dev --extras "app"  # instantiate project

In [None]:
# Add venv to the first position of the search path
import re
import sys
import subprocess

def get_env_path() -> str:
    """Return the path of the Poetry virtual environment for this project.

    Runs ``poetry env info -p`` in the current working directory and returns
    what it prints, with surrounding whitespace (including the trailing
    newline) removed.

    Returns:
        The virtual environment path as a clean string.

    Raises:
        RuntimeError: If Poetry cannot be launched, exits with a non-zero
            status, or prints no path. Poetry's stderr is appended to the
            message when available so the failure can be diagnosed.
    """
    try:
        # The command is fixed, so pass it as an argument list; no shell needed.
        subproc_out = subprocess.run(
            ["poetry", "env", "info", "-p"],
            capture_output=True,
            encoding="utf-8",
        )
    except OSError as exc:
        # e.g. the poetry executable is not on PATH; keep the exception type
        # callers already expect from this function.
        raise RuntimeError("Unable to get env path") from exc

    env_path = (subproc_out.stdout or "").strip()
    if subproc_out.returncode != 0 or not env_path:
        detail = (subproc_out.stderr or "").strip()
        message = "Unable to get env path"
        if detail:
            message = f"{message}: {detail}"
        raise RuntimeError(message)
    return env_path

# Ask Poetry for the venv path once and reuse it (each call spawns a process).
_env_path = get_env_path().strip()

# Poetry-managed venv names normally end in "-py<major>.<minor>"; take the
# version from there. If the name has no such suffix (e.g. an in-project
# ".venv"), fall back to the running interpreter's version.
# NOTE(review): the fallback assumes the venv uses the same Python as this
# kernel - confirm for non-default setups.
_py_match = re.search(r"(?<=py)\d\.\d+", _env_path)
if _py_match:
    PY_VERSION = _py_match.group(0)
else:
    PY_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}"
VENV_PATH = f"{_env_path}/lib/python{PY_VERSION}/site-packages"

# Put the venv's site-packages and the project sources at the front of the
# import search path. Inserting in this order leaves src first and the venv
# second; dropping any earlier occurrence keeps the cell safe to re-run.
for _search_path in (VENV_PATH, "/content/drive/MyDrive/side-project/LawGPT/src"):
    while _search_path in sys.path:
        sys.path.remove(_search_path)
    sys.path.insert(0, _search_path)

# Fine-tune using LoRA

In [None]:
# Run the project's `lawgpt finetune lora` command inside the Poetry venv,
# with bigscience/bloom-3b as the base model, the processed ROC law corpus as
# data, and ./output/lawgpt-bloom-3b-lora-sft-v1 as the output directory.
# NOTE(review): --resume-from-checkpoint points at the same directory as
# --output-dir; on a first run that directory holds no checkpoint yet -
# confirm the CLI tolerates this.
# NOTE(review): the empty --wandb-* values presumably leave Weights & Biases
# logging disabled - confirm.
!poetry run lawgpt finetune lora \
  --base-model "bigscience/bloom-3b" \
  --data-path "./data/processed/roc_law_corpus.json" \
  --output-dir "./output/lawgpt-bloom-3b-lora-sft-v1" \
  --batch-size 100 \
  --micro-batch-size 4 \
  --num-epochs 3 \
  --learning-rate 3e-4 \
  --cutoff-len 256 \
  --val-set-size 100 \
  --lora-r 8 \
  --lora-alpha 16 \
  --lora-dropout 0.05 \
  --lora-target-modules "query_key_value" \
  --train-on-inputs \
  --add-eos-token \
  --no-group-by-length \
  --wandb-project "" \
  --wandb-run-name "" \
  --wandb-watch "" \
  --wandb-log-model "" \
  --resume-from-checkpoint "./output/lawgpt-bloom-3b-lora-sft-v1" \
  --prompt-template-name "roc_law"

# Infer

In [None]:
# Run the project's `lawgpt infer` command with the bloom-3b base model and
# the LoRA weights from the fine-tuning output directory, passing --load-8bit.
# NOTE(review): this command takes --prompt-template while the fine-tune
# command takes --prompt-template-name - confirm both option names against
# the CLI.
!poetry run lawgpt infer \
  --load-8bit \
  --base-model "bigscience/bloom-3b" \
  --lora-weights "./output/lawgpt-bloom-3b-lora-sft-v1" \
  --prompt-template "roc_law"

# Web UI

In [None]:
# Launch the project's `lawgpt webui` command with the bloom-3b base model and
# the fine-tuned LoRA weights, passing --no-load-8bit (the infer cell passes
# --load-8bit instead).
# NOTE(review): --server-name "0.0.0.0" together with --share-gradio
# presumably makes the UI reachable from outside the Colab runtime through a
# public Gradio link - confirm, and avoid sharing that link if the model
# should stay private.
!poetry run lawgpt webui \
  --no-load-8bit \
  --base-model "bigscience/bloom-3b" \
  --lora-weights "./output/lawgpt-bloom-3b-lora-sft-v1" \
  --prompt-template "roc_law" \
  --server-name "0.0.0.0" \
  --share-gradio