In [1]:
import os
import subprocess
from pathlib import Path

platform = os.getenv('WORDSLAB_PLATFORM', 'UnknownLinux')
platform

'WindowsSubsystemForLinux'

In [2]:
# Get Ubuntu version
try:
    # Try reading from /etc/os-release first
    with open('/etc/os-release', 'r') as f:
        for line in f:
            if line.startswith('PRETTY_NAME='):
                os_version = line.split('=')[1].strip().strip('"')
                break
except:
    try:
        # Fallback to reading from /etc/issue
        with open('/etc/issue', 'r') as f:
            os_version = f.read().strip().split('\\')[0].strip()
    except:
        os_version = "Unknown"

os_version

'Ubuntu 24.04.2 LTS'

In [3]:
wordslab_version = os.getenv('WORDSLAB_VERSION', 'Unknown')
wordslab_version

'2025-04'

In [6]:
from importlib.metadata import version
import pynvml

cpu_only_file = Path(os.getenv('WORDSLAB_WORKSPACE', '')) / '.cpu-only'
if not cpu_only_file.exists():
    try:
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        driver_cuda_version = pynvml.nvmlSystemGetCudaDriverVersion_v2()
        cuda_version = f"{driver_cuda_version//1000}.{(driver_cuda_version%1000)//10}"
        pynvml.nvmlShutdown()
    except:
        pass

cuda_version

'12.9'

### Get URL - Windows Subsystem for Linux

```
- https if the file $WORDSLAB_WORKSPACE/.secrets/certificate.pem exists, http otherwise

- windows machine IP address from the file $WORDSLAB_HOME/.WORDSLAB_WINDOWS_IP if it exists, else 127.0.0.1

- environment variables for ports

JUPYTERLAB_PORT=8880
VSCODE_PORT=8881
OPENWEBUI_PORT=8882

USER_APP1_PORT=8883
USER_APP2_PORT=8884
USER_APP3_PORT=8885
USER_APP4_PORT=8886
USER_APP5_PORT=8887

DASHBOARD_PORT=8888
```

### Get URL - Jarvislabs

```
MACHINE_NAME=test-install2 (UI)
MACHINE_ID=261560 (API)

HOSTNAME=98708c9cbe44 (?)

reserved ports: 6006,7007

Native JUPYTERLAB
internal port: 8889
https://98708c261555.notebooks.jarvislabs.net/lab

ENDPOINTS

https://jarvislabs.ai/settings
API Tokens
[copy]

pip install git+https://github.com/jarvislabsai/jlclient.git

from jlclient import jarvisclient
from jlclient.jarvisclient import *
jarvisclient.token = 's7-bNTN1Jhh5XPSxnuVp74ypB8Fbi98jUX3rPRI_zoM'

import os

machine_id = os.getenv('MACHINE_ID')
instance = User.get_instance(machine_id)
instance.endpoints

['https://1685202829581.notebooks.jarvislabs.net',
 'https://1685202829582.notebooks.jarvislabs.net',
 'https://1685202829583.notebooks.jarvislabs.net',
 'https://1685202829584.notebooks.jarvislabs.net',
 'https://1685202829585.notebooks.jarvislabs.net',
 'https://1685202829586.notebooks.jarvislabs.net',
 'https://1685202829587.notebooks.jarvislabs.net',
 'https://1685202829588.notebooks.jarvislabs.net',
 'https://1685202829589.notebooks.jarvislabs.net',
 'https://16852028295810.notebooks.jarvislabs.net']
```

### Get URL - Runpod

```
RUNPOD_POD_ID=ebkhet9vq5flt7 (UI + used for URLs)

RUNPOD_PUBLIC_IP=69.30.85.49
RUNPOD_TCP_PORT_22=22047

RUNPOD_DC_ID=CA-MTL-1
RUNPOD_POD_HOSTNAME=ebkhet9vq5flt7-644112e4

HOSTNAME=5936ba7c007d (?)

Native JUPYTERLAB
internal port: 8888
https://ebkhet9vq5flt7-8888.proxy.runpod.net/lab

ENDPOINTS

https://$(RUNPOD_POD_ID)-8880.proxy.runpod.net/
...
https://$(RUNPOD_POD_ID)-8888.proxy.runpod.net/
```

### Get URL - Vast.ai

```
CONTAINER_ID=16855978 (UI)

HOSTNAME=9e254447c27d

PUBLIC_IPADDR=142.115.158.140
VAST_TCP_PORT_22=42790
VAST_TCP_PORT_8080=42702

Native JUPYTERLAB
internal port: 8080
https://142.115.158.140:42702/tree

Open ports

VAST_TCP_PORT_8880=42777
VAST_TCP_PORT_8881=42791
VAST_TCP_PORT_8882=42623
VAST_TCP_PORT_8883=42779
VAST_TCP_PORT_8884=42668
VAST_TCP_PORT_8885=42663
VAST_TCP_PORT_8886=42729
VAST_TCP_PORT_8887=42736

ENDPOINTS

https://$(PUBLIC_IPADDR):$(VAST_TCP_PORT_8880)
```

In [7]:
import os

# Get wordslab container ports
jupyterlab_port = os.getenv("JUPYTERLAB_PORT")
vscode_port = os.getenv("VSCODE_PORT")
openwebui_port = os.getenv("OPENWEBUI_PORT")
user_app1_port = os.getenv("USER_APP1_PORT")
user_app2_port = os.getenv("USER_APP2_PORT")
user_app3_port = os.getenv("USER_APP3_PORT")
user_app4_port = os.getenv("USER_APP4_PORT")
user_app5_port = os.getenv("USER_APP5_PORT")
dahsboard_port = os.getenv("DASHBOARD_PORT")
wordslab_ports = [jupyterlab_port, vscode_port, openwebui_port, user_app1_port, user_app2_port, user_app3_port, user_app4_port, user_app5_port, dahsboard_port]

wordslab_ports

['8880', '8881', '8882', '8883', '8884', '8885', '8886', '8887', '8888']

In [8]:
from pathlib import Path

# Check if apps are exposed as https
workspace_path = Path(os.getenv("WORDSLAB_WORKSPACE"))
use_https = (workspace_path / ".secrets/certificate.pem").exists()
url_scheme = "https://" if use_https else "http://"

url_scheme

'https://'

> pip install git+https://github.com/jarvislabsai/jlclient.git

In [9]:
# Get platform and compute wordslab apps endpoints
platform = os.getenv("WORDSLAB_PLATFORM")

if platform=="WindowsSubsystemForLinux":
    home_path = Path(os.getenv("WORDSLAB_HOME"))
    windows_ip_address_file = home_path / ".WORDSLAB_WINDOWS_IP"
    if windows_ip_address_file.exists():
        ip_address = windows_ip_address_file.read_text().strip()
    else:
        ip_address = "127.0.0.1"
    endpoints = [f"{url_scheme}{ip_address}:{port}" for port in wordslab_ports]

elif platform=="Jarvislabs.ai":
    from jlclient import jarvisclient
    from jlclient.jarvisclient import *
    jarvislabs_api_token_file = workspace_path / ".secrets/api-token"
    jarvislabs_api_token = jarvislabs_api_token_file.read_text().strip()
    jarvisclient.token = jarvislabs_api_token
    machine_id = os.getenv("MACHINE_ID")   
    instance = User.get_instance(machine_id)
    endpoints = instance.endpoints

elif platform=="Runpod.io": 
    pod_id = os.getenv("RUNPOD_POD_ID") 
    endpoints = [f"{url_scheme}{pod_id}-{port}.proxy.runpod.net" for port in wordslab_ports]

elif platform=="Vast.ai": 
    public_ip = os.getenv("PUBLIC_IPADDR")
    public_ports = [os.getenv(f"VAST_TCP_PORT_{port}") for port in wordslab_ports]
    endpoints = [f"{url_scheme}{public_ip}:{public_port}" for public_port in public_ports]

elif platform=="UnknownLinux":
    if use_https:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(("8.8.8.8", 80))
        ip_address = s.getsockname()[0]
        s.close()
    else:
        ip_address = "127.0.0.1"
    endpoints = [f"{url_scheme}{ip_address}:{port}" for port in wordslab_ports]

endpoints

['https://192.168.1.197:8880',
 'https://192.168.1.197:8881',
 'https://192.168.1.197:8882',
 'https://192.168.1.197:8883',
 'https://192.168.1.197:8884',
 'https://192.168.1.197:8885',
 'https://192.168.1.197:8886',
 'https://192.168.1.197:8887',
 'https://192.168.1.197:8888']

In [10]:
print(f"export JUPYTERLAB_URL={endpoints[0]}")
print(f"export VSCODE_URL={endpoints[1]}")
print(f"export OPENWEBUI_URL={endpoints[2]}")
print(f"export USER_APP1_URL={endpoints[3]}")
print(f"export USER_APP2_URL={endpoints[4]}")
print(f"export USER_APP3_URL={endpoints[5]}")
print(f"export USER_APP4_URL={endpoints[6]}")
print(f"export USER_APP5_URL={endpoints[7]}")
print(f"export DASHBOARD_URL={endpoints[8]}")

export JUPYTERLAB_URL=https://192.168.1.197:8880
export VSCODE_URL=https://192.168.1.197:8881
export OPENWEBUI_URL=https://192.168.1.197:8882
export USER_APP1_URL=https://192.168.1.197:8883
export USER_APP2_URL=https://192.168.1.197:8884
export USER_APP3_URL=https://192.168.1.197:8885
export USER_APP4_URL=https://192.168.1.197:8886
export USER_APP5_URL=https://192.168.1.197:8887
export DASHBOARD_URL=https://192.168.1.197:8888


In [11]:
import psutil
import subprocess
from dataclasses import dataclass

@dataclass 
class CPUMetrics:
    cpu_model: str
    cpu_cores: int
    cpu_usage: float
    ram_total: float  # GB
    ram_used: float   # GB

def get_cpu_metrics() -> CPUMetrics:
    # Get CPU model
    try:
        with open('/proc/cpuinfo', 'r') as f:
            for line in f:
                if line.startswith('model name'):
                    cpu_model = line.split(':')[1].strip()
                    break
    except:
        cpu_model = "Unknown"

    cpu_frequency = int(round(psutil.cpu_freq().current, -2))
    
    # Get number of logical cores
    cpu_cores = psutil.cpu_count(logical=True)
    
    # Get CPU usage percentage
    cpu_usage = psutil.cpu_percent()
    
    # Get RAM info in GB
    ram = psutil.virtual_memory()
    ram_total = ram.total / (1024**3)  # Convert bytes to GB
    ram_used = ram.used / (1024**3)    # Convert bytes to GB
    
    return CPUMetrics(
        cpu_model=f"{cpu_model} {cpu_frequency} MHz",
        cpu_cores=cpu_cores,
        cpu_usage=cpu_usage,
        ram_total=ram_total,
        ram_used=ram_used
    )

In [12]:
get_cpu_metrics()

CPUMetrics(cpu_model='13th Gen Intel(R) Core(TM) i7-13700K 3400 MHz', cpu_cores=24, cpu_usage=2.7, ram_total=31.036624908447266, ram_used=4.351894378662109)

In [13]:
from dataclasses import dataclass
import pynvml

@dataclass
class GPUMetrics:
    gpu_model: str
    gpu_usage: float  # percentage
    vram_total: float # GB
    vram_used: float  # GB

def get_gpu_metrics(gpu_id=0):
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)

    gpu_model = pynvml.nvmlDeviceGetName(handle)
    utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
    gpu_usage = utilization.gpu
    vram_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    vram_total = vram_info.total / 1024**3  # Convert bytes to GB
    vram_used = vram_info.used / 1024**3   # Convert bytes to GB

    pynvml.nvmlShutdown()
    
    metrics = GPUMetrics(
        gpu_model=gpu_model,
        gpu_usage=gpu_usage,
        vram_total=vram_total,
        vram_used=vram_used
    )

    return metrics

# Example usage:
metrics = get_gpu_metrics()
print(metrics)

GPUMetrics(gpu_model='NVIDIA GeForce RTX 4090', gpu_usage=68, vram_total=23.98828125, vram_used=6.340904235839844)


In [9]:
import os

wordslab_home = os.getenv('WORDSLAB_HOME')
wordslab_workspace = os.getenv('WORDSLAB_WORKSPACE')
wordslab_models = os.getenv('WORDSLAB_MODELS')
wordslab_paths = { "/", wordslab_home, wordslab_workspace, wordslab_models }

In [15]:
wordslab_paths

{'/', '/home', '/home/models', '/home/workspace'}

In [34]:
wordslab_paths_to_env_variable = { "/":"LINUX", wordslab_home:'WORDSLAB_HOME', wordslab_workspace:'WORDSLAB_WORKSPACE', wordslab_models:'WORDSLAB_MODELS'}

In [35]:
wordslab_paths_to_env_variable

{'/': 'LINUX',
 '/home': 'WORDSLAB_HOME',
 '/home/workspace': 'WORDSLAB_WORKSPACE',
 '/home/models': 'WORDSLAB_MODELS'}

In [16]:
import psutil
mountpoints = {part.mountpoint:part.device for part in set(psutil.disk_partitions(all=True)) if part.fstype in {'ext4','xfs','fuse','overlay'} and not any(substring in part.mountpoint for substring in ["/mnt/", "nvidia", ".so"])}

In [17]:
mountpoints

{'/home/models': '/dev/sde',
 '/home/workspace': '/dev/sdc',
 '/': '/dev/sdd',
 '/usr/lib/modules/5.15.167.4-microsoft-standard-WSL2': '',
 '/usr/lib/wsl/lib': '',
 '/var/lib/docker': '/dev/sdd'}

In [18]:
def map_paths_to_devices(wordslab_paths, mountpoints):
    wordslab_paths_devices = {}
    sorted_mountpoints = sorted(mountpoints.keys(), key=len, reverse=True)
    for path in wordslab_paths:
        for mountpoint in sorted_mountpoints:
            if path.startswith(mountpoint):
                device = mountpoints[mountpoint]
                if device not in wordslab_paths_devices:
                    wordslab_paths_devices[device] = [path]
                else:
                    wordslab_paths_devices[device].append(path)
                break
    return wordslab_paths_devices 

In [19]:
wordslab_paths_devices = map_paths_to_devices(wordslab_paths, mountpoints)
wordslab_paths_devices 

{'/dev/sdc': ['/home/workspace'],
 '/dev/sdd': ['/', '/home'],
 '/dev/sde': ['/home/models']}

In [21]:
wordslab_devices_usage

{'/dev/sdc': sdiskusage(total=1081101176832, used=1094873088, free=1025013948416, percent=0.1),
 '/dev/sdd': sdiskusage(total=1081101176832, used=18279911424, free=1007828910080, percent=1.8),
 '/dev/sde': sdiskusage(total=1081101176832, used=166313140224, free=859795681280, percent=16.2)}

In [22]:
@dataclass
class DirectoryMetrics: 
    env_variable: str
    path: Path
    size_used: float # GB

@dataclass
class DiskMetrics:
    name: str
    size_total: float # GB
    size_used: float  # GB
    directories: list[DirectoryMetrics]

In [2]:
import subprocess

def get_directory_size(path):
   # Use the 'du' command to get the total size of the directory
    result = subprocess.run(['du', '-s', '--block-size=1', path], capture_output=True, text=True, check=True)
    # The output is in the format: <size>\t<path>
    size = int(result.stdout.split('\t')[0])
    return size

def get_disks_metrics():
    worsdlab_paths_sizes = {path:get_directory_size(path) for path in [wordslab_home, wordslab_workspace, wordslab_models]}
    if wordslab_workspace.startswith(wordslab_home):
        worsdlab_paths_sizes[wordslab_home] -= worsdlab_paths_sizes[wordslab_workspace]
    if wordslab_models.startswith(wordslab_home):
        worsdlab_paths_sizes[wordslab_home] -= worsdlab_paths_sizes[wordslab_models]
    wordslab_devices_usage = {item[0]:psutil.disk_usage(item[1][0]) for item in wordslab_paths_devices.items()}
    disks_metrics = []
    for disk_name,disk_usage in wordslab_devices_usage.items():
        disk_metrics = DiskMetrics(
            name=disk_name,
            size_total=disk_usage.total / 1024**3,
            size_used=disk_usage.used / 1024**3,
            directories=[] 
        )
        disks_metrics.append(disk_metrics)
        already_counted_size = 0
        for path in reversed(wordslab_paths_devices[disk_name]):
            path_size = worsdlab_paths_sizes[path] if path!='/' else disk_usage.used
            path_size = path_size - already_counted_size
            dir_metrics = DirectoryMetrics(
                env_variable=wordslab_paths_to_env_variable[path],
                path=path,
                size_used=path_size / 1024**3
            )
            disk_metrics.directories.append(dir_metrics)
            already_counted_size = already_counted_size + path_size
    return disks_metrics

In [51]:
disks_metrics  = get_disks_metrics()
disks_metrics

[DiskMetrics(name='/dev/sdc', size_total=1006.853931427002, size_used=1.0198287963867188, directories=[DirectoryMetrics(env_variable='WORDSLAB_WORKSPACE', path='/home/workspace', size_used=1.0118560791015625)]),
 DiskMetrics(name='/dev/sdd', size_total=1006.853931427002, size_used=17.024574279785156, directories=[DirectoryMetrics(env_variable='WORDSLAB_HOME', path='/home', size_used=15.093090057373047), DirectoryMetrics(env_variable='LINUX', path='/', size_used=1.9314842224121094)]),
 DiskMetrics(name='/dev/sde', size_total=1006.853931427002, size_used=154.89118194580078, directories=[DirectoryMetrics(env_variable='WORDSLAB_MODELS', path='/home/models', size_used=154.88320922851562)])]

In [43]:
from pathlib import Path

wordslab_platform = os.getenv("WORDSLAB_PLATFORM")
windows_disks = wordslab_platform == "WindowsSubsystemForLinux"

if windows_disks:
    wordslab_windows_home = (Path(wordslab_home) / ".WORDSLAB_WINDOWS_HOME").read_text().strip()
    wordslab_windows_workspace = (Path(wordslab_workspace) / ".WORDSLAB_WINDOWS_WORKSPACE").read_text().strip()
    wordslab_windows_models = (Path(wordslab_models) / ".WORDSLAB_WINDOWS_MODELS").read_text().strip()

In [44]:
print(windows_disks)
if windows_disks:
    print(wordslab_windows_home,wordslab_windows_workspace,wordslab_windows_models)

True
C:\wordslab\virtual-machines\wordslab-notebooks C:\wordslab\virtual-machines\wordslab-workspace d:\wordslab\virtual-machines\wordslab-models


In [52]:
def windows_path_to_linux_vm_file(env_variable, windows_path):
    # Replace backslashes with forward slashes
    windows_path = windows_path.replace('\\', '/')
    # Replace the drive letter with /mnt/<drive>
    if len(windows_path) > 1 and windows_path[1] == ':':
        drive_letter = windows_path[0].lower()
        drive_path = f"/mnt/{drive_letter}"
        linux_file = Path(f"{drive_path}{windows_path[2:]}") / "ext4.vhdx"
    return (windows_path[:2], psutil.disk_usage(drive_path), env_variable, f"{windows_path}\\ext4.vhdx", linux_file.stat().st_size) 

In [53]:
wordslab_windows_home_vm_file = windows_path_to_linux_vm_file("WORDSLAB_WINDOWS_HOME", wordslab_windows_home)
wordslab_windows_workspace_vm_file = windows_path_to_linux_vm_file("WORDSLAB_WINDOWS_WORKSPACE", wordslab_windows_workspace)
wordslab_windows_models_vm_file = windows_path_to_linux_vm_file("WORDSLAB_WINDOWS_MODELS", wordslab_windows_models)

In [54]:
wordslab_windows_home_vm_file, wordslab_windows_workspace_vm_file, wordslab_windows_models_vm_file

(('C:',
  sdiskusage(total=2047178960896, used=150881116160, free=1896297844736, percent=7.4),
  'WORDSLAB_WINDOWS_HOME',
  'C:/wordslab/virtual-machines/wordslab-notebooks\\ext4.vhdx',
  19152240640),
 ('C:',
  sdiskusage(total=2047178960896, used=150881116160, free=1896297844736, percent=7.4),
  'WORDSLAB_WINDOWS_WORKSPACE',
  'C:/wordslab/virtual-machines/wordslab-workspace\\ext4.vhdx',
  2160066560),
 ('d:',
  sdiskusage(total=4000768323584, used=796415913984, free=3204352409600, percent=19.9),
  'WORDSLAB_WINDOWS_MODELS',
  'd:/wordslab/virtual-machines/wordslab-models\\ext4.vhdx',
  166638649344))

In [64]:
def get_windows_disks_metrics():
    windows_disks_metrics = {}
    for disk_letter,disk_usage,env_variable,vm_file_path,file_size in [wordslab_windows_home_vm_file, wordslab_windows_workspace_vm_file, wordslab_windows_models_vm_file]:
        if disk_letter in windows_disks_metrics:
            windows_disk_metrics = windows_disks_metrics[disk_letter]
        else:
            windows_disk_metrics = DiskMetrics(
                name=disk_letter,
                size_total=disk_usage.total / 1024**3,
                size_used=disk_usage.used / 1024**3,
                directories=[] 
            )
            windows_disks_metrics[disk_letter] = windows_disk_metrics
        windows_disk_metrics.directories.append(
            DirectoryMetrics(
                env_variable=env_variable,
                path=vm_file_path,
                size_used=file_size / 1024**3
            )
        )
    return windows_disks_metrics

In [65]:
windows_disk_metrics = get_windows_disks_metrics()
windows_disk_metrics

{'C:': DiskMetrics(name='C:', size_total=1906.5839805603027, size_used=140.51898956298828, directories=[DirectoryMetrics(env_variable='WORDSLAB_WINDOWS_HOME', path='C:/wordslab/virtual-machines/wordslab-notebooks\\ext4.vhdx', size_used=17.8369140625), DirectoryMetrics(env_variable='WORDSLAB_WINDOWS_WORKSPACE', path='C:/wordslab/virtual-machines/wordslab-workspace\\ext4.vhdx', size_used=2.01171875)]),
 'd:': DiskMetrics(name='d:', size_total=3726.0058555603027, size_used=741.7201194763184, directories=[DirectoryMetrics(env_variable='WORDSLAB_WINDOWS_MODELS', path='d:/wordslab/virtual-machines/wordslab-models\\ext4.vhdx', size_used=155.1943359375)])}

### Jarvislabs
- / -> overlay size No
- /home -> /dev/rdb0 size Yes

import psutil
{part.mountpoint:part.device for part in set(psutil.disk_partitions(all=True)) if part.fstype in {'ext4','xfs','fuse','overlay'} and not "/mnt/" in part.mountpoint and not "nvidia" in part.mountpoint and not ".so" in part.mountpoint}

{'/etc/hostname': '/dev/nvme0n1p3',
 '/etc/resolv.conf': '/dev/nvme0n1p3',
 '/home': '/dev/rbd0',
 '/usr/sbin/docker-init': '/dev/nvme0n1p3',
 '/': 'overlay',
 '/etc/hosts': '/dev/nvme0n1p3'}

!df -h /

Filesystem      Size  Used Avail Use% Mounted on
overlay         1.9T  895G  886G  51% /

!df -h /home

Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd1        20G  177M   20G   1% /home

!du -sh /* | sort -h

0	/bin
0	/dev
0	/lib
0	/lib32
0	/lib64
0	/libx32
0	/sbin
0	/sys
4.0K	/boot
4.0K	/docker-entrypoint.sh
4.0K	/media
4.0K	/mnt
4.0K	/srv
4.0K	/tmp
4.0K	/workspace
8.0K	/cuda-keyring_1.1-1_all.deb
12K	/proc
20K	/NGC-DL-CONTAINER-LICENSE
92K	/run
2.0M	/home
2.3M	/etc
70M	/var
1.4G	/opt
6.4G	/usr
9.2G	/root

### Runpod
- / -> overlay size ?
- /workspace -> fuse size KO

import psutil
{part.mountpoint:part.device for part in set(psutil.disk_partitions(all=True)) if part.fstype in {'ext4','xfs','fuse','overlay'} and not "/mnt/" in part.mountpoint and not "nvidia" in part.mountpoint and not ".so" in part.mountpoint}

{'/usr/sbin/docker-init': '/dev/nvme3n1p3',
 '/workspace': 'mfs\\043eu-cz-1.runpod.net:9421',
 '/etc/hosts': '/dev/md0p1',
 '/etc/resolv.conf': '/dev/md0p1',
 '/etc/hostname': '/dev/md0p1',
 '/': 'overlay'}

!df -h /workspace

Filesystem                   Size  Used Avail Use% Mounted on
mfs#eu-cz-1.runpod.net:9421  234T  110T  125T  47% /workspace

!du -sh /* | sort -h

0	/bin
0	/boot
0	/dev
0	/home
0	/lib
0	/lib32
0	/lib64
0	/libx32
0	/media
0	/mnt
0	/sbin
0	/srv
0	/sys
0	/tmp
4.0K	/start.sh
8.0K	/cuda-keyring_1.0-1_all.deb
8.0K	/jupyter.log
12K	/proc
16K	/run
20K	/NGC-DL-CONTAINER-LICENSE
999K	/workspace
1.9M	/etc
2.6M	/get-pip.py
15M	/var
44M	/root
918M	/opt
12G	/usr


### Vast
- / -> overlay size OK

{'/etc/hosts': '/dev/md127p1',
 '/': 'overlay',
 '/etc/resolv.conf': '/dev/md127p1',
 '/etc/hostname': '/dev/md127p1'}

!du -sh /* | sort -h

0	/bin
0	/boot
0	/dev
0	/lib
0	/lib32
0	/lib64
0	/libx32
0	/media
0	/mnt
0	/sbin
0	/srv
0	/sys
4.0K	/Untitled.ipynb
8.0K	/cuda-keyring_1.0-1_all.deb
12K	/proc
16K	/workspace
20K	/NGC-DL-CONTAINER-LICENSE
20K	/run
68K	/home
424K	/tmp
2.3M	/etc
21M	/root
108M	/var
1.8G	/opt
5.0G	/venv
12G	/usr

!df -h /

 Filesystem      Size  Used Avail Use% Mounted on
overlay          16G  2.0M   16G   1% /

In [140]:
jarvislabs_mountpoints = {'/etc/hostname': '/dev/nvme0n1p3', '/etc/resolv.conf': '/dev/nvme0n1p3', '/home': '/dev/rbd0', '/usr/sbin/docker-init': '/dev/nvme0n1p3', '/': 'overlay', '/etc/hosts': '/dev/nvme0n1p3'}
runpod_mountpoints = {'/usr/sbin/docker-init': '/dev/nvme3n1p3', '/workspace': 'mfs\\043eu-cz-1.runpod.net:9421', '/etc/hosts': '/dev/md0p1', '/etc/resolv.conf': '/dev/md0p1', '/etc/hostname': '/dev/md0p1', '/': 'overlay'}
vast_mountpoints = {'/etc/hosts': '/dev/md127p1', '/': 'overlay', '/etc/resolv.conf': '/dev/md127p1', '/etc/hostname': '/dev/md127p1'}

In [141]:
def map_paths_to_devices(wordslab_paths, mountpoints):
    wordslab_paths_devices = {}
    sorted_mountpoints = sorted(mountpoints.keys(), key=len, reverse=True)
    for path in wordslab_paths:
        for mountpoint in sorted_mountpoints:
            if path.startswith(mountpoint):
                wordslab_paths_devices[path] = mountpoints[mountpoint]
                break
    return  wordslab_paths_devices   

In [142]:
map_paths_to_devices(wordslab_paths, wsl_mountpoints)

{'/': '/dev/sdd',
 '/home': '/dev/sdd',
 '/home/workspace': '/dev/sdc',
 '/home/models': '/dev/sde'}

In [143]:
map_paths_to_devices(wordslab_paths, jarvislabs_mountpoints)

{'/': 'overlay',
 '/home': '/dev/rbd0',
 '/home/workspace': '/dev/rbd0',
 '/home/models': '/dev/rbd0'}

In [144]:
map_paths_to_devices({'/', '/workspace', '/workspace/models', '/workspace/workspace'}, runpod_mountpoints)

{'/': 'overlay',
 '/workspace/workspace': 'mfs\\043eu-cz-1.runpod.net:9421',
 '/workspace': 'mfs\\043eu-cz-1.runpod.net:9421',
 '/workspace/models': 'mfs\\043eu-cz-1.runpod.net:9421'}

In [145]:
map_paths_to_devices(wordslab_paths, vast_mountpoints)

{'/': 'overlay',
 '/home': 'overlay',
 '/home/workspace': 'overlay',
 '/home/models': 'overlay'}

In [3]:
os_size = get_directory_size("/usr/bin")+get_directory_size("/usr/lib/x86_64-linux-gnu/")+get_directory_size("/usr/libexec")+get_directory_size("/usr/share")
os_size

1652318208

In [4]:
root_user_size = get_directory_size("/root")
root_user_size

193507328

In [15]:
python_archive = f"{wordslab_home}/python/archive-v0/"
python_packages = []
for entry in os.listdir(python_archive):
    full_path = os.path.join(python_archive, entry)
    if os.path.isdir(full_path):
        for item in os.listdir(full_path):
            if item.endswith(".dist-info"):
                package_name = item[:-len(".dist-info")]
                size_bytes = get_directory_size(full_path)
                size_mb = size_bytes / (1024 * 1024)
                if size_mb > 10:
                    python_packages.append((package_name, size_mb))
                break  # Only process first .dist-info found
# Sort by size descending
python_packages.sort(key=lambda x: x[1], reverse=True)
python_packages

[('torch-2.6.0+cu124', 1510.54296875),
 ('vllm-0.8.5.post1', 1016.3125),
 ('nvidia_cudnn_cu12-9.1.0.70', 976.01171875),
 ('triton-3.2.0', 684.0234375),
 ('nvidia_cublas_cu12-12.4.5.8', 527.375),
 ('open_webui-0.6.9', 377.6953125),
 ('nvidia_cufft_cu12-11.2.1.3', 280.4140625),
 ('nvidia_cusparse_cu12-12.3.1.170', 268.67578125),
 ('nvidia_nccl_cu12-2.21.5', 240.0546875),
 ('cupy_cuda12x-13.4.1', 216.01953125),
 ('hf_xet-1.1.0', 209.6015625),
 ('nvidia_cusparselt_cu12-0.6.2', 202.8359375),
 ('nvidia_cusolver_cu12-11.6.1.9', 193.49609375),
 ('opencv_python-4.11.0.86', 169.109375),
 ('ray-2.46.0', 165.0390625),
 ('xformers-0.0.29.post2', 162.80859375),
 ('tree_sitter_language_pack-0.7.3', 149.046875),
 ('milvus_lite-2.4.12', 141.47265625),
 ('opencv_python_headless-4.11.0.86', 135.8671875),
 ('pyarrow-20.0.0', 134.93359375),
 ('ctranslate2-4.6.0', 131.24609375),
 ('llvmlite-0.44.0', 127.421875),
 ('faiss_cpu-1.11.0', 126.40625),
 ('playwright-1.49.1', 125.453125),
 ('scipy-1.15.3', 118.0273

In [16]:
jupyterlab_size = get_directory_size(f"{wordslab_home}/jupyterlab")
jupyterlab_size

117841920

In [17]:
codeserver_size = get_directory_size(f"{wordslab_home}/code-server")
codeserver_size

413532160

In [18]:
ollama_size = get_directory_size(f"{wordslab_home}/ollama")
ollama_size

3365883904

In [20]:
openwebui_size = get_directory_size(f"{wordslab_home}/open-webui")
openwebui_size

686034944

In [21]:
jupyterlab_data_size = get_directory_size(f"{wordslab_workspace}/.jupyter")
jupyterlab_data_size

180224

In [25]:
codeserver_data_size = get_directory_size(f"{wordslab_workspace}/.codeserver")
codeserver_data_size

318582784

In [23]:
openwebui_data_size = get_directory_size(f"{wordslab_workspace}/.openwebui")
openwebui_data_size

434176

In [29]:
workspace_projects = []
project_dirs = [
    name for name in os.listdir(wordslab_workspace)
    if not name.startswith('.') and os.path.isdir(os.path.join(wordslab_workspace, name))
]
for project_dir in project_dirs:
    size_bytes = get_directory_size(os.path.join(wordslab_workspace, project_dir))
    size_mb = size_bytes / (1024 * 1024)
    if size_mb > 10:
        workspace_projects.append((project_dir, size_mb))
# Sort by size descending
workspace_projects.sort(key=lambda x: x[1], reverse=True)
workspace_projects

[('wordslab-notebooks', 231.5859375),
 ('wordslab-notebooks-tutorials', 197.6953125)]

In [5]:
import ollama
models = ollama.list().models
models

[Model(model='nomic-embed-text:latest', modified_at=datetime.datetime(2025, 5, 17, 17, 31, 3, 636407, tzinfo=TzInfo(+02:00)), digest='0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f', size=274302450, details=ModelDetails(parent_model='', format='gguf', family='nomic-bert', families=['nomic-bert'], parameter_size='137M', quantization_level='F16')),
 Model(model='qwen2.5-coder:1.5b-base', modified_at=datetime.datetime(2025, 5, 17, 17, 30, 54, 646626, tzinfo=TzInfo(+02:00)), digest='02e0f2817a890a6de385d534465c04c5d0980abddc83615c09e79cee2c094446', size=986060385, details=ModelDetails(parent_model='', format='gguf', family='qwen2', families=['qwen2'], parameter_size='1.5B', quantization_level='Q4_K_M')),
 Model(model='gemma3:4b', modified_at=datetime.datetime(2025, 5, 17, 17, 30, 37, 67247, tzinfo=TzInfo(+02:00)), digest='a2af6cc3eb7fa8be8504abaf9b04e88f17a119ec3f04a3addf55f92841195f5a', size=3338801804, details=ModelDetails(parent_model='', format='gguf', family='gemma3'

In [10]:
models.sort(key=lambda m: m.size, reverse=True)
for model in models:
    print(f"{model.model} {model.details.quantization_level} - {model.details.parameter_size} params - {model.size/1024/1024:.1f} GB")

gemma3:4b Q4_K_M - 4.3B params - 3184.1 GB
qwen2.5-coder:1.5b-base Q4_K_M - 1.5B params - 940.4 GB
nomic-embed-text:latest F16 - 137M params - 261.6 GB


In [23]:
from pathlib import Path
import os

hf_home = Path(os.getenv("HF_HOME"))
hf_home

PosixPath('/home/models/huggingface')

In [47]:
import re

def find_hf_models(hf_home):
    hf_home_size = get_directory_size(hf_home)
    if hf_home_size == 0:
        return None
    
    cache_dir = hf_home / "hub"
    model_dirs = []
    hf_models_size = 0
    pattern = re.compile(r"models--([^/\\]+)--([^/\\]+)")
    for path in cache_dir.iterdir():
        if path.is_dir():
            match = pattern.fullmatch(path.name)
            if match:
                org, model = match.groups()
                model_id = f"{org}/{model}"
                model_size = get_directory_size(path)
                hf_models_size += model_size
                model_dirs.append((model_id, model_size, path.resolve()))

    model_dirs.sort(key=lambda x: x[1], reverse=True)
    model_dirs.append(("Models cache", hf_home_size-hf_models_size, hf_home))
    return model_dirs

hf_hub_model_dirs = find_hf_models(hf_home)
hf_hub_model_dirs

[('Qwen/Qwen3-1.7B',
  4079501312,
  PosixPath('/home/models/huggingface/hub/models--Qwen--Qwen3-1.7B')),
 ('Qwen/Qwen3-4B-AWQ',
  2681970688,
  PosixPath('/home/models/huggingface/hub/models--Qwen--Qwen3-4B-AWQ')),
 ('Models cache', 6187704320, PosixPath('/home/models/huggingface'))]