In [1]:
# Look up the installed "torch" distribution version from package metadata;
# this cell does not import torch itself.
import importlib.metadata
importlib.metadata.version("torch")

'2.2.0'

In [None]:
pip install transformers accelerate

**Profiler Implementation**

https://github.com/pytorch/pytorch/blob/487ebcac3bc10b4b4b0631dafe2a12ddb0852f2a/torch/csrc/profiler/python/init.cpp

https://github.com/pytorch/kineto/tree/main/libkineto/src

**Profiler log level**

  VERBOSE = 0,
  INFO = 1,
  WARNING = 2,
  ERROR = 3,
  STAGE = 4,
  ENUM_COUNT = 5

In [3]:
import os

# Kineto log level "0" corresponds to VERBOSE in the level list documented in
# this notebook. The value must be a string because it is an environment variable.
# NOTE(review): presumably this has to be set before torch initialises Kineto — confirm.
os.environ.update({"KINETO_LOG_LEVEL": "0"})

In [4]:
import torch

# Display the version string of the torch build that was actually imported.
torch.__version__

'2.2.0+cu121'

In [7]:
import torch
from transformers import AutoModelForCausalLM

# Load the CroissantLLM base checkpoint from its safetensors shards. Both the
# dtype and the device placement are delegated to the library via "auto".
model = AutoModelForCausalLM.from_pretrained(
    "croissantllm/CroissantLLMBase",
    use_safetensors=True,
    torch_dtype="auto",
    device_map="auto",
)

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/397M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from torch.profiler import profile, record_function, ProfilerActivity

# Synthetic batch: 50 sequences of 1024 random token ids drawn from [0, 32000),
# paired with an all-ones attention mask, both moved to the model's device.
input_ids = torch.randint(
    low=0, high=32000, size=(50, 1024), dtype=torch.int64
).to(model.device)
attention_mask = torch.ones(50, 1024).to(model.device)
model.eval()

# Profile a single no-grad forward pass on both CPU and CUDA, recording Python
# stacks, and label the region "model_inference" in the trace.
with profile(
    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
    with_stack=True,
    experimental_config=torch._C._profiler._ExperimentalConfig(verbose=True),
) as prof, record_function("model_inference"), torch.no_grad():
    outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        use_cache=False,
        output_attentions=False,
        output_hidden_states=False,
    )

# Per-operator averages as a plain-text table.
print(prof.key_averages().table())

```
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                        model_inference         0.18%      14.742ms         9.70%     799.770ms     799.770ms       0.000us         0.00%        8.176s        8.176s             1  
                                           aten::arange         0.00%     106.000us         0.10%       8.467ms       4.234ms       1.000us         0.00%       6.000us       3.000us             2  
                                            aten::empty         0.03%       2.493ms         0.03%       2.493ms      25.439us       0.000us         0.00%       0.000us       0.000us            98  
                                          aten::resize_         0.00%      56.000us         0.01%       1.044ms     522.000us       0.000us         0.00%       0.000us       0.000us             2  
                                       cudaLaunchKernel         8.78%     723.549ms         8.78%     723.549ms     826.913us     328.621ms         4.29%     328.621ms     375.567us           875  
void (anonymous namespace)::elementwise_kernel_with_...         0.00%       0.000us         0.00%       0.000us       0.000us       1.000us         0.00%       1.000us       1.000us             1  
                                        aten::unsqueeze         0.00%     148.000us         0.00%     166.000us       3.388us       0.000us         0.00%       0.000us       0.000us            49  
                                       aten::as_strided         0.00%     113.000us         0.00%     113.000us       0.187us       0.000us         0.00%       0.000us       0.000us           603  
                                        aten::embedding         0.00%      87.000us         5.76%     474.649ms     474.649ms       0.000us         0.00%       1.314ms       1.314ms             1  
                                          aten::reshape         0.00%     256.000us         0.01%     431.000us       1.781us       0.000us         0.00%       0.000us       0.000us           242  
                                             aten::view         0.00%     225.000us         0.00%     225.000us       0.714us       0.000us         0.00%       0.000us       0.000us           315  
                                     aten::index_select         0.00%     117.000us         5.76%     474.543ms     474.543ms       1.314ms         0.02%       1.314ms       1.314ms             1  
                                  cudaStreamIsCapturing         0.00%      44.000us         0.00%      44.000us       1.128us       3.053ms         0.04%       3.053ms      78.282us            39  
                                             cudaMalloc         0.18%      14.599ms         0.18%      14.599ms     811.056us      60.387ms         0.79%      60.387ms       3.355ms            18  
void at::native::(anonymous namespace)::indexSelectL...         0.00%       0.000us         0.00%       0.000us       0.000us       1.314ms         0.02%       1.314ms       1.314ms             1  
                                               aten::eq         0.00%     101.000us         0.08%       6.894ms       6.894ms       2.000us         0.00%       2.000us       2.000us             1  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us       2.000us         0.00%       2.000us       2.000us             1  
                                              aten::all         0.00%      78.000us         0.06%       5.293ms       5.293ms       7.000us         0.00%       7.000us       7.000us             1  
void at::native::reduce_kernel<512, 1, at::native::R...         0.00%       0.000us         0.00%       0.000us       0.000us       7.000us         0.00%       7.000us       7.000us             1  
                                       aten::is_nonzero         0.00%       8.000us         0.00%      65.000us      65.000us       0.000us         0.00%       2.000us       2.000us             1  
                                             aten::item         0.00%       5.000us         0.00%      57.000us      57.000us       0.000us         0.00%       2.000us       2.000us             1  
                              aten::_local_scalar_dense         0.00%      13.000us         0.00%      52.000us      52.000us       2.000us         0.00%       2.000us       2.000us             1  
                                        cudaMemcpyAsync         0.00%      34.000us         0.00%      34.000us      34.000us       0.000us         0.00%       0.000us       0.000us             1  
                       Memcpy DtoH (Device -> Pageable)         0.00%       0.000us         0.00%       0.000us       0.000us       2.000us         0.00%       2.000us       2.000us             1  
                                  cudaStreamSynchronize         0.00%       5.000us         0.00%       5.000us       5.000us       0.000us         0.00%       0.000us       0.000us             1  
                                               aten::to         0.00%       3.000us         0.00%       3.000us       0.015us       0.000us         0.00%       0.000us       0.000us           196  
                                              aten::pow         0.01%     743.000us         0.32%      26.170ms     534.082us      60.137ms         0.79%     100.702ms       2.055ms            49  
                                      aten::result_type         0.00%       1.000us         0.00%       1.000us       0.020us       0.000us         0.00%       0.000us       0.000us            49  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      60.137ms         0.79%      60.137ms       1.227ms            49  
                                             aten::mean         0.01%     796.000us         0.06%       5.092ms     103.918us      29.301ms         0.38%      30.752ms     627.592us            49  
void at::native::reduce_kernel<512, 1, at::native::R...         0.00%       0.000us         0.00%       0.000us       0.000us      29.301ms         0.38%      29.301ms     597.980us            49  
                                              aten::add         0.02%       1.602ms         0.14%      11.149ms      76.890us     175.393ms         2.29%     205.720ms       1.419ms           145  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      97.000us         0.00%      97.000us       1.980us            49  
                                            aten::rsqrt         0.01%     512.000us         0.24%      19.560ms     399.184us      74.000us         0.00%      19.662ms     401.265us            49  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      74.000us         0.00%      74.000us       1.510us            49  
                                              aten::mul         0.03%       2.408ms         0.15%      11.959ms      54.858us     356.785ms         4.66%     394.627ms       1.810ms           218  
void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us     238.789ms         3.12%     238.789ms       1.231ms           194  
                                           aten::linear         0.01%       1.139ms         0.38%      31.700ms     187.574us       0.000us         0.00%        6.567s      38.856ms           169  
                                                aten::t         0.00%     392.000us         0.01%     735.000us       4.349us       0.000us         0.00%       0.000us       0.000us           169  
                                        aten::transpose         0.01%     616.000us         0.01%     686.000us       1.900us       0.000us         0.00%       0.000us       0.000us           361  
                                           aten::matmul         0.01%     948.000us         0.37%      30.510ms     180.533us       0.000us         0.00%        6.746s      39.918ms           169  
                                               aten::mm         0.07%       5.818ms         0.35%      29.144ms     172.450us        6.465s        84.47%        6.746s      39.918ms           169  
                                               cudaFree         0.15%      12.242ms         0.15%      12.242ms       6.121ms       1.446ms         0.02%       1.446ms     723.000us             2  
                                 cudaDeviceGetAttribute         0.00%       0.000us         0.00%       0.000us       0.000us      18.023ms         0.24%      18.023ms       1.202ms            15  
                                   cudaGetSymbolAddress         0.00%     154.000us         0.00%     154.000us     154.000us       0.000us         0.00%       0.000us       0.000us             1  
          cudaOccupancyMaxActiveBlocksPerMultiprocessor         0.03%       2.499ms         0.03%       2.499ms      24.990us      86.611ms         1.13%      86.611ms     866.110us           100  
                                 ampere_sgemm_128x64_tn         0.00%       0.000us         0.00%       0.000us       0.000us        2.033s        26.56%        2.033s      21.181ms            96  
                                     aten::_unsafe_view         0.00%     101.000us         0.00%     101.000us       0.598us       0.000us         0.00%       0.000us       0.000us           169  
                                            aten::slice         0.01%     421.000us         0.01%     433.000us       3.007us       0.000us         0.00%       0.000us       0.000us           144  
                                            aten::index         0.01%     908.000us         0.75%      61.806ms       1.288ms     289.000us         0.00%      57.657ms       1.201ms            48  
void at::native::index_elementwise_kernel<128, 4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     289.000us         0.00%     289.000us       6.021us            48  
                                              aten::neg         0.01%     543.000us         0.12%      10.080ms     210.000us      30.007ms         0.39%      56.006ms       1.167ms            48  
void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      30.007ms         0.39%      30.007ms     625.146us            48  
                                              aten::cat         0.01%     718.000us         0.10%       8.464ms     176.333us      69.500ms         0.91%      71.949ms       1.499ms            48  
void at::native::(anonymous namespace)::CatArrayBatc...         0.00%       0.000us         0.00%       0.000us       0.000us      69.500ms         0.91%      69.500ms       1.448ms            48  
                     aten::scaled_dot_product_attention         0.00%     237.000us         0.13%      10.731ms     447.125us       0.000us         0.00%     411.735ms      17.156ms            24  
          aten::_scaled_dot_product_efficient_attention         0.00%     261.000us         0.13%      10.494ms     437.250us       0.000us         0.00%     411.735ms      17.156ms            24  
                     aten::_efficient_attention_forward         0.00%     397.000us         0.12%      10.095ms     420.625us     387.076ms         5.06%     411.735ms      17.156ms            24  
                                   cudaFuncSetAttribute         0.11%       9.238ms         0.11%       9.238ms     384.917us      23.438ms         0.31%      23.438ms     976.583us            24  
void at::native::elementwise_kernel<128, 2, at::nati...         0.00%       0.000us         0.00%       0.000us       0.000us      87.549ms         1.14%      87.549ms       1.824ms            48  
fmha_cutlassF_f32_aligned_64x128_rf_sm80(PyTorchMemE...         0.00%       0.000us         0.00%       0.000us       0.000us     387.076ms         5.06%     387.076ms      16.128ms            24  
                                             aten::silu         0.00%     294.000us         1.15%      94.475ms       3.936ms      79.196ms         1.03%      79.794ms       3.325ms            24  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      87.747ms         1.15%      87.747ms       1.828ms            48  
                                ampere_sgemm_128x128_tn         0.00%       0.000us         0.00%       0.000us       0.000us        4.432s        57.90%        4.432s      60.712ms            73  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us      79.196ms         1.03%      79.196ms       3.300ms            24  
                                  cudaDeviceSynchronize        90.30%        7.443s        90.30%        7.443s        7.443s       0.000us         0.00%       0.000us       0.000us             1  
void at::native::vectorized_elementwise_kernel<4, at...         0.00%       0.000us         0.00%       0.000us       0.000us     117.996ms         1.54%     117.996ms       4.917ms            24  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
Self CPU time total: 8.243s
Self CUDA time total: 7.654s
```

In [9]:
# Same forward pass as the profiled cell, run without the profiler attached.
with torch.no_grad():
    outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        use_cache=False,
        output_attentions=False,
        output_hidden_states=False,
    )

In [11]:
def profile_model_inference(model, batch_size, seq_length):
    input_ids = torch.randint(low=0, high=32000, size=(batch_size,seq_length), dtype=torch.int64).to(model.device)
    attention_mask = torch.ones(batch_size,seq_length).to(model.device)
    model.eval()
    with torch.profiler.profile(record_shapes=True, profile_memory=True, with_stack=True, with_flops=True, with_modules=True, experimental_config=torch._C._profiler._ExperimentalConfig(verbose=True, enable_cuda_sync_events=True)) as prof:
    #with torch.profiler.profile() as prof:
        with torch.profiler.record_function("MODEL INFERENCE"):
            with torch.no_grad():
                outputs = model(input_ids=input_ids, attention_mask=attention_mask, use_cache=False, output_attentions=False, output_hidden_states=False)        

    return prof

**Initial Error Message**

WARNING:2023-12-17 08:02:36 4992:4992 init.cpp:146] function cbapi->getCuptiStatus() failed with error CUPTI_ERROR_NOT_INITIALIZED (15)
WARNING:2023-12-17 08:02:36 4992:4992 init.cpp:147] CUPTI initialization failed - CUDA profiler activities will be missing
INFO:2023-12-17 08:02:36 4992:4992 init.cpp:149] If you see CUPTI_ERROR_INSUFFICIENT_PRIVILEGES, refer to https://developer.nvidia.com/nvidia-development-tools-solutions-err-nvgpuctrperm-cupti

In [12]:
# Profile a batch of 50 sequences, 1024 tokens each.
prof = profile_model_inference(model, batch_size=50, seq_length=1024)

  Log file: /tmp/libkineto_activities_2304.json
  Trace start time: 2023-12-17 11:23:27  Trace duration: 500ms
  Warmup duration: 5s
  Max GPU buffer size: 128MB
  Enabled activities: cpu_op,user_annotation,gpu_user_annotation,gpu_memcpy,gpu_memset,kernel,external_correlation,cuda_runtime,cuda_driver,cpu_instant_event,python_function,cuda_sync,xpu_runtime
INFO:2023-12-17 11:23:20 2304:2304 CuptiActivityProfiler.cpp:834] Enabling GPU tracing
INFO:2023-12-17 11:23:20 2304:2304 CuptiActivityProfiler.cpp:873] Tracing starting in 6s
INFO:2023-12-17 11:23:20 2304:2304 CuptiActivityProfiler.cpp:878] Tracing will end in 7s
STAGE:2023-12-17 11:23:20 2304:2304 ActivityProfilerController.cpp:314] Completed Stage: Warm Up
STAGE:2023-12-17 11:23:25 2304:2304 ActivityProfilerController.cpp:320] Completed Stage: Collection
INFO:2023-12-17 11:23:25 2304:2304 CuptiActivityProfiler.cpp:232] Processing 1 CPU buffers
INFO:2023-12-17 11:23:25 2304:2304 CuptiActivityProfiler.cpp:262] Processed 47046 GPU rec

In [13]:
# Fetch the recorded events once; the cells below index events[0] and iterate
# over the whole collection.
events = prof.events()

In [13]:
# Show which activity kinds this profiler run recorded.
prof.activities

{<ProfilerActivity.CPU: 0>, <ProfilerActivity.CUDA: 2>}

In [14]:
# Sanity check: the first event's CPU time should equal its own self time plus
# the CPU time of every event whose direct CPU parent is that first event.
total_time = sum(
    event.cpu_time
    for event in events
    if event.cpu_parent is not None and event.cpu_parent.id == events[0].id
)
events[0].cpu_time, total_time + events[0].self_cpu_time_total

(3716415.0, 3716415.0)

In [14]:
# Reporting cutoff: one thousandth (0.1 %) of the first event's CPU time.
threshold = events[0].cpu_time/1000

In [15]:
# Among the direct children of the first event, count how many reach the
# threshold and how much CPU time those events account for.
total_time = count_events = count_threshold = 0
for event in events:
    parent = event.cpu_parent
    if parent is None or parent.id != events[0].id:
        continue
    count_events += 1
    if event.cpu_time >= threshold:
        count_threshold += 1
        total_time += event.cpu_time
# (number above threshold, their share of direct children in %, their share of CPU time in %)
count_threshold, count_threshold/count_events*100, total_time/events[0].cpu_time*100

(33, 1.2154696132596685, 98.16356248190462)

In [None]:
def _print_hot_descendants(parent_event, top_event, depth=1, max_depth=3):
    """Print the descendants of ``parent_event`` whose CPU time reaches ``threshold``.

    Each printed line is prefixed with one extra " " argument per nesting level
    followed by "-". Percentages are always relative to ``top_event`` (the
    top-level operator), not to the immediate parent. Descent stops after
    ``max_depth`` levels, and a child below the threshold is neither printed
    nor descended into.
    """
    if parent_event.cpu_children is None:
        return
    indent = [" "] * (depth - 1)
    for child in parent_event.cpu_children:
        if child.cpu_time >= threshold:
            share = f"{(child.cpu_time/top_event.cpu_time*100):.2f} %"
            print(*indent, "-", child.name, int(child.cpu_time), share)
            if depth < max_depth:
                _print_hot_descendants(child, top_event, depth + 1, max_depth)


# Walk the direct children of the first event; for each one at or above the
# threshold, print it (as a share of the first event) and then its hot subtree.
for event in events:
    if event.cpu_parent is None or event.cpu_parent.id != events[0].id:
        continue
    if event.cpu_time >= threshold:
        print(event.name, int(event.cpu_time), f"{(event.cpu_time/events[0].cpu_time*100):.2f} %")
        _print_hot_descendants(event, event)

```
aten::arange 2361 3.12 %
- aten::empty 2170 91.91 %
- aten::arange 133 5.63 %
aten::embedding 142 0.19 %
- aten::index_select 109 76.76 %
aten::eq 81 0.11 %
aten::is_nonzero 1092 1.44 %
- aten::item 1086 99.45 %
  - aten::_local_scalar_dense 1082 99.08 %
    - cudaMemcpyAsync 1061 97.16 %
aten::pow 77 0.10 %
aten::rsqrt 696 0.92 %
- cudaMalloc 615 88.36 %
aten::linear 211 0.28 %
- aten::matmul 169 80.09 %
  - aten::mm 127 60.19 %
aten::linear 132 0.17 %
- aten::matmul 109 82.58 %
  - aten::mm 77 58.33 %
aten::linear 122 0.16 %
- aten::matmul 100 81.97 %
aten::index 104 0.14 %
aten::scaled_dot_product_attention 203 0.27 %
- aten::_scaled_dot_product_efficient_attention 166 81.77 %
  - aten::_efficient_attention_forward 102 50.25 %
aten::linear 134 0.18 %
- aten::matmul 113 84.33 %
  - aten::mm 82 61.19 %
aten::linear 111 0.15 %
- aten::matmul 89 80.18 %
aten::linear 104 0.14 %
- aten::matmul 78 75.00 %
aten::linear 94 0.12 %
aten::linear 123 0.16 %
- aten::matmul 101 82.11 %
aten::linear 102 0.13 %
- aten::matmul 85 83.33 %
aten::linear 103 0.14 %
- aten::matmul 85 82.52 %
aten::scaled_dot_product_attention 158 0.21 %
- aten::_scaled_dot_product_efficient_attention 134 84.81 %
  - aten::_efficient_attention_forward 86 54.43 %
aten::linear 118 0.16 %
- aten::matmul 101 85.59 %
aten::linear 111 0.15 %
- aten::matmul 87 78.38 %
aten::linear 95 0.13 %
- aten::matmul 76 80.00 %
aten::linear 91 0.12 %
aten::linear 120 0.16 %
- aten::matmul 98 81.67 %
aten::linear 106 0.14 %
- aten::matmul 86 81.13 %
aten::linear 101 0.13 %
- aten::matmul 83 82.18 %
aten::scaled_dot_product_attention 155 0.21 %
- aten::_scaled_dot_product_efficient_attention 135 87.10 %
  - aten::_efficient_attention_forward 86 55.48 %
aten::linear 122 0.16 %
- aten::matmul 104 85.25 %
aten::linear 108 0.14 %
- aten::matmul 85 78.70 %
aten::linear 94 0.12 %
aten::linear 93 0.12 %
aten::linear 123 0.16 %
- aten::matmul 99 80.49 %
aten::linear 98 0.13 %
- aten::matmul 82 83.67 %
aten::linear 99 0.13 %
- aten::matmul 82 82.83 %
aten::scaled_dot_product_attention 152 0.20 %
- aten::_scaled_dot_product_efficient_attention 132 86.84 %
  - aten::_efficient_attention_forward 84 55.26 %
aten::linear 131 0.17 %
- aten::matmul 102 77.86 %
aten::linear 122 0.16 %
- aten::matmul 88 72.13 %
aten::linear 94 0.12 %
aten::linear 96 0.13 %
aten::linear 120 0.16 %
- aten::matmul 98 81.67 %
aten::linear 99 0.13 %
- aten::matmul 83 83.84 %
aten::linear 105 0.14 %
- aten::matmul 85 80.95 %
aten::scaled_dot_product_attention 181 0.24 %
- aten::_scaled_dot_product_efficient_attention 161 88.95 %
  - aten::_efficient_attention_forward 102 56.35 %
aten::linear 119 0.16 %
- aten::matmul 102 85.71 %
aten::linear 108 0.14 %
- aten::matmul 85 78.70 %
aten::linear 102 0.13 %
- aten::matmul 78 76.47 %
aten::linear 112 0.15 %
- aten::matmul 92 82.14 %
aten::linear 124 0.16 %
- aten::matmul 99 79.84 %
aten::linear 103 0.14 %
- aten::matmul 84 81.55 %
aten::linear 102 0.13 %
- aten::matmul 84 82.35 %
aten::scaled_dot_product_attention 164 0.22 %
- aten::_scaled_dot_product_efficient_attention 144 87.80 %
  - aten::_efficient_attention_forward 86 52.44 %
aten::linear 117 0.15 %
- aten::matmul 100 85.47 %
aten::linear 110 0.15 %
- aten::matmul 86 78.18 %
aten::linear 94 0.12 %
- aten::matmul 76 80.85 %
aten::linear 92 0.12 %
aten::linear 121 0.16 %
- aten::matmul 97 80.17 %
aten::linear 98 0.13 %
- aten::matmul 81 82.65 %
aten::linear 97 0.13 %
- aten::matmul 80 82.47 %
aten::scaled_dot_product_attention 151 0.20 %
- aten::_scaled_dot_product_efficient_attention 131 86.75 %
  - aten::_efficient_attention_forward 84 55.63 %
aten::transpose 79 0.10 %
aten::linear 118 0.16 %
- aten::matmul 102 86.44 %
aten::linear 106 0.14 %
- aten::matmul 84 79.25 %
aten::linear 92 0.12 %
aten::linear 91 0.12 %
aten::linear 123 0.16 %
- aten::matmul 99 80.49 %
aten::linear 100 0.13 %
- aten::matmul 83 83.00 %
aten::linear 104 0.14 %
- aten::matmul 85 81.73 %
aten::scaled_dot_product_attention 151 0.20 %
- aten::_scaled_dot_product_efficient_attention 131 86.75 %
  - aten::_efficient_attention_forward 85 56.29 %
aten::linear 117 0.15 %
- aten::matmul 100 85.47 %
aten::linear 106 0.14 %
- aten::matmul 82 77.36 %
aten::linear 91 0.12 %
aten::linear 92 0.12 %
aten::linear 123 0.16 %
- aten::matmul 99 80.49 %
aten::linear 98 0.13 %
- aten::matmul 81 82.65 %
aten::linear 99 0.13 %
- aten::matmul 82 82.83 %
aten::scaled_dot_product_attention 154 0.20 %
- aten::_scaled_dot_product_efficient_attention 132 85.71 %
  - aten::_efficient_attention_forward 85 55.19 %
aten::linear 118 0.16 %
- aten::matmul 102 86.44 %
aten::linear 108 0.14 %
- aten::matmul 86 79.63 %
aten::linear 97 0.13 %
- aten::matmul 78 80.41 %
aten::linear 97 0.13 %
- aten::matmul 76 78.35 %
aten::linear 153 0.20 %
- aten::matmul 128 83.66 %
  - aten::mm 91 59.48 %
aten::linear 111 0.15 %
- aten::matmul 91 81.98 %
aten::linear 109 0.14 %
- aten::matmul 90 82.57 %
aten::index 79 0.10 %
aten::scaled_dot_product_attention 169 0.22 %
- aten::_scaled_dot_product_efficient_attention 147 86.98 %
  - aten::_efficient_attention_forward 96 56.80 %
aten::linear 131 0.17 %
- aten::matmul 112 85.50 %
  - aten::mm 81 61.83 %
aten::linear 121 0.16 %
- aten::matmul 98 80.99 %
aten::linear 96 0.13 %
- aten::matmul 77 80.21 %
aten::linear 95 0.13 %
aten::linear 110 0.15 %
- aten::matmul 91 82.73 %
aten::linear 80 0.11 %
aten::linear 78 0.10 %
aten::scaled_dot_product_attention 116 0.15 %
- aten::_scaled_dot_product_efficient_attention 100 86.21 %
aten::linear 91 0.12 %
- aten::matmul 79 86.81 %
aten::linear 82 0.11 %
aten::linear 95 0.13 %
- aten::matmul 78 82.11 %
aten::linear 79 0.10 %
aten::linear 84 0.11 %
aten::scaled_dot_product_attention 116 0.15 %
- aten::_scaled_dot_product_efficient_attention 101 87.07 %
aten::linear 91 0.12 %
- aten::matmul 78 85.71 %
aten::linear 84 0.11 %
aten::linear 78 0.10 %
aten::linear 94 0.12 %
- aten::matmul 76 80.85 %
aten::linear 88 0.12 %
aten::linear 79 0.10 %
aten::scaled_dot_product_attention 114 0.15 %
- aten::_scaled_dot_product_efficient_attention 99 86.84 %
aten::linear 90 0.12 %
- aten::matmul 76 84.44 %
aten::linear 83 0.11 %
aten::linear 84 0.11 %
aten::linear 104 0.14 %
- aten::matmul 78 75.00 %
aten::linear 81 0.11 %
aten::linear 76 0.10 %
aten::scaled_dot_product_attention 115 0.15 %
- aten::_scaled_dot_product_efficient_attention 100 86.96 %
aten::linear 90 0.12 %
- aten::matmul 77 85.56 %
aten::linear 81 0.11 %
aten::linear 96 0.13 %
- aten::matmul 76 79.17 %
aten::linear 79 0.10 %
aten::linear 77 0.10 %
aten::scaled_dot_product_attention 115 0.15 %
- aten::_scaled_dot_product_efficient_attention 100 86.96 %
aten::linear 91 0.12 %
- aten::matmul 78 85.71 %
aten::linear 83 0.11 %
aten::linear 81 0.11 %
aten::linear 97 0.13 %
- aten::matmul 77 79.38 %
aten::linear 80 0.11 %
aten::linear 77 0.10 %
aten::scaled_dot_product_attention 115 0.15 %
- aten::_scaled_dot_product_efficient_attention 100 86.96 %
aten::linear 90 0.12 %
- aten::matmul 78 86.67 %
aten::linear 82 0.11 %
aten::linear 80 0.11 %
aten::linear 97 0.13 %
- aten::matmul 77 79.38 %
aten::linear 77 0.10 %
aten::linear 78 0.10 %
aten::scaled_dot_product_attention 123 0.16 %
- aten::_scaled_dot_product_efficient_attention 108 87.80 %
aten::linear 91 0.12 %
- aten::matmul 78 85.71 %
aten::linear 147 0.19 %
aten::linear 93 0.12 %
- aten::matmul 76 81.72 %
aten::linear 83 0.11 %
aten::linear 78 0.10 %
aten::scaled_dot_product_attention 124 0.16 %
- aten::_scaled_dot_product_efficient_attention 109 87.90 %
aten::linear 91 0.12 %
- aten::matmul 77 84.62 %
aten::linear 84 0.11 %
aten::linear 100 0.13 %
- aten::matmul 81 81.00 %
aten::linear 79 0.10 %
aten::linear 79 0.10 %
aten::scaled_dot_product_attention 115 0.15 %
- aten::_scaled_dot_product_efficient_attention 100 86.96 %
aten::linear 93 0.12 %
- aten::matmul 78 83.87 %
aten::linear 82 0.11 %
aten::linear 76 0.10 %
aten::linear 106 0.14 %
- aten::matmul 88 83.02 %
aten::linear 83 0.11 %
aten::scaled_dot_product_attention 80 0.11 %
aten::linear 93 0.12 %
aten::scaled_dot_product_attention 83 0.11 %
aten::scaled_dot_product_attention 113 0.15 %
- aten::_scaled_dot_product_efficient_attention 103 91.15 %
  - aten::_efficient_attention_forward 81 71.68 %
aten::linear 4787 6.33 %
- aten::matmul 4775 99.75 %
  - aten::mm 4755 99.33 %
    - cudaMalloc 4710 98.39 %
```

Jarvislabs PyTorch VM config:

### Jeremy Howard: How to install PyTorch and CUDA with conda

https://twitter.com/jeremyphoward/status/1697435241152127369

1. Install miniconda

https://github.com/fastai/fastsetup/blob/master/setup-conda.sh

2. Find out what CUDA version PyTorch expects by going to their website and seeing what the latest "compute platform" version is.

https://pytorch.org/

> conda install pytorch ... pytorch-cuda=12.1 ...

3. Install CUDA

```
conda install cuda -c nvidia/label/cuda-12.1.0
```

4. Copy the command to install PyTorch from their website, but replace `-c nvidia` with a version-specific label, as shown below:

```
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia/label/cuda-12.1.0
```

### ubuntu version

```
root@ac9978bd266c:~# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description:    Ubuntu 22.04.3 LTS
Release:        22.04
Codename:       jammy

root@ac9978bd266c:~# uname -a
Linux ac9978bd266c 5.15.0-89-generic #99-Ubuntu SMP Mon Oct 30 20:42:41 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

root@ac9978bd266c:~# cat /etc/os-release
PRETTY_NAME="Ubuntu 22.04.3 LTS"
NAME="Ubuntu"
VERSION_ID="22.04"
VERSION="22.04.3 LTS (Jammy Jellyfish)"
VERSION_CODENAME=jammy
ID=ubuntu
ID_LIKE=debian
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
UBUNTU_CODENAME=jammy
```

### container setup

Docker container ID

```
root@ac9978bd266c:~# echo $HOSTNAME
ac9978bd266c
```

Docker container entrypoint

```
root@ac9978bd266c:~# cat /docker-entrypoint.sh
#!/bin/bash
source /opt/conda/etc/profile.d/conda.sh
conda activate py3.10                   
echo "PasswordAuthentication no" >> /etc/ssh/sshd_config
service ssh start
export SHELL="/bin/bash"

env HOME=/home code-server --host 0.0.0.0 --port 7007 --auth none&
env HOME=/home jupyter lab --ip=0.0.0.0 --NotebookApp.token=$TOKEN  --allow-root --port 8889
```

```
root@ac9978bd266c:~# ps aux
USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root           1  0.0  0.0   4360  3392 ?        Ss   08:26   0:00 /bin/bash /docker-entrypoint.sh
root          18  0.0  0.0  15428  3732 ?        Ss   08:26   0:00 sshd: /usr/sbin/sshd [listener] 0 of 10-100 startups
root          19  0.0  0.0 1238088 64232 ?       Sl   08:26   0:00 /usr/lib/code-server/lib/node /usr/lib/code-server --host 0.0.0.0 --port 7007 --auth none
root          20  0.2  0.0 888616 94924 ?        Rl   08:26   0:07 /root/miniconda3/envs/py3.10/bin/python /root/miniconda3/envs/py3.10/bin/jupyter-lab --ip=0.0.0.0 --NotebookApp.token=<REDACTED> --allo
root          46  0.0  0.0 1172236 64432 ?       Sl   08:26   0:00 /usr/lib/code-server/lib/node /usr/lib/code-server/out/node/entry
```

Jarvislabs URLs

- Jupyterlab: https://ac9978bd266c.notebooksh.jarvislabs.net/
- VS Code:    https://ac9978bd266c0.notebooksh.jarvislabs.net/
- Any service listening on port 6006 inside the container: https://ac9978bd266c1.notebooksh.jarvislabs.net/

### miniconda install

```
root@ac9978bd266c:/root/miniconda3# which conda
/root/miniconda3/bin/conda

root@ac9978bd266c:/root/miniconda3# conda --version
conda 23.11.0

root@ac9978bd266c:~# which python
/root/miniconda3/envs/py3.10/bin/python

root@ac9978bd266c:/root/miniconda3# python --version
Python 3.10.13
```

### miniconda environments

```
root@ac9978bd266c:/root/miniconda3# conda env list
base                     /root/miniconda3
py3.10                   /root/miniconda3/envs/py3.10

root@ac9978bd266c:/root/miniconda3/envs/py3.10/conda-meta# cat history 
==> 2024-01-30 10:00:19 <==
# cmd: /root/miniconda3/bin/conda create -n py3.10 python=3.10
# conda version: 23.11.0
```

### apt source

https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 

### nvidia packages

```
cuda-cccl-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-compat-12-3/unknown,now 545.23.08-1 amd64 [installed]
cuda-cudart-12-3/unknown,now 12.3.101-1 amd64 [installed]
cuda-cudart-dev-12-3/unknown,now 12.3.101-1 amd64 [installed]
cuda-cuobjdump-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-cupti-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-cupti-dev-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-cuxxfilt-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-driver-dev-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-gdb-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-nvdisasm-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-nvml-dev-12-3/unknown,now 12.3.101-1 amd64 [installed]
cuda-nvprof-12-3/unknown,now 12.3.101-1 amd64 [installed]
cuda-nvprune-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-nvtx-12-3/unknown,now 12.3.101-1 amd64 [installed]
cuda-opencl-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-opencl-dev-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-profiler-api-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-sanitizer-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
cuda-toolkit-12-3-config-common/unknown,now 12.3.101-1 all [installed,automatic]
cuda-toolkit-12-config-common/unknown,now 12.3.101-1 all [installed,automatic]
cuda-toolkit-config-common/unknown,now 12.3.101-1 all [installed,automatic]
libcublas-12-3/unknown,now 12.3.4.1-1 amd64 [installed]
libcublas-dev-12-3/unknown,now 12.3.4.1-1 amd64 [installed]
libcufft-12-3/unknown,now 11.0.12.1-1 amd64 [installed,automatic]
libcufft-dev-12-3/unknown,now 11.0.12.1-1 amd64 [installed,automatic]
libcufile-12-3/unknown,now 1.8.1.2-1 amd64 [installed,automatic]
libcufile-dev-12-3/unknown,now 1.8.1.2-1 amd64 [installed,automatic]
libcusolver-12-3/unknown,now 11.5.4.101-1 amd64 [installed,automatic]
libcusolver-dev-12-3/unknown,now 11.5.4.101-1 amd64 [installed,automatic]
libcusparse-12-3/unknown,now 12.2.0.103-1 amd64 [installed]
libcusparse-dev-12-3/unknown,now 12.2.0.103-1 amd64 [installed]
libnpp-12-3/unknown,now 12.2.3.2-1 amd64 [installed]
libnpp-dev-12-3/unknown,now 12.2.3.2-1 amd64 [installed]
libnvjitlink-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
libnvjitlink-dev-12-3/unknown,now 12.3.101-1 amd64 [installed,automatic]
libnvjpeg-12-3/unknown,now 12.3.0.81-1 amd64 [installed,automatic]
libnvjpeg-dev-12-3/unknown,now 12.3.0.81-1 amd64 [installed,automatic]
nsight-compute-2023.3.1/unknown,now 2023.3.1.1-1 amd64 [installed,automatic]
```

### Environment variables

```
NV_LIBCUBLAS_VERSION=12.3.4.1-1
NVIDIA_VISIBLE_DEVICES=3
NV_NVML_DEV_VERSION=12.3.101-1
NV_LIBNCCL_DEV_PACKAGE=libnccl-dev=2.19.3-1+cuda12.3
NV_LIBNCCL_DEV_PACKAGE_VERSION=2.19.3-1
PYTHON_VERSION=3.10
NVIDIA_REQUIRE_CUDA=cuda>=12.3 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 brand=tesla,driver>=535,driver<536 brand=unknown,driver>=535,driver<536 brand=nvidia,driver>=535,driver<536 brand=nvidiartx,driver>=535,driver<536 brand=geforce,driver>=535,driver<536 brand=geforcertx,driver>=535,driver<536 brand=quadro,driver>=535,driver<536 brand=quadrortx,driver>=535,driver<536 brand=titan,driver>=535,driver<536 brand=titanrtx,driver>=535,driver<536
NV_LIBCUBLAS_DEV_PACKAGE=libcublas-dev-12-3=12.3.4.1-1
NV_NVTX_VERSION=12.3.101-1
NV_CUDA_CUDART_DEV_VERSION=12.3.101-1
NV_LIBCUSPARSE_VERSION=12.2.0.103-1
NV_LIBNPP_VERSION=12.2.3.2-1
NCCL_VERSION=2.19.3-1
NVIDIA_DRIVER_CAPABILITIES=compute,utility
NV_NVPROF_DEV_PACKAGE=cuda-nvprof-12-3=12.3.101-1
NV_LIBNPP_PACKAGE=libnpp-12-3=12.2.3.2-1
NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev
NV_LIBCUBLAS_DEV_VERSION=12.3.4.1-1
NVIDIA_PRODUCT_NAME=CUDA
NV_LIBCUBLAS_DEV_PACKAGE_NAME=libcublas-dev-12-3
NV_CUDA_CUDART_VERSION=12.3.101-1
CUDA_VERSION=12.3.1
NV_LIBCUBLAS_PACKAGE=libcublas-12-3=12.3.4.1-1
NV_CUDA_NSIGHT_COMPUTE_DEV_PACKAGE=cuda-nsight-compute-12-3=12.3.1-1
PYDEVD_USE_FRAME_EVAL=NO
NV_LIBNPP_DEV_PACKAGE=libnpp-dev-12-3=12.2.3.2-1
NV_LIBCUBLAS_PACKAGE_NAME=libcublas-12-3
NV_LIBNPP_DEV_VERSION=12.2.3.2-1
NV_LIBCUSPARSE_DEV_VERSION=12.2.0.103-1
LIBRARY_PATH=/usr/local/cuda/lib64/stubs
NV_CUDA_LIB_VERSION=12.3.1-1
NVARCH=x86_64
NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-3
NV_LIBNCCL_PACKAGE=libnccl2=2.19.3-1+cuda12.3
LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
NV_CUDA_NSIGHT_COMPUTE_VERSION=12.3.1-1
NV_NVPROF_VERSION=12.3.101-1
PATH=/root/miniconda3/envs/py3.10/bin:/root/miniconda3/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
NV_LIBNCCL_PACKAGE_NAME=libnccl2
NV_LIBNCCL_PACKAGE_VERSION=2.19.3-1
```

### python packages

```
libpython3-stdlib/jammy-updates,jammy-security,now 3.10.6-1~22.04 amd64 [installed,automatic]
libpython3.10-minimal/jammy-updates,jammy-security,now 3.10.12-1~22.04.3 amd64 [installed,automatic]
libpython3.10-stdlib/jammy-updates,jammy-security,now 3.10.12-1~22.04.3 amd64 [installed,automatic]
libpython3.10/jammy-updates,jammy-security,now 3.10.12-1~22.04.3 amd64 [installed,automatic]
python3-dbus/jammy,now 1.2.18-3build1 amd64 [installed,automatic]
python3-distro/jammy,now 1.7.0-1 all [installed,automatic]
python3-gi/jammy-updates,now 3.42.1-0ubuntu1 amd64 [installed,automatic]
python3-minimal/jammy-updates,jammy-security,now 3.10.6-1~22.04 amd64 [installed,automatic]
python3.10-minimal/jammy-updates,jammy-security,now 3.10.12-1~22.04.3 amd64 [installed,automatic]
python3.10/jammy-updates,jammy-security,now 3.10.12-1~22.04.3 amd64 [installed,automatic]
python3/jammy-updates,jammy-security,now 3.10.6-1~22.04 amd64 [installed,automatic]
```

```
print(sys.path)
['/home', 
 '/root/miniconda3/envs/py3.10/lib/python310.zip', 
 '/root/miniconda3/envs/py3.10/lib/python3.10', 
 '/root/miniconda3/envs/py3.10/lib/python3.10/lib-dynload', 
 '', 
 '/root/miniconda3/envs/py3.10/lib/python3.10/site-packages']
```

### Jupyterlab config

/root/miniconda3/envs/py3.10/etc/jupyter/jupyter_notebook_config.d/jupyter-lsp-notebook.json

```
root@ac9978bd266c:~# cat /root/miniconda3/envs/py3.10/etc/jupyter/jupyter_notebook_config.d/jupyter-lsp-notebook.json
{
  "NotebookApp": {
    "nbserver_extensions": {
      "jupyter_lsp": true
    }
  }
}
```

/root/miniconda3/envs/py3.10/etc/jupyter/jupyter_notebook_config.d/jupyterlab.json

```
root@ac9978bd266c:~# cat /root/miniconda3/envs/py3.10/etc/jupyter/jupyter_notebook_config.d/jupyterlab.json
{
  "NotebookApp": {
    "nbserver_extensions": {
      "jupyterlab": true
    }
  }
}
```

/root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/jupyter-lsp-jupyter-server.json

```
root@ac9978bd266c:~# cat /root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/jupyter-lsp-jupyter-server.json
{
  "ServerApp": {
    "jpserver_extensions": {
      "jupyter_lsp": true
    }
  }
}
```

/root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/jupyter_server_terminals.json

```
root@ac9978bd266c:~# cat /root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/jupyter_server_terminals.json
{
  "ServerApp": {
    "jpserver_extensions": {
      "jupyter_server_terminals": true
    }
  }
}
```

/root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/jupyterlab.json

```
root@ac9978bd266c:~# cat /root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/jupyterlab.json
{
  "ServerApp": {
    "jpserver_extensions": {
      "jupyterlab": true
    }
  }
}
```

/root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/notebook.json


```
root@ac9978bd266c:~# cat /root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/notebook.json
{
  "ServerApp": {
    "jpserver_extensions": {
      "notebook": true
    }
  }
}
```

/root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/notebook_shim.json

```
root@ac9978bd266c:~# cat /root/miniconda3/envs/py3.10/etc/jupyter/jupyter_server_config.d/notebook_shim.json
{
    "ServerApp": {
        "jpserver_extensions": {
            "notebook_shim": true
        }
    }
}
```

/root/miniconda3/envs/py3.10/etc/jupyter/nbconfig/notebook.d/widgetsnbextension.json 

```
root@ac9978bd266c:~# cat /root/miniconda3/envs/py3.10/etc/jupyter/nbconfig/notebook.d/widgetsnbextension.json 
{
  "load_extensions": {
    "jupyter-js-widgets/extension": true
  }
}
```

### code-server config

```
root@ac9978bd266c:~# cat ~/.config/code-server/config.yaml

> nothing

root@ac9978bd266c:~# /usr/lib/code-server/bin/code-server --list-extensions

> nothing
```

### pip list

```
diffusers                 0.26.3
jupyterlab                4.0.11
numpy                     1.26.3
nvidia-cublas-cu12        12.1.3.1
nvidia-cuda-cupti-cu12    12.1.105
nvidia-cuda-nvrtc-cu12    12.1.105
nvidia-cuda-runtime-cu12  12.1.105
nvidia-cudnn-cu12         8.9.2.26
nvidia-cufft-cu12         11.0.2.54
nvidia-curand-cu12        10.3.2.106
nvidia-cusolver-cu12      11.4.5.107
nvidia-cusparse-cu12      12.1.0.106
nvidia-nccl-cu12          2.18.1
nvidia-nvjitlink-cu12     12.3.101
nvidia-nvtx-cu12          12.1.105
pandas                    2.2.0
spacy                     3.7.4
torch                     2.1.2
transformers              4.37.2
triton                    2.1.0
```

```
root@ac9978bd266c:~# pip show torch
Name: torch
Version: 2.1.2
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /root/miniconda3/envs/py3.10/lib/python3.10/site-packages
Requires: filelock, fsspec, jinja2, networkx, nvidia-cublas-cu12, nvidia-cuda-cupti-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-runtime-cu12, nvidia-cudnn-cu12, nvidia-cufft-cu12, nvidia-curand-cu12, nvidia-cusolver-cu12, nvidia-cusparse-cu12, nvidia-nccl-cu12, nvidia-nvtx-cu12, sympy, triton, typing-extensions
Required-by: torchaudio, torchvision
```

```
root@ac9978bd266c:/root/miniconda3# conda list -n py3.10
# packages in environment at /root/miniconda3/envs/py3.10:
#
# Name                    Version                   Build  Channel
_libgcc_mutex             0.1                        main  
_openmp_mutex             5.1                       1_gnu  
annotated-types           0.6.0                    pypi_0    pypi
anyio                     4.2.0                    pypi_0    pypi
argon2-cffi               23.1.0                   pypi_0    pypi
argon2-cffi-bindings      21.2.0                   pypi_0    pypi
arrow                     1.3.0                    pypi_0    pypi
asttokens                 2.4.1                    pypi_0    pypi
async-lru                 2.0.4                    pypi_0    pypi
attrs                     23.2.0                   pypi_0    pypi
babel                     2.14.0                   pypi_0    pypi
beautifulsoup4            4.12.3                   pypi_0    pypi
bleach                    6.1.0                    pypi_0    pypi
blis                      0.7.11                   pypi_0    pypi
bzip2                     1.0.8                h7b6447c_0  
ca-certificates           2023.12.12           h06a4308_0  
catalogue                 2.0.10                   pypi_0    pypi
certifi                   2023.11.17               pypi_0    pypi
cffi                      1.16.0                   pypi_0    pypi
charset-normalizer        3.3.2                    pypi_0    pypi
click                     8.1.7                    pypi_0    pypi
cloudpathlib              0.16.0                   pypi_0    pypi
comm                      0.2.1                    pypi_0    pypi
confection                0.1.4                    pypi_0    pypi
cymem                     2.0.8                    pypi_0    pypi
debugpy                   1.8.0                    pypi_0    pypi
decorator                 5.1.1                    pypi_0    pypi
defusedxml                0.7.1                    pypi_0    pypi
diffusers                 0.26.3                   pypi_0    pypi
exceptiongroup            1.2.0                    pypi_0    pypi
executing                 2.0.1                    pypi_0    pypi
fastjsonschema            2.19.1                   pypi_0    pypi
filelock                  3.13.1                   pypi_0    pypi
fqdn                      1.5.1                    pypi_0    pypi
fsspec                    2023.12.2                pypi_0    pypi
ftfy                      6.1.3                    pypi_0    pypi
huggingface-hub           0.20.3                   pypi_0    pypi
idna                      3.6                      pypi_0    pypi
importlib-metadata        7.0.1                    pypi_0    pypi
ipykernel                 6.29.0                   pypi_0    pypi
ipython                   8.20.0                   pypi_0    pypi
ipywidgets                8.1.1                    pypi_0    pypi
isoduration               20.11.0                  pypi_0    pypi
jedi                      0.19.1                   pypi_0    pypi
jinja2                    3.1.3                    pypi_0    pypi
json5                     0.9.14                   pypi_0    pypi
jsonpointer               2.4                      pypi_0    pypi
jsonschema                4.21.1                   pypi_0    pypi
jsonschema-specifications 2023.12.1                pypi_0    pypi
jupyter-client            8.6.0                    pypi_0    pypi
jupyter-core              5.7.1                    pypi_0    pypi
jupyter-events            0.9.0                    pypi_0    pypi
jupyter-lsp               2.2.2                    pypi_0    pypi
jupyter-server            2.12.5                   pypi_0    pypi
jupyter-server-terminals  0.5.2                    pypi_0    pypi
jupyterlab                4.0.11                   pypi_0    pypi
jupyterlab-pygments       0.3.0                    pypi_0    pypi
jupyterlab-server         2.25.2                   pypi_0    pypi
jupyterlab-widgets        3.0.9                    pypi_0    pypi
langcodes                 3.3.0                    pypi_0    pypi
ld_impl_linux-64          2.38                 h1181459_1  
libffi                    3.4.4                h6a678d5_0  
libgcc-ng                 11.2.0               h1234567_1  
libgomp                   11.2.0               h1234567_1  
libstdcxx-ng              11.2.0               h1234567_1  
libuuid                   1.41.5               h5eee18b_0  
markupsafe                2.1.4                    pypi_0    pypi
matplotlib-inline         0.1.6                    pypi_0    pypi
mistune                   3.0.2                    pypi_0    pypi
mpmath                    1.3.0                    pypi_0    pypi
murmurhash                1.0.10                   pypi_0    pypi
nbclient                  0.9.0                    pypi_0    pypi
nbconvert                 7.14.2                   pypi_0    pypi
nbformat                  5.9.2                    pypi_0    pypi
ncurses                   6.4                  h6a678d5_0  
nest-asyncio              1.6.0                    pypi_0    pypi
networkx                  3.2.1                    pypi_0    pypi
notebook                  7.0.7                    pypi_0    pypi
notebook-shim             0.2.3                    pypi_0    pypi
numpy                     1.26.3                   pypi_0    pypi
nvidia-cublas-cu12        12.1.3.1                 pypi_0    pypi
nvidia-cuda-cupti-cu12    12.1.105                 pypi_0    pypi
nvidia-cuda-nvrtc-cu12    12.1.105                 pypi_0    pypi
nvidia-cuda-runtime-cu12  12.1.105                 pypi_0    pypi
nvidia-cudnn-cu12         8.9.2.26                 pypi_0    pypi
nvidia-cufft-cu12         11.0.2.54                pypi_0    pypi
nvidia-curand-cu12        10.3.2.106               pypi_0    pypi
nvidia-cusolver-cu12      11.4.5.107               pypi_0    pypi
nvidia-cusparse-cu12      12.1.0.106               pypi_0    pypi
nvidia-nccl-cu12          2.18.1                   pypi_0    pypi
nvidia-nvjitlink-cu12     12.3.101                 pypi_0    pypi
nvidia-nvtx-cu12          12.1.105                 pypi_0    pypi
openssl                   3.0.12               h7f8727e_0  
overrides                 7.7.0                    pypi_0    pypi
packaging                 23.2                     pypi_0    pypi
pandas                    2.2.0                    pypi_0    pypi
pandocfilters             1.5.1                    pypi_0    pypi
parso                     0.8.3                    pypi_0    pypi
pexpect                   4.9.0                    pypi_0    pypi
pillow                    10.2.0                   pypi_0    pypi
pip                       23.3.2                   pypi_0    pypi
platformdirs              4.1.0                    pypi_0    pypi
preshed                   3.0.9                    pypi_0    pypi
prometheus-client         0.19.0                   pypi_0    pypi
prompt-toolkit            3.0.43                   pypi_0    pypi
psutil                    5.9.8                    pypi_0    pypi
ptyprocess                0.7.0                    pypi_0    pypi
pure-eval                 0.2.2                    pypi_0    pypi
pycparser                 2.21                     pypi_0    pypi
pydantic                  2.6.1                    pypi_0    pypi
pydantic-core             2.16.2                   pypi_0    pypi
pygments                  2.17.2                   pypi_0    pypi
python                    3.10.13              h955ad1f_0  
python-dateutil           2.8.2                    pypi_0    pypi
python-json-logger        2.0.7                    pypi_0    pypi
pytz                      2024.1                   pypi_0    pypi
pyyaml                    6.0.1                    pypi_0    pypi
pyzmq                     25.1.2                   pypi_0    pypi
readline                  8.2                  h5eee18b_0  
referencing               0.33.0                   pypi_0    pypi
regex                     2023.12.25               pypi_0    pypi
requests                  2.31.0                   pypi_0    pypi
rfc3339-validator         0.1.4                    pypi_0    pypi
rfc3986-validator         0.1.1                    pypi_0    pypi
rpds-py                   0.17.1                   pypi_0    pypi
safetensors               0.4.2                    pypi_0    pypi
send2trash                1.8.2                    pypi_0    pypi
setuptools                68.2.2          py310h06a4308_0  
six                       1.16.0                   pypi_0    pypi
smart-open                6.4.0                    pypi_0    pypi
sniffio                   1.3.0                    pypi_0    pypi
soupsieve                 2.5                      pypi_0    pypi
spacy                     3.7.4                    pypi_0    pypi
spacy-legacy              3.0.12                   pypi_0    pypi
spacy-loggers             1.0.5                    pypi_0    pypi
sqlite                    3.41.2               h5eee18b_0  
srsly                     2.4.8                    pypi_0    pypi
stack-data                0.6.3                    pypi_0    pypi
sympy                     1.12                     pypi_0    pypi
terminado                 0.18.0                   pypi_0    pypi
thinc                     8.2.3                    pypi_0    pypi
tinycss2                  1.2.1                    pypi_0    pypi
tk                        8.6.12               h1ccaba5_0  
tokenizers                0.15.2                   pypi_0    pypi
tomli                     2.0.1                    pypi_0    pypi
torch                     2.1.2                    pypi_0    pypi
torchaudio                2.1.2                    pypi_0    pypi
torchvision               0.16.2                   pypi_0    pypi
tornado                   6.4                      pypi_0    pypi
tqdm                      4.66.2                   pypi_0    pypi
traitlets                 5.14.1                   pypi_0    pypi
transformers              4.37.2                   pypi_0    pypi
triton                    2.1.0                    pypi_0    pypi
typer                     0.9.0                    pypi_0    pypi
types-python-dateutil     2.8.19.20240106          pypi_0    pypi
typing-extensions         4.9.0                    pypi_0    pypi
tzdata                    2024.1                   pypi_0    pypi
uri-template              1.3.0                    pypi_0    pypi
urllib3                   2.1.0                    pypi_0    pypi
wasabi                    1.1.2                    pypi_0    pypi
wcwidth                   0.2.13                   pypi_0    pypi
weasel                    0.3.4                    pypi_0    pypi
webcolors                 1.13                     pypi_0    pypi
webencodings              0.5.1                    pypi_0    pypi
websocket-client          1.7.0                    pypi_0    pypi
wheel                     0.41.2          py310h06a4308_0  
widgetsnbextension        4.0.9                    pypi_0    pypi
xz                        5.4.5                h5eee18b_0  
zipp                      3.17.0                   pypi_0    pypi
zlib                      1.2.13               h5eee18b_0  
```