# ZADANIE: Model Card Tensor

- przygotuj dataframe w oparciu o specyfikacje "model cards" dla poszczegÃ³lnych modeli

# DOCS

- [dokumentacja pliku HF:`config.json`](https://huggingface.co/docs/transformers/main_classes/configuration)
- model cards:
  1. [Bielik-7B-v0.1](https://huggingface.co/speakleash/Bielik-7B-v0.1)
  2. [Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B)
  3. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
  4. dla ambitnych ðŸ”¥ (inna struktura)
    - [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
    - [Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B)

In [1]:
!pip install pandas




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from pathlib import Path
import json
import sys
import os
import pandas as pd

base = Path(os.getcwd())
pattern = "*-config.json"
matches = sorted(base.rglob(pattern))
files = [p.name for p in matches]
# print(files)
# print(json.dumps(files, indent=2))

failing = []
model_cards = []

for p in matches:
    try:
        with p.open('r', encoding='utf-8') as f:
            data = json.load(f)
        model_cards.append({
            'filename': p.name,
            'json': data
        })
    except json.JSONDecodeError:
        failing.append(f"Niepoprawny format JSON (Pusty/BÅ‚Ä™dny) w pliku: {p.name}")
    except ValueError as e:
        failing.append(f"BÅ‚Ä…d danych: {e} Plik: {p.name}")
    except Exception as e:
        failing.append(f"Inny nieznany bÅ‚Ä…d przy wczytywaniu {p.name}: {e}")

if len(failing):
    print(failing)
else:
    print('All models calrds loaded successfully')

df = pd.DataFrame(model_cards)
df['model_type'] = df['json'].apply(lambda x: x.get('model_type', None))
df_wynikowy = df[['filename', 'model_type']]

display(df_wynikowy)



All models calrds loaded successfully


Unnamed: 0,filename,model_type
0,Bielik-7B-Instruct-v0.1-config.json,mistral
1,DeepSeek-R1-config.json,deepseek_v3
2,Llama-3.1-8B-config.json,llama
3,Mistral-7B-v0.1-config.json,mistral
4,Qwen2.5-7B-Instruct-config.json,qwen2


In [5]:
import pandas as pd
from pathlib import Path
import json
import os

# list of tensors we want to visualise
tensors = [
    'embed_tokens.weight',
    'input_layernorm.weight',
    'mlp.down_proj.weight',
    'mpl.gate_proj.weight',
    'mpl.up_proj.weight',
    'post_attention_layernorm.weight',
    'self_attn.k_proj.weight',
    'self_attn.o_proj.weight',
    'self_attn.q_proj.weight',
    'self_attn.v_proj.weight',
]

def infer_shapes_from_config(cfg):
    # extract common fields from different HF configs (with fallbacks)
    hidden_size = cfg.get('hidden_size') or cfg.get('n_embd') or cfg.get('d_model')
    intermediate = cfg.get('intermediate_size') or cfg.get('ffn_dim') or cfg.get('mlp_dim')
    num_attention_heads = cfg.get('num_attention_heads') or cfg.get('n_head') or cfg.get('n_heads')
    num_kv_heads = cfg.get('num_key_value_heads') or cfg.get('num_kv_heads') or None
    vocab_size = cfg.get('vocab_size')
    model_type = cfg.get('model_type') or (cfg.get('architectures')[0] if cfg.get('architectures') else None)

    # sensible fallbacks
    if hidden_size is None or num_attention_heads is None:
        # not enough info to compute shapes deterministically
        return None

    try:
        head_dim = int(hidden_size) // int(num_attention_heads)
    except Exception:
        head_dim = None

    if num_kv_heads is None:
        num_kv_heads = int(num_attention_heads)
    try:
        k_out = int(num_kv_heads) * head_dim if head_dim is not None else None
    except Exception:
        k_out = None

    # fallback for intermediate size if missing: common pattern is 4*hidden
    if intermediate is None and hidden_size is not None:
        try:
            intermediate = int(hidden_size) * 4
        except Exception:
            intermediate = None

    shapes = {}
    shapes['embed_tokens.weight'] = f'[{vocab_size}, {hidden_size}]' if vocab_size and hidden_size else '[?, ?]'
    shapes['input_layernorm.weight'] = f'[{hidden_size}]' if hidden_size else '[?]'
    shapes['post_attention_layernorm.weight'] = f'[{hidden_size}]' if hidden_size else '[?]'

    # handle Mistral / Bielik layout (observed in the screenshot):
    # - mlp.down_proj.weight stored as [hidden, intermediate]
    # - mpl.gate_proj.weight and mpl.up_proj.weight stored as [intermediate, hidden]
    mt = str(model_type).lower() if model_type is not None else ''
    if 'mistral' in mt or 'mistral' in str(cfg.get('architectures', [])).lower():
        shapes['mlp.down_proj.weight'] = f'[{hidden_size}, {intermediate}]' if intermediate and hidden_size else '[?, ?]'
        shapes['mpl.gate_proj.weight'] = f'[{intermediate}, {hidden_size}]' if intermediate and hidden_size else '[?, ?]'
        shapes['mpl.up_proj.weight'] = f'[{intermediate}, {hidden_size}]' if intermediate and hidden_size else '[?, ?]'
    else:
        # default/common layout: down_proj [intermediate, hidden], gate [intermediate, hidden], up [hidden, intermediate]
        shapes['mlp.down_proj.weight'] = f'[{intermediate}, {hidden_size}]' if intermediate and hidden_size else '[?, ?]'
        shapes['mpl.gate_proj.weight'] = f'[{intermediate}, {hidden_size}]' if intermediate and hidden_size else '[?, ?]'
        shapes['mpl.up_proj.weight'] = f'[{hidden_size}, {intermediate}]' if intermediate and hidden_size else '[?, ?]'

    shapes['self_attn.k_proj.weight'] = f'[{k_out}, {hidden_size}]' if k_out and hidden_size else '[?, ?]'
    shapes['self_attn.o_proj.weight'] = f'[{hidden_size}, {k_out}]' if k_out and hidden_size else '[?, ?]'
    shapes['self_attn.q_proj.weight'] = f'[{hidden_size}, {hidden_size}]' if hidden_size else '[?, ?]'
    shapes['self_attn.v_proj.weight'] = f'[{k_out}, {hidden_size}]' if k_out and hidden_size else '[?, ?]'

    return shapes

# collect config files (searching the workspace for *-config.json)
base = Path(os.getcwd())
matches = sorted(base.rglob('*-config.json'))

models = {}
for p in matches:
    # only consider files inside hf-configs (or similar) or any explicit config files found
    try:
        with p.open('r', encoding='utf-8') as f:
            cfg = json.load(f)
    except Exception as e:
        # skip unreadable/invalid JSON files
        continue

    # choose a friendly model name: prefer file stem without '-config' suffix
    name = p.stem.replace('-config', '')
    shapes = infer_shapes_from_config(cfg)
    if shapes is None:
        # populate with unknown placeholders
        models[name] = ['[?, ?]' for _ in tensors]
    else:
        models[name] = [shapes.get(t, '[?, ?]') for t in tensors]

# if no configs found, provide a minimal example using the Bielik config if present
if len(models) == 0:
    # try to load from hf-configs/Bielik-7B-Instruct-v0.1-config.json if present
    fallback = base.joinpath('hf-configs').joinpath('Bielik-7B-Instruct-v0.1-config.json')
    if fallback.exists():
        with fallback.open('r', encoding='utf-8') as f:
            cfg = json.load(f)
        shapes = infer_shapes_from_config(cfg)
        if shapes is not None:
            models['Bielik-7B-Instruct-v0.1'] = [shapes.get(t,'[?, ?]') for t in tensors]

# build DataFrame and display
df = pd.DataFrame(models, index=tensors)

display(df)
# optionally show transposed view for easier per-model reading
# display(df.T)

# short textual summary
print(f'Found configs: {len(models)}. Models shown: {list(models.keys())}')


Unnamed: 0,Bielik-7B-Instruct-v0.1,DeepSeek-R1,Llama-3.1-8B,Mistral-7B-v0.1,Qwen2.5-7B-Instruct
embed_tokens.weight,"[32000, 4096]","[129280, 7168]","[128256, 4096]","[32000, 4096]","[152064, 3584]"
input_layernorm.weight,[4096],[7168],[4096],[4096],[3584]
mlp.down_proj.weight,"[4096, 14336]","[18432, 7168]","[14336, 4096]","[4096, 14336]","[18944, 3584]"
mpl.gate_proj.weight,"[14336, 4096]","[18432, 7168]","[14336, 4096]","[14336, 4096]","[18944, 3584]"
mpl.up_proj.weight,"[14336, 4096]","[7168, 18432]","[4096, 14336]","[14336, 4096]","[3584, 18944]"
post_attention_layernorm.weight,[4096],[7168],[4096],[4096],[3584]
self_attn.k_proj.weight,"[1024, 4096]","[7168, 7168]","[1024, 4096]","[1024, 4096]","[512, 3584]"
self_attn.o_proj.weight,"[4096, 1024]","[7168, 7168]","[4096, 1024]","[4096, 1024]","[3584, 512]"
self_attn.q_proj.weight,"[4096, 4096]","[7168, 7168]","[4096, 4096]","[4096, 4096]","[3584, 3584]"
self_attn.v_proj.weight,"[1024, 4096]","[7168, 7168]","[1024, 4096]","[1024, 4096]","[512, 3584]"


Found configs: 5. Models shown: ['Bielik-7B-Instruct-v0.1', 'DeepSeek-R1', 'Llama-3.1-8B', 'Mistral-7B-v0.1', 'Qwen2.5-7B-Instruct']
