# 1 install and import

In [None]:
%%cature
!pip install transformers torch pandas
!pip install "autoawq<0.2.7"

In [2]:
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
import pandas as pd

In [4]:
# Local directory the quantized weights and tokenizer files are written to.
quant_path = "DeepSeek-R1-Distill-Qwen-7B-140k-AWQ"
# Identifier of the full-precision checkpoint that is loaded and quantized.
model_name = "zhaolizhang/DeepSeek-R1-Distill-Qwen-7B-140k"

In [None]:
# Token budget per calibration example: rows whose problem + solution token
# count is not strictly below this value are filtered out, and each combined
# text is cut to at most this many tokens.
max_seq_length: int = 2048

# 2 load tokenizer

In [1]:
# Tokenizer belonging to the base checkpoint. It is reused for the token
# statistics, for building the calibration texts, and by the quantizer.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)


# 3 load calibration data 

In [2]:
# Calibration examples. Later cells read the `problem`, `solution` and
# `answer` columns of this frame.
df = pd.read_csv(filepath_or_buffer="calibration.csv")

In [3]:
def _token_length(text):
    """Return how many token ids `tokenizer.encode` produces for `text`."""
    return len(tokenizer.encode(text))


# Per-field token lengths, then their sum per row.
df['problem_token_count'] = df['problem'].apply(_token_length)
df['solution_token_count'] = df['solution'].apply(_token_length)
df['total_token_count'] = df['problem_token_count'] + df['solution_token_count']

# Summary statistics for each of the three count columns, in the same order.
for heading, column in (
    ("Problem Token Count Statistics:", 'problem_token_count'),
    ("\nSolution Token Count Statistics:", 'solution_token_count'),
    ("\nTotal Token Count Statistics:", 'total_token_count'),
):
    print(heading)
    print(df[column].describe())



In [4]:
# Store the answer column as integers.
df['answer'] = df['answer'].astype(int)

# Keep only rows whose combined token count is strictly under the budget and
# renumber the surviving rows from 0.
within_budget = df['total_token_count'] < max_seq_length
dfTrain = df.loc[within_budget].reset_index(drop=True)

In [None]:

# Build the calibration corpus: one "<problem> <solution>" string per row,
# taken from the first `max_calib_rows` rows of dfTrain.
max_calib_rows = 100

data = []
calib_rows = dfTrain.head(max_calib_rows)
for problem, solution in zip(calib_rows["problem"], calib_rows["solution"]):
    # Combine the problem and solution into a single sequence
    combined_text = problem + " " + solution

    # Encode WITHOUT special tokens: the ids are decoded straight back to text
    # below, so any BOS/EOS the tokenizer added here would end up as literal
    # strings inside the calibration text (and presumably be added a second
    # time when the text is tokenized again downstream — confirm).
    tokens = tokenizer.encode(combined_text, add_special_tokens=False)

    # Hard cap at the token budget; slicing is a no-op for shorter sequences.
    tokens = tokens[:max_seq_length]

    # Calibration data is passed on as text, so decode the (possibly
    # truncated) ids back to a string.
    data.append(tokenizer.decode(tokens))


In [None]:
# data = []
# for _, row in dfTrain.iterrows():
#     # Combine the problem and solution into a single sequence
#     combined_text = row["problem"] + " " + row["solution"]
    
#     # Tokenize the combined text
#     tokens = tokenizer.encode(combined_text)
    
#     # Truncate the tokens if they exceed the maximum sequence length
#     if len(tokens) > max_seq_length:
#         tokens = tokens[:max_seq_length]
    
#     # Decode the tokens back to text
#     truncated_text = tokenizer.decode(tokens)
    
#     # Create a conversation-like structure
#     conversation = [
#         {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
#         {"role": "user", "content": row["problem"]},  # User asks the problem
#         {"role": "assistant", "content": truncated_text}  # Assistant provides the solution
#     ]
    
#     data.append(conversation)

# 4 load model and quantize

In [None]:
from awq import AutoAWQForCausalLM

# Load the checkpoint named by `model_name` through the AWQ wrapper, reading
# safetensors weights and letting device_map="auto" decide device placement.
model = AutoAWQForCausalLM.from_pretrained(
    model_name,
    safetensors=True,
    device_map="auto",
)


In [None]:
# Settings handed to model.quantize() as `quant_config`.
quant_config = dict(
    zero_point=True,
    q_group_size=128,
    w_bit=4,
    version="GEMM",
)

In [None]:
# Quantize the loaded model in place, calibrating on the texts in `data`.
# NOTE(review): the quantizer may apply its own calibration sequence-length
# cap that is independent of `max_seq_length` — confirm long samples are
# actually used rather than dropped.
model.quantize(
    tokenizer,
    calib_data=data,
    quant_config=quant_config,
)

In [None]:
# Write the quantized weights (safetensors, "4GB" shards) to `quant_path`,
# then store the tokenizer files in the same directory.
model.save_quantized(
    quant_path,
    shard_size="4GB",
    safetensors=True,
)
tokenizer.save_pretrained(save_directory=quant_path)

# 5 push to Hugging Face

In [None]:
%%cature
!pip install huggingface_hub


In [None]:
import os

from huggingface_hub import HfApi, login

# Target repository on the Hugging Face Hub.
hub_repo_id = "zhaolizhang/DeepSeek-R1-Distill-Qwen-7B-140k-AWQ"

# Never hardcode the access token in the notebook. Read it from the HF_TOKEN
# environment variable; when it is unset, token=None makes login() prompt
# for it interactively instead.
login(token=os.environ.get("HF_TOKEN"))

# Upload the directory written by save_quantized()/save_pretrained(), so the
# Hub repo contains exactly the saved model and tokenizer files.
# NOTE(review): this replaces model.push_to_hub()/tokenizer.push_to_hub();
# the AWQ model wrapper may not expose push_to_hub — confirm if you prefer
# to restore the original calls.
hub_api = HfApi()
hub_api.create_repo(repo_id=hub_repo_id, exist_ok=True)
hub_api.upload_folder(folder_path=quant_path, repo_id=hub_repo_id)



# 6 upload to Kaggle

In [None]:
!zip -r x.zip DeepSeek-R1-Distill-Qwen-7B-140k-AWQ

In [None]:
# ==============================
# 1. Install kaggle
# ==============================
!pip install kaggle --quiet

# ==============================
# 2. Upload your kaggle.json (API token)
#    Get it from https://www.kaggle.com/<YourUserName>/account
# ==============================
from google.colab import files

print("Please choose the kaggle.json file you downloaded from your Kaggle account:")
uploaded = files.upload()  # This prompts a file chooser dialog

# ==============================
# 3. Move kaggle.json into the correct location and set permissions
# ==============================
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# ==============================
# 4. Create folder "/content/x" to simulate your data/models.
#    (If you already have a folder with your files, skip this step.)
# ==============================
!mkdir -p /content/x
!echo "Sample model data" > /content/x/example.txt

# ==============================
# 5. Zip the folder /content/x -> x.zip
# ==============================
# !zip -r x.zip /content/x

# ==============================
# 6. Create the dataset-metadata.json file
#    Adjust values to your liking:
#    - kaggle_username
#    - dataset_title
#    - dataset_slug
# ==============================
import json

kaggle_username = "zzllusa"  # e.g. "johnsmith"
dataset_title   = "DeepSeek-R1-Distill-Qwen-7B-140k-AWQ"
dataset_slug    = "Qwenm7B"

metadata = {
    "title": dataset_title,
    "id": f"{kaggle_username}/{dataset_slug}",
    "licenses": [
        {
            "name": "CC0-1.0"
        }
    ]
}

with open("dataset-metadata.json", "w") as f:
    json.dump(metadata, f, indent=2)

# ==============================
# 7. Create a staging folder and move:
#    - x.zip
#    - dataset-metadata.json
# ==============================
!mkdir AWQ
!mv x.zip AWQ
!cp dataset-metadata.json AWQ

# ==============================
# 8. Create the Kaggle dataset (PRIVATE)
# ==============================
!kaggle datasets create -p AWQ
