Skip to content

Commit

Permalink
Test data files
Browse files Browse the repository at this point in the history
  • Loading branch information
laurentprudhon authored and laurentprudhon committed Apr 1, 2024
1 parent de4b905 commit 3d5c452
Show file tree
Hide file tree
Showing 8 changed files with 219 additions and 129 deletions.
15 changes: 9 additions & 6 deletions _proc/index.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,18 @@
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"wordslab_llms models\n"
]
"data": {
"text/plain": [
"mistral_7b: mistralai/Mistral-7B-v0.1 => params: 7.3 B | disk: 13.49 GB | vram: 14.324 GB (16 bits)"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test()"
"base_models['mistral_7b']"
]
},
{
Expand Down
208 changes: 117 additions & 91 deletions nbs/00_models.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,13 @@
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"execution": {
"iopub.execute_input": "2024-04-01T10:36:32.293423Z",
"iopub.status.busy": "2024-04-01T10:36:32.281748Z",
"iopub.status.idle": "2024-04-01T10:36:32.319374Z",
"shell.execute_reply": "2024-04-01T10:36:32.318903Z",
"shell.execute_reply.started": "2024-04-01T10:36:32.293405Z"
},
"tags": []
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"import csv\n",
"import os, csv\n",
"from pathlib import Path\n",
"from datetime import datetime, timedelta\n",
"\n",
"base_models = {}\n",
Expand All @@ -71,8 +63,11 @@
" def __repr__(self):\n",
" return f\"{self.name}: {self.huggingface_repo} => params: {self.params_B} B | disk: {self.disk_size_GB} GB | vram: {self.memory_size_MB/1000} GB ({self.precision} bits)\"\n",
"\n",
" def print_identification_properties(self):\n",
" print(f\"{self.name}: {self.huggingface_repo}\")\n",
" def print_name_and_url(self):\n",
" print(f\"{self.name}: https://huggingface.co/{self.huggingface_repo}\")\n",
" \n",
" def print_model_card(self, print_name=True):\n",
" if print_name: self.print_name_and_url()\n",
" if self.is_best_model:\n",
" print(f\"** Best model - languages performance: {self.languages_perf} **\")\n",
" if self.moe_activated_params_B == 0:\n",
Expand All @@ -86,17 +81,38 @@
" print(f\"- model weights license: {self.license}\")\n",
" print(f\"- publication date: {self.date}\")\n",
" \n",
" def print_download_on_disk_properties(self):\n",
" def print_download_properties(self, print_name=True):\n",
" print(f\"{self.name}: {self.huggingface_repo}\")\n",
" if self.gated_access:\n",
" print(f\"** WARNING - Gated access: you need to request access on the Huggingface website **\")\n",
" print(f\"- huggingface repo: {self.huggingface_repo}\")\n",
" if self.gated_access:\n",
" print(f\"- huggingface read access token: mandatory\")\n",
" print(f\"- disk size: {self.disk_size_GB} GB\")\n",
" if self.safetensors:\n",
" print(f\"- weights format: Huggingface safetensors\")\n",
" else:\n",
" print(f\"- weights format: Pytorch .bin (pickle)\")\n",
" if self.install_commands:\n",
" print(\"- model dependencies installation commands\")\n",
" self.print_install_commands(line_prefix=\" - \")\n",
" \n",
" def print_install_commands(self, line_prefix=\"- \"):\n",
" for line in self.install_commands.splitlines():\n",
" print(line_prefix + line)\n",
" \n",
" def print_load_in_memory_properties(self):\n",
" def print_load_properties(self, print_name=True):\n",
" print(f\"{self.name}: {self.huggingface_repo}\")\n",
" \n",
" def print_perplexity_properties(self):\n",
" def print_perplexity_test(self, print_name=True):\n",
" print(f\"{self.name}: {self.huggingface_repo}\")\n",
" \n",
"try:\n",
" libdata_path = Path(__file__).parent / \"data\"\n",
"except NameError:\n",
" libdata_path = Path(os.getcwd()).parent / \"wordslab_llms\" / \"data\"\n",
" \n",
"with open('base_models.csv', 'r') as file:\n",
"with open(libdata_path / 'base_models.csv', 'r') as file:\n",
" csv_reader = csv.reader(file)\n",
" # Skip first line with column titles\n",
" next(csv_reader)\n",
Expand Down Expand Up @@ -198,17 +214,8 @@
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"execution": {
"iopub.execute_input": "2024-04-01T10:36:32.910090Z",
"iopub.status.busy": "2024-04-01T10:36:32.909674Z",
"iopub.status.idle": "2024-04-01T10:36:32.922029Z",
"shell.execute_reply": "2024-04-01T10:36:32.921499Z",
"shell.execute_reply.started": "2024-04-01T10:36:32.910063Z"
},
"tags": []
},
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
Expand Down Expand Up @@ -251,10 +258,10 @@
"solar_10b: upstage/SOLAR-10.7B-v1.0 => params: 10.7 B | disk: 19.99 GB | vram: 10.588 GB (8 bits)\n",
"qwen1.5_14b: Qwen/Qwen1.5-14B => params: 14.0 B | disk: 26.4 GB | vram: 0.0 GB (8 bits)\n",
"qwen1.5_moe_14bx2b: Qwen/Qwen1.5-MoE-A2.7B => params: 14.3 B | disk: 26.68 GB | vram: 14.901 GB (8 bits)\n",
"internlm2_20b: internlm/internlm2-20b => params: 20.0 B | disk: 37.0 GB | vram: 20.803 GB (8 bits)\n",
"mpt_30b: mosaicml/mpt-30b => params: 30.0 B | disk: 55.8 GB | vram: 16.567 GB (4 bits)\n",
"codellama_34b: codellama/CodeLlama-34b-hf => params: 34.0 B | disk: 62.86 GB | vram: 19.218 GB (4 bits)\n",
"yi_34b: 01-ai/Yi-34B => params: 34.0 B | disk: 64.06 GB | vram: 0.0 GB (4 bits)\n",
"internlm2_20b: internlm/internlm2-20b => params: 20.0 B | disk: 0.0 GB | vram: 0.0 GB (8 bits)\n",
"command-r_35b: CohereForAI/c4ai-command-r-v01-4bit => params: 35.0 B | disk: 21.15 GB | vram: 21.76 GB (4 bits)\n",
"falcon_40b: tiiuae/falcon-40b => params: 40.0 B | disk: 77.93 GB | vram: 0.0 GB (4 bits)\n",
"alfred_40b: lightonai/alfred-40b-1023 => params: 40.0 B | disk: 77.93 GB | vram: 22.889 GB (4 bits)\n",
Expand All @@ -273,7 +280,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"`ModelInfo` attributes - Model identification: \n",
"`ModelInfo` attributes - **Model card**\n",
"- `name`: simplified id to easily reference the model '(model)*(version)*_ (params)*_(contextlength)*'\n",
"- `is_best_model`: True if the model is one of the best among models of similar size, either for performance, speed, or openness (as a first approach, you can ignore all other models)\n",
"- `languages_perf`: \n",
Expand All @@ -282,29 +289,20 @@
"- `context_size`: maximum sequence length natively supported\n",
"- `vocabulary`: number of token types in the vocabulary of the tokenizer\n",
"- `training_tokens_T`: when the information was disclosed, number of tokens on which the base model was trained (in trillions) \n",
"- `license`: license for **the weights** of the model\n",
"- `license`: license for the weights of the model\n",
"- `date`: model publication date"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"execution": {
"iopub.execute_input": "2024-04-01T10:36:34.289022Z",
"iopub.status.busy": "2024-04-01T10:36:34.277805Z",
"iopub.status.idle": "2024-04-01T10:36:34.292804Z",
"shell.execute_reply": "2024-04-01T10:36:34.292284Z",
"shell.execute_reply.started": "2024-04-01T10:36:34.288984Z"
},
"tags": []
},
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mistral_7b: mistralai/Mistral-7B-v0.1\n",
"mistral_7b: https://huggingface.co/mistralai/Mistral-7B-v0.1\n",
"** Best model - languages performance: EN>ES,FR,DE **\n",
"- parameters: 7.3 B\n",
"- context size: 8192 tokens\n",
Expand All @@ -315,47 +313,38 @@
}
],
"source": [
"base_models['mistral_7b'].print_identification_properties()"
"base_models['mistral_7b'].print_model_card()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"execution": {
"iopub.execute_input": "2024-04-01T10:33:27.759682Z",
"iopub.status.busy": "2024-04-01T10:33:27.758833Z",
"iopub.status.idle": "2024-04-01T10:33:27.768676Z",
"shell.execute_reply": "2024-04-01T10:33:27.767768Z",
"shell.execute_reply.started": "2024-04-01T10:33:27.759643Z"
},
"tags": []
},
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mixtral_moe_46bx13b_hqq3bit: mobiuslabsgmbh/Mixtral-8x7B-Instruct-v0.1-hf-attn-4bit-moe-3bit-metaoffload-HQQ\n",
"mixtral_moe_46bx13b_hqq3bit: https://huggingface.co/mobiuslabsgmbh/Mixtral-8x7B-Instruct-v0.1-hf-attn-4bit-moe-3bit-metaoffload-HQQ\n",
"** Best model - languages performance: multilingual **\n",
"- activated parameters: 12.9 B, total parameters: 46.7 B (mixture of experts)\n",
"- context size: 32768 tokens\n",
"- vocabulary: 32000 token types\n",
"- training tokens: 0.0 T\n",
"- model weights license: Apache 2.0\n",
"- publication date: 2024-02-29\n"
]
}
],
"source": [
"base_models['mixtral_moe_46bx13b_hqq3bit'].print_identification_properties()"
"base_models['mixtral_moe_46bx13b_hqq3bit'].print_model_card()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`ModelInfo` attributes - Download model weights and code from the Huggingface hub:\n",
"`ModelInfo` attributes - **Download model** weights and code from the Huggingface hub\n",
"\n",
"- `huggingface_repo`: path of the model in the Huggingface hub, prepend 'https://huggingface.co/' to get the repo URL\n",
"- `disk_size_GB`: total files size (in GB) which will be downloaded and stored on your disk in the Huggingface models cache directory (see 'HF_HOME' environment variable)\n",
"- `gated_access`: if True, you will need to be granted access before you can download the model\n",
Expand All @@ -377,14 +366,53 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"stablelm_3b: stabilityai/stablelm-3b-4e1t\n",
"** WARNING - Gated access: you need to request access on the Huggingface website **\n",
"- huggingface repo: stabilityai/stablelm-3b-4e1t\n",
"- huggingface read access token: mandatory\n",
"- disk size: 5.21 GB\n",
"- weights format: Huggingface safetensors\n"
]
}
],
"source": [
"base_models['stablelm_3b'].print_download_properties()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mixtral_moe_46bx13b_hqq3bit: mobiuslabsgmbh/Mixtral-8x7B-Instruct-v0.1-hf-attn-4bit-moe-3bit-metaoffload-HQQ\n",
"- huggingface repo: mobiuslabsgmbh/Mixtral-8x7B-Instruct-v0.1-hf-attn-4bit-moe-3bit-metaoffload-HQQ\n",
"- disk size: 20.88 GB\n",
"- weights format: Huggingface safetensors\n",
"- model dependencies installation commands\n",
" - pip install --upgrade hqq\n",
" - git clone https://github.com/mobiusml/hqq/\n",
" - cd hqq/hqq/kernels && python setup_cuda.py install\n"
]
}
],
"source": [
"base_models['mixtral_moe_46bx13b_hqq3bit'].print_download_properties()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`ModelInfo` attributes - Load model in memory with Huggingface transformers:\n",
"`ModelInfo` attributes - **Load model** in memory with Huggingface transformers\n",
"- `tokenizer_remote_code`: False if the tokenizer code is included in the Huggingface transformers library, True if you need to trust the tokenizer code downloaded from the model repository\n",
"- `vocab_size_code`: additional code to execute after load to set the tokenizer vocabulary size\n",
"- `padding_token_code`: additional code to execute after load to set the tokenizer padding token\n",
Expand All @@ -394,35 +422,45 @@
"- `model_load_dtype`: specific torch.dtype to use to load the model without wasting memory (for example fp16 if the model weights are saved in fp32)\n",
"- `quantization_type`: quantization algorithm to use to get the best performance on a local machine with a limited amount of VRAM\n",
"- `precision`: number of bits per parameter after quantization, average rounded to the closest integer \n",
"- `memory_size_MB`: VRAM necessary just to load the model in memory (in MB)\n",
"\n",
"`ModelInfo` attributes - Performance tests on real-world business language in English, French, German, and Spanish (retail banking websites, approx 10 millions tokens per language)\n",
"- `memory_size_MB`: VRAM necessary just to load the model in memory (in MB)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`ModelInfo` attributes - **Performance tests** on real-world business language in English, French, German, and Spanish (retail banking websites, approx. 10 million tokens per language)\n",
"- `ppl_sequence_length`: max sequence length used to measure perplexity (optimized for 24 GB VRAM)\n",
"- `ppl_batch_size`: batch size used during the perplexity test (optimized for 24 GB VRAM)\n",
"- `ppl_memory_size_MB`: total VRAM used during the perplexity test (in MB)\n",
"- `fr_tokens_M`: FRENCH dataset - millions of tokens tested\n",
"- `fr_tokens_duration`: FRENCH dataset - duration to tokenize the dataset (timedelta object)\n",
"- `fr_tokens_duration`: FRENCH dataset - time to tokenize the dataset (timedelta object)\n",
"- `fr_pplu_x1000`: FRENCH dataset - unigram-normalized perplexity (x1000)\n",
"- `fr_ppl`: FRENCH dataset - perplexity\n",
"- `fr_ppl_duration`: FRENCH dataset - duration to compute the model perplexity (timedelta object)\n",
"- `fr_ppl_duration`: FRENCH dataset - time to compute the model perplexity (timedelta object)\n",
"- `en_tokens_M` / `en_tokens_duration` / `en_pplu_x1000` / `en_ppl` / `en_ppl_duration`: same metrics for the ENGLISH dataset \n",
"- `de_tokens_M` / `de_tokens_duration` / `de_pplu_x1000` / `de_ppl` / `de_ppl_duration`: same metrics for the GERMAN dataset \n",
"- `es_tokens_M` / `es_tokens_duration` / `es_pplu_x1000` / `es_ppl` / `es_ppl_duration`: same metrics for the SPANISH dataset"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"execution": {
"iopub.execute_input": "2024-03-30T14:14:53.551802Z",
"iopub.status.busy": "2024-03-30T14:14:53.551454Z",
"iopub.status.idle": "2024-03-30T14:14:53.560662Z",
"shell.execute_reply": "2024-03-30T14:14:53.560168Z",
"shell.execute_reply.started": "2024-03-30T14:14:53.551787Z"
},
"tags": []
},
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
Expand Down Expand Up @@ -459,18 +497,6 @@
"display_name": "wordslab-llms-lib",
"language": "python",
"name": "wordslab-llms-lib"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
Binary file removed nbs/base_models.xlsx
Binary file not shown.
Loading

0 comments on commit 3d5c452

Please sign in to comment.