From d844b1302a8abf9c69a198004b8a38d5f0a7f66d Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sat, 27 Jan 2024 01:10:09 +0530 Subject: [PATCH 01/26] updated copyright year --- LICENSE | 2 +- docs/source/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index 985bd07..c48628e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 Anirban Ray +Copyright (c) 2023-2024 Anirban Ray Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/source/conf.py b/docs/source/conf.py index 2ef55bb..155dce0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -8,7 +8,7 @@ project = "query-package-documentation" version = str(generative_ai.__version__) -project_copyright = "2024, Anirban Ray" +project_copyright = "2023-2024, Anirban Ray" author = "Anirban Ray" release = f"v{version}" From b50e1cf47ce7474f5b1dd8b083e650a9da232634 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sat, 27 Jan 2024 17:21:30 +0530 Subject: [PATCH 02/26] added evaluation dependency --- pyproject.toml | 1 + requirements/constraints.fine_tuning.txt | 1 + requirements/requirements.fine_tuning.txt | 1 + 3 files changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 324b359..a306737 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ fine-tuning = [ "accelerate<0.27,>=0.24.1", "bitsandbytes<0.43,>=0.41.2", "datasets<2.17,>=2.15", + "evaluate<0.5,>=0.4.1", "peft<0.8,>=0.6.2", "safetensors<0.5,>=0.4", "torch<2.2,>=2.1.1", diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt index 27726a0..be7eebc 100644 --- a/requirements/constraints.fine_tuning.txt +++ b/requirements/constraints.fine_tuning.txt @@ -1,6 +1,7 @@ accelerate<0.27,>=0.24.1 
bitsandbytes<0.43,>=0.41.2 datasets<2.17,>=2.15.0 +evaluate<0.5,>=0.4.1 peft<0.8,>=0.6.2 safetensors<0.5,>=0.4.0 torch<2.2,>=2.1.1 diff --git a/requirements/requirements.fine_tuning.txt b/requirements/requirements.fine_tuning.txt index 50dea65..39d3445 100644 --- a/requirements/requirements.fine_tuning.txt +++ b/requirements/requirements.fine_tuning.txt @@ -1,6 +1,7 @@ accelerate bitsandbytes datasets +evaluate peft safetensors torch From 5af347acd078afc4d33d378c60c9dbdf46f1517a Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sat, 27 Jan 2024 23:39:52 +0530 Subject: [PATCH 03/26] stop storing tuning prompts --- .../dataset_generation/utils_generation.py | 44 ------------------- 1 file changed, 44 deletions(-) diff --git a/src/generative_ai/dataset_generation/utils_generation.py b/src/generative_ai/dataset_generation/utils_generation.py index b6ebe9a..43d5939 100644 --- a/src/generative_ai/dataset_generation/utils_generation.py +++ b/src/generative_ai/dataset_generation/utils_generation.py @@ -439,44 +439,6 @@ class Document(pydantic.BaseModel): answer: str split: SplitName - @pydantic.computed_field - @functools.cached_property - def instruction_with_context(self: "Document") -> str: - """Store tuning prompt for the document using context, question and answer. - - Returns - ------- - str - tuning prompt for the document using context, question and answer - """ - system_instruction = ( - "Below is a question that can be answered using the following context. " - "Write an answer for the question appropriately without using any additional data." 
- ) - - return " ".join( - [ - "", - f"[INST] {system_instruction} [/INST]", - f"[INST] Context: {self.context} [/INST]", - f"[INST] Question: {self.question} [/INST]", - f"[INST] Answer: {self.answer} [/INST]", - "", - ] - ) - - @pydantic.computed_field - @functools.cached_property - def instruction_without_context(self: "Document") -> str: - """Store tuning prompt for the document using question and answer. - - Returns - ------- - str - tuning prompt for the document using question and answer - """ - return f"[INST] {self.question} [/INST] {self.answer} " - class Dataset(pydantic.BaseModel): """Store details of a dataset. @@ -526,18 +488,12 @@ class JSONDocument(pydantic.BaseModel): answer to the question or instruction based on the ``context`` split : SplitName split allocation of the document - instruction_with_context : str - tuning prompt for the document using context, question and answer - instruction_without_context : str - tuning prompt for the document using question and answer """ context: str question: str answer: str split: SplitName - instruction_with_context: str - instruction_without_context: str class JSONDataset(pydantic.BaseModel): From fad67a681cb7c1b18a356ae74b66114dd4f49336 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 11 Feb 2024 14:33:01 +0530 Subject: [PATCH 04/26] configuration updates --- .flake8 | 5 +- .pre-commit-config.yaml | 30 ++- pyproject.toml | 196 ++++++++++++++---- .../dataset_generation/step_1_generation.py | 2 +- .../dataset_generation/step_2_generation.py | 13 +- ...nd_using_instruction_without_context.ipynb | 18 +- .../information_retrieval/__init__.py | 2 - .../orchestrate_retrieval.py | 28 +-- .../information_retrieval/step_1_retrieval.py | 16 +- .../information_retrieval/step_2_retrieval.py | 19 +- .../information_retrieval/step_3_retrieval.py | 23 +- .../information_retrieval/utils_retrieval.py | 4 - src/generative_ai/top_level.py | 2 +- 13 files changed, 248 
insertions(+), 110 deletions(-) diff --git a/.flake8 b/.flake8 index e7ad425..4fa985f 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,8 @@ [flake8] -extend-ignore = E203 +extend-ignore = + # whitespace before ‘,’, ‘;’, or ‘:’ + E203 per-file-ignores = + # module imported but unused __init__.py: F401 max-line-length = 99 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf5ffb2..6b4cf17 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,6 +16,9 @@ repos: - id: detect-private-key - id: end-of-file-fixer - id: mixed-line-ending + - id: name-tests-test + args: + - --pytest-test-first - id: no-commit-to-branch - id: pretty-format-json args: @@ -46,7 +49,7 @@ repos: - src pass_filenames: false - repo: https://github.com/psf/black - rev: 24.1.0 + rev: 24.1.1 hooks: - id: black additional_dependencies: @@ -85,12 +88,16 @@ repos: stages: - manual - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.14 + rev: v0.2.1 hooks: - id: ruff args: - src pass_filenames: false + - id: ruff-format + args: + - src + pass_filenames: false - repo: https://github.com/jendrikseipp/vulture rev: v2.10 hooks: @@ -122,6 +129,12 @@ repos: args: - src pass_filenames: false + - repo: https://github.com/numpy/numpydoc + rev: v1.6.0 + hooks: + - id: numpydoc-validation + stages: + - manual - repo: https://github.com/tox-dev/pyproject-fmt rev: 1.7.0 hooks: @@ -140,6 +153,19 @@ repos: - --write-changes stages: - manual + - repo: https://github.com/crate-ci/typos + rev: v1.18.2 + hooks: + - id: typos + args: + - --force-exclude + - --write-changes + - --locale + - en-gb + - --format + - brief + stages: + - manual default_language_version: python: python3.11 fail_fast: false diff --git a/pyproject.toml b/pyproject.toml index a306737..2f0721c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -168,82 +168,127 @@ target-version = [ [tool.ruff] fix = true +indent-width = 4 +line-length = 99 +output-format = "grouped" +preview = false +respect-gitignore = 
true +src = [ + "docs", + "src", + "tests", +] +target-version = "py311" + +[tool.ruff.format] +docstring-code-format = true +docstring-code-line-length = "dynamic" +indent-style = "space" +preview = false +quote-style = "double" + +[tool.ruff.lint] +fixable = [ + "ALL", +] ignore = [ - "COM", - "D203", - "D213", - "DTZ", - "EM", - "FBT", - "FIX", - "G", - "ICN", - "PD", - "RET501", - "RET502", - "RET503", - "RET504", - "SLF", - "TRY003", + "COM", # flake8-commas + "CPY", # flake8-copyright + "D213", # Multi-line docstring summary should start at the second line + "DTZ", # flake8-datetimez + "EM", # flake8-errmsg + "FBT", # flake8-boolean-trap + "FIX", # flake8-fixme + "G", # flake8-logging-format + "ICN", # flake8-import-conventions + "PD", # pandas-vet + "RET501", # Do not explicitly return None in function if it is the only possible return value + "RET502", # Do not implicitly return None in function able to return non-None value + "RET503", # Missing explicit return at the end of function able to return non-None value + "RET504", # Unnecessary assignment to {name} before return statement + "TID252", # Prefer absolute imports over relative imports from parent modules + "TRY003", # Avoid specifying long messages outside the exception class ] ignore-init-module-imports = true -line-length = 99 -output-format = "grouped" +preview = false select = [ "ALL", ] -src = [ - "src", +task-tags = [ + "TODO", + "FIXME", ] -target-version = "py311" -[tool.ruff.flake8-annotations] -allow-star-arg-any = true +[tool.ruff.lint.flake8-annotations] ignore-fully-untyped = true -[tool.ruff.flake8-bugbear] +[tool.ruff.lint.flake8-bugbear] extend-immutable-calls = [ "fastapi.Depends", "fastapi.Query", "pydantic.Field", ] -[tool.ruff.flake8-type-checking] +[tool.ruff.lint.flake8-quotes] +docstring-quotes = "double" +inline-quotes = "double" +multiline-quotes = "double" + +[tool.ruff.lint.flake8-type-checking] exempt-modules = [ "typing", "typing_extensions", ] +quote-annotations = true 
runtime-evaluated-base-classes = [ "pydantic.BaseModel", ] +runtime-evaluated-decorators = [ + "pydantic.validate_call", +] -[tool.ruff.pep8-naming] +[tool.ruff.lint.pep8-naming] classmethod-decorators = [ "pydantic.field_validator", ] -[tool.ruff.per-file-ignores] -"**/__init__.py" = [ - "F401", +[tool.ruff.lint.per-file-ignores] +"src/**/__init__.py" = [ + "F401", # {name} imported but unused; consider using importlib.util.find_spec to test for availability +] +"docs/**/conf.py" = [ + "INP001", # File {filename} is part of an implicit namespace package. Add an __init__.py. +] +"tests/**/conftest.py" = [ + "INP001", # File {filename} is part of an implicit namespace package. Add an __init__.py. + "PLR0913", # Too many arguments in function definition ({c_args} > {max_args}) +] +"tests/**/test_*.py" = [ + "INP001", # File {filename} is part of an implicit namespace package. Add an __init__.py. + "PLR0913", # Too many arguments in function definition ({c_args} > {max_args}) + "S101", # Use of assert detected ] -[tool.ruff.pycodestyle] +[tool.ruff.lint.pycodestyle] max-doc-length = 99 +max-line-length = 99 -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "numpy" ignore-decorators = [ "typing.overload", ] +property-decorators = [ + "pydantic.computed_field", +] -[tool.ruff.pyupgrade] +[tool.ruff.lint.pyupgrade] keep-runtime-typing = true [tool.isort] overwrite_in_place = true profile = "black" atomic = true -float_to_top = true line_length = 99 remove_redundant_aliases = true src_paths = [ @@ -273,7 +318,8 @@ enable = [ "all", ] disable = [ - "logging-fstring-interpolation", + "import-outside-toplevel", # Used when an import statement is used anywhere other than the module toplevel. Move this import to the top of the file. + "logging-fstring-interpolation", # Used when a logging statement has a call form of "logging.(f"...")".Use another type of string formatting instead. 
You can use % formatting but leave interpolation to the logging function by passing the parameters as arguments. If logging-format-interpolation is disabled then you can use str.format. If logging-not-lazy is disabled then you can use % formatting as normal. ] [tool.pylint.reports] @@ -285,6 +331,67 @@ recursive = true wrap-summaries = 99 wrap-descriptions = 99 +[tool.pytest.ini_options] +addopts = "--junit-xml=pytest_junit_report.xml --doctest-modules --doctest-ignore-import-errors --doctest-continue-on-failure" +console_output_style = "count" + +[tool.coverage.run] +branch = true +command_line = "--module pytest" +data_file = "coverage_data" +include = [ + "src/**/*.py", +] +omit = [ + "tests/**/conftest.py", + "tests/**/test_*.py", +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "if __name__ == .__main__.:", + "if typing.TYPE_CHECKING:", +] +fail_under = 85 +include = [ + "src/**/*.py", +] +include_namespace_packages = true +omit = [ + "tests/**/conftest.py", + "tests/**/test_*.py", +] +precision = 2 +show_missing = true + +[tool.coverage.html] +directory = "coverage_html_report" + +[tool.coverage.xml] +output = "coverage_xml_report.xml" + +[tool.mypy] +files = [ + "src", +] +exclude = [ + "conftest", + "test_", +] +strict = true + +[tool.pyright] +include = [ + "src", +] +exclude = [ + "tests/**/conftest.py", + "tests/**/test_*.py", +] +pythonVersion = "3.11" +reportMissingImports = false + [tool.autoflake] in-place = true remove-all-unused-imports = true @@ -305,14 +412,17 @@ paths = [ ] [tool.numpydoc_validation] -checks = [ +checks = [ # Report findings on all checks except the ones listed below "all" "all", - "GL01", - "ES01", - "PR08", - "PR09", - "RT04", - "RT05", - "SA01", - "EX01", + "GL01", # Docstring text (summary) should start in the line immediately after the opening quotes (not in the same line, or leaving a blank line in between) + "ES01", # No extended summary found + "PR08", # Parameter "{param_name}" description should 
start with a capital letter + "PR09", # Parameter "{param_name}" description should finish with "." + "RT04", # Return value description should start with a capital letter + "RT05", # Return value description should finish with "." + "SA01", # See Also section not found + "EX01", # No examples section found +] +exclude = [ # Don't report issues on objects matching any of the regular expressions + "\\.__init__$", ] diff --git a/src/generative_ai/dataset_generation/step_1_generation.py b/src/generative_ai/dataset_generation/step_1_generation.py index 68ab8ef..d48f509 100644 --- a/src/generative_ai/dataset_generation/step_1_generation.py +++ b/src/generative_ai/dataset_generation/step_1_generation.py @@ -6,7 +6,7 @@ import inspect import logging import pkgutil -import types +import types # noqa: TCH003 import typing import pydantic diff --git a/src/generative_ai/dataset_generation/step_2_generation.py b/src/generative_ai/dataset_generation/step_2_generation.py index 825f441..0919078 100644 --- a/src/generative_ai/dataset_generation/step_2_generation.py +++ b/src/generative_ai/dataset_generation/step_2_generation.py @@ -27,6 +27,9 @@ train_proportion=0.6, validation_proportion=0.2, test_proportion=0.2 ) +EMPTY_PARAMETER = inspect.Parameter.empty +EMPTY_SIGNATURE = inspect.Signature.empty + @pydantic.validate_call(validate_return=True) def allocate_tuning_pairs( @@ -1212,7 +1215,7 @@ def generate_class_member_dataset( # noqa: C901, PLR0912, PLR0915 parameter_name = class_parameter.parameter_name parameter = f"'{parameter_name}' argument in {class_member}" - if (parameter_default := class_parameter.parameter_default) is inspect._empty: + if (parameter_default := class_parameter.parameter_default) is EMPTY_PARAMETER: class_parameter_defaults_pairs = [ ( f"Tell default value of {parameter}.", @@ -1273,7 +1276,7 @@ def generate_class_member_dataset( # noqa: C901, PLR0912, PLR0915 ) 
class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameter_defaults_pairs)) - if (parameter_annotation := class_parameter.parameter_annotation) is inspect._empty: + if (parameter_annotation := class_parameter.parameter_annotation) is EMPTY_PARAMETER: class_parameter_types_pairs = [ ( f"Name type hint for {parameter}.", @@ -1951,7 +1954,7 @@ def generate_function_member_dataset( # noqa: C901, PLR0912, PLR0915 parameter_name = function_parameter.parameter_name parameter = f"'{parameter_name}' argument in {function_member}" - if (parameter_default := function_parameter.parameter_default) is inspect._empty: + if (parameter_default := function_parameter.parameter_default) is EMPTY_PARAMETER: function_parameter_defaults_pairs = [ (f"Default value of {parameter}?", f"{parameter} does not have a default value."), ( @@ -2013,7 +2016,7 @@ def generate_function_member_dataset( # noqa: C901, PLR0912, PLR0915 allocate_tuning_pairs(function_parameter_defaults_pairs) ) - if (parameter_annotation := function_parameter.parameter_annotation) is inspect._empty: + if (parameter_annotation := function_parameter.parameter_annotation) is EMPTY_PARAMETER: function_parameter_types_pairs = [ ( f"What is type annotation of {parameter}?", @@ -2156,7 +2159,7 @@ def generate_function_member_dataset( # noqa: C901, PLR0912, PLR0915 if ( returns_annotation := member_type_details.function_returns.returns_annotation - ) is inspect._empty: + ) is EMPTY_SIGNATURE: function_return_type_pairs = [ ( f"What is the return type annotation of {function_member}?", diff --git a/src/generative_ai/fine_tuning/v2_opt_350m/end_to_end_using_instruction_without_context.ipynb b/src/generative_ai/fine_tuning/v2_opt_350m/end_to_end_using_instruction_without_context.ipynb index d25935e..f615f3c 100644 --- a/src/generative_ai/fine_tuning/v2_opt_350m/end_to_end_using_instruction_without_context.ipynb +++ b/src/generative_ai/fine_tuning/v2_opt_350m/end_to_end_using_instruction_without_context.ipynb @@ -539,9 
+539,7 @@ ], "source": [ "model = transformers.AutoModelForCausalLM.from_pretrained(\n", - " base_model_identifier,\n", - " quantization_config=quantisation_configuration,\n", - " device_map=\"auto\",\n", + " base_model_identifier, quantization_config=quantisation_configuration, device_map=\"auto\"\n", ")\n", "\n", "model.config.use_cache = False\n", @@ -571,11 +569,7 @@ "outputs": [], "source": [ "peft_configuration = peft.LoraConfig(\n", - " r=8,\n", - " lora_alpha=16,\n", - " lora_dropout=0.1,\n", - " bias=\"lora_only\",\n", - " task_type=peft.TaskType.CAUSAL_LM,\n", + " r=8, lora_alpha=16, lora_dropout=0.1, bias=\"lora_only\", task_type=peft.TaskType.CAUSAL_LM\n", ")\n", "\n", "peft_model = peft.get_peft_model(model, peft_configuration)" @@ -956,9 +950,7 @@ "outputs": [], "source": [ "untuned_model = transformers.AutoModelForCausalLM.from_pretrained(\n", - " base_model_identifier,\n", - " quantization_config=quantisation_configuration,\n", - " device_map=\"auto\",\n", + " base_model_identifier, quantization_config=quantisation_configuration, device_map=\"auto\"\n", ")" ] }, @@ -1098,9 +1090,7 @@ "outputs": [], "source": [ "untuned_model = transformers.AutoModelForCausalLM.from_pretrained(\n", - " base_model_identifier,\n", - " quantization_config=quantisation_configuration,\n", - " device_map=\"auto\",\n", + " base_model_identifier, quantization_config=quantisation_configuration, device_map=\"auto\"\n", ")" ] }, diff --git a/src/generative_ai/information_retrieval/__init__.py b/src/generative_ai/information_retrieval/__init__.py index db8c63d..ec9d109 100644 --- a/src/generative_ai/information_retrieval/__init__.py +++ b/src/generative_ai/information_retrieval/__init__.py @@ -25,7 +25,6 @@ RetrievalType, StandardModel, TransformerType, - ValidatedChroma, ) __all__ = [ @@ -37,7 +36,6 @@ "RetrievalType", "StandardModel", "TransformerType", - "ValidatedChroma", "configure_language_model", "create_database_retriever", "create_document_embedder", diff --git 
a/src/generative_ai/information_retrieval/orchestrate_retrieval.py b/src/generative_ai/information_retrieval/orchestrate_retrieval.py index 9f216e1..f2fde88 100644 --- a/src/generative_ai/information_retrieval/orchestrate_retrieval.py +++ b/src/generative_ai/information_retrieval/orchestrate_retrieval.py @@ -1,10 +1,8 @@ """Define functionalities to orchestrate information retrieval.""" -import pathlib +import typing import pydantic -from langchain.docstore.document import Document -from langchain.schema.runnable import RunnableSerializable from .step_1_retrieval import ( create_document_embedder, @@ -20,11 +18,17 @@ PipelineType, RetrievalType, TransformerType, - ValidatedChroma, ) +if typing.TYPE_CHECKING: + import pathlib -def load_source_documents(file_path: pathlib.Path) -> list[Document]: + from langchain.docstore.document import Document + from langchain.schema.runnable import RunnableSerializable + from langchain.vectorstores.chroma import Chroma + + +def load_source_documents(file_path: "pathlib.Path") -> list["Document"]: """Load and partition source documents. Parameters @@ -44,8 +48,8 @@ def load_source_documents(file_path: pathlib.Path) -> list[Document]: def create_embedding_database( - embedding_model: str, directory_path: pathlib.Path, source_documents: list[Document] -) -> ValidatedChroma: + embedding_model: str, directory_path: "pathlib.Path", source_documents: list["Document"] +) -> "Chroma": """Prepare an embedding database. Parameters @@ -70,7 +74,7 @@ def create_embedding_database( return vector_store -def store_embedding_database(vector_store: ValidatedChroma) -> None: +def store_embedding_database(vector_store: "Chroma") -> None: """Dump vector store to disk into configured directory. 
Parameters @@ -81,7 +85,7 @@ def store_embedding_database(vector_store: ValidatedChroma) -> None: vector_store.persist() -def load_embedding_database(embedding_model: str, directory_path: pathlib.Path) -> ValidatedChroma: +def load_embedding_database(embedding_model: str, directory_path: "pathlib.Path") -> "Chroma": """Load vector store from disk from configured directory. Parameters @@ -168,13 +172,13 @@ def configure_language_model( # noqa: PLR0913 def prepare_question_answer_chain( # noqa: PLR0913 - embedding_database: ValidatedChroma, + embedding_database: "Chroma", search_type: RetrievalType, number_of_documents: int, initial_number_of_documents: int, diversity_level: float, language_model: LanguageModel, -) -> RunnableSerializable: +) -> "RunnableSerializable": """Prepare a question answering pipeline. Parameters @@ -212,7 +216,7 @@ def prepare_question_answer_chain( # noqa: PLR0913 def run_question_answer_chain( - question_answer_chain: RunnableSerializable, question: str + question_answer_chain: "RunnableSerializable", question: str ) -> tuple[dict, CaptureDetailsCallback]: """Run question answering pipeline for user input. 
diff --git a/src/generative_ai/information_retrieval/step_1_retrieval.py b/src/generative_ai/information_retrieval/step_1_retrieval.py index 6962928..648c368 100644 --- a/src/generative_ai/information_retrieval/step_1_retrieval.py +++ b/src/generative_ai/information_retrieval/step_1_retrieval.py @@ -1,17 +1,19 @@ """Define functionalities to store document embeddings.""" -import pathlib +import typing -from langchain.docstore.document import Document from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.vectorstores.chroma import Chroma from langchain_community.document_loaders import JSONLoader from langchain_community.embeddings import HuggingFaceEmbeddings -from .utils_retrieval import ValidatedChroma +if typing.TYPE_CHECKING: + import pathlib + from langchain.docstore.document import Document -def load_json_documents(file_path: pathlib.Path) -> list[Document]: + +def load_json_documents(file_path: "pathlib.Path") -> list["Document"]: """Load retrieval documents from a JSON file. Parameters @@ -30,7 +32,7 @@ def load_json_documents(file_path: pathlib.Path) -> list[Document]: return raw_documents -def partition_documents(raw_documents: list[Document]) -> list[Document]: +def partition_documents(raw_documents: list["Document"]) -> list["Document"]: """Partition retrieval documents into chunks. Parameters @@ -72,9 +74,7 @@ def create_document_embedder(embedding_model: str) -> HuggingFaceEmbeddings: return embedder -def create_vector_store( - embedder: HuggingFaceEmbeddings, directory_path: pathlib.Path -) -> ValidatedChroma: +def create_vector_store(embedder: HuggingFaceEmbeddings, directory_path: "pathlib.Path") -> Chroma: """Initialise a Chroma vector store. 
Parameters diff --git a/src/generative_ai/information_retrieval/step_2_retrieval.py b/src/generative_ai/information_retrieval/step_2_retrieval.py index bd0d34e..ae610f7 100644 --- a/src/generative_ai/information_retrieval/step_2_retrieval.py +++ b/src/generative_ai/information_retrieval/step_2_retrieval.py @@ -1,23 +1,28 @@ """Define functionalities to fetch and use relevant information from database.""" +import typing + import transformers from langchain.chains import RetrievalQA -from langchain.chains.retrieval_qa.base import BaseRetrievalQA from langchain.llms.ctransformers import CTransformers from langchain.llms.huggingface_pipeline import HuggingFacePipeline from langchain.prompts import PromptTemplate -from langchain.schema.vectorstore import VectorStoreRetriever -from .utils_retrieval import LanguageModel, RetrievalType, TransformerType, ValidatedChroma +from .utils_retrieval import LanguageModel, RetrievalType, TransformerType + +if typing.TYPE_CHECKING: + from langchain.chains.retrieval_qa.base import BaseRetrievalQA + from langchain.schema.vectorstore import VectorStoreRetriever + from langchain.vectorstores.chroma import Chroma def create_database_retriever( - embedding_database: ValidatedChroma, + embedding_database: "Chroma", search_type: RetrievalType, number_of_documents: int, initial_number_of_documents: int, diversity_level: float, -) -> VectorStoreRetriever: +) -> "VectorStoreRetriever": """Prepare a vector store retriever for the retrieval database. Parameters @@ -134,8 +139,8 @@ def create_llm(language_model: LanguageModel) -> CTransformers | HuggingFacePipe def generate_retrieval_chain( - database_retriever: VectorStoreRetriever, llm: CTransformers | HuggingFacePipeline -) -> BaseRetrievalQA: + database_retriever: "VectorStoreRetriever", llm: CTransformers | HuggingFacePipeline +) -> "BaseRetrievalQA": """Prepare a retrieval chain for question answering. 
Parameters diff --git a/src/generative_ai/information_retrieval/step_3_retrieval.py b/src/generative_ai/information_retrieval/step_3_retrieval.py index b2c67fc..d24a8b6 100644 --- a/src/generative_ai/information_retrieval/step_3_retrieval.py +++ b/src/generative_ai/information_retrieval/step_3_retrieval.py @@ -2,10 +2,13 @@ import time import typing -import uuid from langchain.callbacks.base import BaseCallbackHandler -from langchain_core.outputs import LLMResult + +if typing.TYPE_CHECKING: + import uuid + + from langchain_core.outputs import LLMResult class CaptureDetailsCallback(BaseCallbackHandler): @@ -19,7 +22,7 @@ class CaptureDetailsCallback(BaseCallbackHandler): time taken (in seconds) for large language model to generate response """ - def __init__(self: "CaptureDetailsCallback") -> None: # numpydoc ignore=GL08 + def __init__(self: "CaptureDetailsCallback") -> None: super().__init__() self.effective_prompt: str | None = None @@ -30,11 +33,11 @@ def on_llm_start( # noqa: PLR0913, numpydoc ignore=PR01 serialized: dict, prompts: list[str], *, - run_id: uuid.UUID, - parent_run_id: uuid.UUID | None = None, + run_id: "uuid.UUID", + parent_run_id: "uuid.UUID | None" = None, tags: list[str] | None = None, metadata: dict | None = None, - **kwargs: typing.Any, + **kwargs: typing.Any, # noqa: ANN401 ) -> None: """Run when large language model starts generating response. @@ -58,11 +61,11 @@ def on_llm_start( # noqa: PLR0913, numpydoc ignore=PR01 def on_llm_end( # numpydoc ignore=PR01 self: "CaptureDetailsCallback", - response: LLMResult, + response: "LLMResult", *, - run_id: uuid.UUID, - parent_run_id: uuid.UUID | None = None, - **kwargs: typing.Any, + run_id: "uuid.UUID", + parent_run_id: "uuid.UUID | None" = None, + **kwargs: typing.Any, # noqa: ANN401 ) -> None: """Run when large language model finishes generating response. 
diff --git a/src/generative_ai/information_retrieval/utils_retrieval.py b/src/generative_ai/information_retrieval/utils_retrieval.py index 1b365a1..56fb1f5 100644 --- a/src/generative_ai/information_retrieval/utils_retrieval.py +++ b/src/generative_ai/information_retrieval/utils_retrieval.py @@ -4,7 +4,6 @@ import typing import pydantic -from langchain.vectorstores.chroma import Chroma class RetrievalType(str, enum.Enum): @@ -72,8 +71,6 @@ class QuantisedModel(pydantic.BaseModel): ] LanguageModelAdapter = pydantic.TypeAdapter(LanguageModel) -ValidatedChroma = pydantic.InstanceOf[Chroma] - __all__ = [ "LanguageModel", @@ -83,5 +80,4 @@ class QuantisedModel(pydantic.BaseModel): "QuantisedModel", "RetrievalType", "StandardModel", - "ValidatedChroma", ] diff --git a/src/generative_ai/top_level.py b/src/generative_ai/top_level.py index e7bbed5..013166d 100644 --- a/src/generative_ai/top_level.py +++ b/src/generative_ai/top_level.py @@ -1,7 +1,7 @@ """Define functionalities for top level modules.""" import logging -import pathlib +import pathlib # noqa: TCH003 import shutil import pydantic From ad1f9b8b6ba64efe89fb7af8328cd99c14aa43c0 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Tue, 13 Feb 2024 09:51:11 +0530 Subject: [PATCH 05/26] updated dependencies --- pyproject.toml | 16 ++++++++-------- requirements/constraints.fine_tuning.txt | 8 ++++---- requirements/constraints.txt | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2f0721c..6f47fff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,12 +44,12 @@ dynamic=[ dependencies = [ "chromadb<0.5,>=0.4.15", "ctransformers<0.3,>=0.2.27", - "gradio<4.17,>=4.12", + "gradio<4.19,>=4.16", "jq<1.7,>=1.6", - "langchain<0.1.5,>=0.1.1", + "langchain<0.1.7,>=0.1.1", "numpydoc<1.7,>=1.6", - "pydantic<2.6,>=2.4.2", - "sentence-transformers<2.3,>=2.2.2", + "pydantic<2.7,>=2.4.2", + "sentence-transformers<2.4,>=2.2.2", 
"transformers<4.38,>=4.36", "typer<0.10,>=0.9", ] @@ -90,13 +90,13 @@ doc = [ "sphinx-copybutton", ] fine-tuning = [ - "accelerate<0.27,>=0.24.1", + "accelerate<0.28,>=0.24.1", "bitsandbytes<0.43,>=0.41.2", - "datasets<2.17,>=2.15", + "datasets<2.18,>=2.15", "evaluate<0.5,>=0.4.1", - "peft<0.8,>=0.6.2", + "peft<0.9,>=0.6.2", "safetensors<0.5,>=0.4", - "torch<2.2,>=2.1.1", + "torch<2.3,>=2.1.1", "transformers<4.38,>=4.36", "trl<0.8,>=0.7.4", ] diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt index be7eebc..d5d39d6 100644 --- a/requirements/constraints.fine_tuning.txt +++ b/requirements/constraints.fine_tuning.txt @@ -1,9 +1,9 @@ -accelerate<0.27,>=0.24.1 +accelerate<0.28,>=0.24.1 bitsandbytes<0.43,>=0.41.2 -datasets<2.17,>=2.15.0 +datasets<2.18,>=2.15.0 evaluate<0.5,>=0.4.1 -peft<0.8,>=0.6.2 +peft<0.9,>=0.6.2 safetensors<0.5,>=0.4.0 -torch<2.2,>=2.1.1 +torch<2.3,>=2.1.1 transformers<4.38,>=4.36 trl<0.8,>=0.7.4 diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 64d6da0..2e4967c 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -1,10 +1,10 @@ chromadb<0.5,>=0.4.15 ctransformers<0.3,>=0.2.27 -gradio<4.17,>=4.12 +gradio<4.19,>=4.16 jq<1.7,>=1.6 -langchain<0.1.5,>=0.1.1 +langchain<0.1.7,>=0.1.1 numpydoc<1.7,>=1.6 -pydantic<2.6,>=2.4.2 -sentence-transformers<2.3,>=2.2.2 +pydantic<2.7,>=2.4.2 +sentence-transformers<2.4,>=2.2.2 transformers<4.38,>=4.36 typer<0.10,>=0.9 From 128d7aaf0e01c1c181b7fa2eb05661d2e215b641 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 18 Feb 2024 19:38:53 +0530 Subject: [PATCH 06/26] updated dependencies --- pyproject.toml | 5 ++--- requirements/constraints.fine_tuning.txt | 1 - requirements/constraints.txt | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6f47fff..d22da69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,9 
+44,9 @@ dynamic=[ dependencies = [ "chromadb<0.5,>=0.4.15", "ctransformers<0.3,>=0.2.27", - "gradio<4.19,>=4.16", + "gradio<4.20,>=4.16", "jq<1.7,>=1.6", - "langchain<0.1.7,>=0.1.1", + "langchain<0.2,>=0.1.1", "numpydoc<1.7,>=1.6", "pydantic<2.7,>=2.4.2", "sentence-transformers<2.4,>=2.2.2", @@ -97,7 +97,6 @@ fine-tuning = [ "peft<0.9,>=0.6.2", "safetensors<0.5,>=0.4", "torch<2.3,>=2.1.1", - "transformers<4.38,>=4.36", "trl<0.8,>=0.7.4", ] format = [ diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt index d5d39d6..69b0e23 100644 --- a/requirements/constraints.fine_tuning.txt +++ b/requirements/constraints.fine_tuning.txt @@ -5,5 +5,4 @@ evaluate<0.5,>=0.4.1 peft<0.9,>=0.6.2 safetensors<0.5,>=0.4.0 torch<2.3,>=2.1.1 -transformers<4.38,>=4.36 trl<0.8,>=0.7.4 diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 2e4967c..bdbdde0 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -1,8 +1,8 @@ chromadb<0.5,>=0.4.15 ctransformers<0.3,>=0.2.27 -gradio<4.19,>=4.16 +gradio<4.20,>=4.16 jq<1.7,>=1.6 -langchain<0.1.7,>=0.1.1 +langchain<0.2,>=0.1.1 numpydoc<1.7,>=1.6 pydantic<2.7,>=2.4.2 sentence-transformers<2.4,>=2.2.2 From 0abd3a472f2ca6e7a6f90631ef1a454e2a915865 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sat, 24 Feb 2024 01:05:00 +0530 Subject: [PATCH 07/26] updated dependencies --- pyproject.toml | 6 +++--- requirements/constraints.txt | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d22da69..1992e72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,13 +44,13 @@ dynamic=[ dependencies = [ "chromadb<0.5,>=0.4.15", "ctransformers<0.3,>=0.2.27", - "gradio<4.20,>=4.16", + "gradio<4.20,>=4.19.2", "jq<1.7,>=1.6", "langchain<0.2,>=0.1.1", "numpydoc<1.7,>=1.6", "pydantic<2.7,>=2.4.2", - "sentence-transformers<2.4,>=2.2.2", - "transformers<4.38,>=4.36", + 
"sentence-transformers<2.5,>=2.2.2", + "transformers<4.39,>=4.36", "typer<0.10,>=0.9", ] [project.optional-dependencies] diff --git a/requirements/constraints.txt b/requirements/constraints.txt index bdbdde0..5f288b5 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -1,10 +1,10 @@ chromadb<0.5,>=0.4.15 ctransformers<0.3,>=0.2.27 -gradio<4.20,>=4.16 +gradio<4.20,>=4.19.2 jq<1.7,>=1.6 langchain<0.2,>=0.1.1 numpydoc<1.7,>=1.6 pydantic<2.7,>=2.4.2 -sentence-transformers<2.4,>=2.2.2 -transformers<4.38,>=4.36 +sentence-transformers<2.5,>=2.2.2 +transformers<4.39,>=4.36 typer<0.10,>=0.9 From 66043a6d2c40b91b8d4dbdf1870ee4f22bb5a89c Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sat, 24 Feb 2024 01:16:25 +0530 Subject: [PATCH 08/26] pre-commit autoupdate --- .pre-commit-config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6b4cf17..a9f1db3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,14 +28,14 @@ repos: - id: requirements-txt-fixer - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.15.0 + rev: v3.15.1 hooks: - id: pyupgrade args: - --keep-runtime-typing - --py311-plus - repo: https://github.com/pycqa/autoflake - rev: v2.2.1 + rev: v2.3.0 hooks: - id: autoflake args: @@ -49,7 +49,7 @@ repos: - src pass_filenames: false - repo: https://github.com/psf/black - rev: 24.1.1 + rev: 24.2.0 hooks: - id: black additional_dependencies: @@ -88,7 +88,7 @@ repos: stages: - manual - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.1 + rev: v0.2.2 hooks: - id: ruff args: @@ -99,7 +99,7 @@ repos: - src pass_filenames: false - repo: https://github.com/jendrikseipp/vulture - rev: v2.10 + rev: v2.11 hooks: - id: vulture pass_filenames: false From 41240090445ceff75eaa39f21fc63087ba47a1de Mon Sep 17 00:00:00 2001 From: Anirban Ray 
<39331844+yarnabrina@users.noreply.github.com> Date: Sat, 24 Feb 2024 22:46:45 +0530 Subject: [PATCH 09/26] reduced langchain upper bound to handle return_full_text --- pyproject.toml | 2 +- requirements/constraints.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1992e72..f24ec08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "ctransformers<0.3,>=0.2.27", "gradio<4.20,>=4.19.2", "jq<1.7,>=1.6", - "langchain<0.2,>=0.1.1", + "langchain<0.1.8,>=0.1.1", "numpydoc<1.7,>=1.6", "pydantic<2.7,>=2.4.2", "sentence-transformers<2.5,>=2.2.2", diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 5f288b5..af1bdaf 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -2,7 +2,7 @@ chromadb<0.5,>=0.4.15 ctransformers<0.3,>=0.2.27 gradio<4.20,>=4.19.2 jq<1.7,>=1.6 -langchain<0.2,>=0.1.1 +langchain<0.1.8,>=0.1.1 numpydoc<1.7,>=1.6 pydantic<2.7,>=2.4.2 sentence-transformers<2.5,>=2.2.2 From 09915dcb87005197ee40ad538d9f3d0b9acc4d5e Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 25 Feb 2024 15:36:11 +0530 Subject: [PATCH 10/26] added padding --- src/generative_ai/information_retrieval/step_2_retrieval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/generative_ai/information_retrieval/step_2_retrieval.py b/src/generative_ai/information_retrieval/step_2_retrieval.py index ae610f7..2e2b7bb 100644 --- a/src/generative_ai/information_retrieval/step_2_retrieval.py +++ b/src/generative_ai/information_retrieval/step_2_retrieval.py @@ -112,6 +112,7 @@ def create_llm(language_model: LanguageModel) -> CTransformers | HuggingFacePipe use_fast=True, padding="max_length", truncation=True, + padding_side="left", ) tokeniser.pad_token = tokeniser.eos_token From ccab23b5d8b016fc83a06bbf30acd319fd988ed8 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> 
Date: Sat, 2 Mar 2024 13:56:56 +0530 Subject: [PATCH 11/26] updated dependencies --- pyproject.toml | 6 +++--- requirements/constraints.fine_tuning.txt | 4 ++-- requirements/constraints.txt | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f24ec08..43b2ac5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dependencies = [ "langchain<0.1.8,>=0.1.1", "numpydoc<1.7,>=1.6", "pydantic<2.7,>=2.4.2", - "sentence-transformers<2.5,>=2.2.2", + "sentence-transformers<2.6,>=2.2.2", "transformers<4.39,>=4.36", "typer<0.10,>=0.9", ] @@ -92,9 +92,9 @@ doc = [ fine-tuning = [ "accelerate<0.28,>=0.24.1", "bitsandbytes<0.43,>=0.41.2", - "datasets<2.18,>=2.15", + "datasets<2.19,>=2.15", "evaluate<0.5,>=0.4.1", - "peft<0.9,>=0.6.2", + "peft<0.10,>=0.6.2", "safetensors<0.5,>=0.4", "torch<2.3,>=2.1.1", "trl<0.8,>=0.7.4", diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt index 69b0e23..9ff341c 100644 --- a/requirements/constraints.fine_tuning.txt +++ b/requirements/constraints.fine_tuning.txt @@ -1,8 +1,8 @@ accelerate<0.28,>=0.24.1 bitsandbytes<0.43,>=0.41.2 -datasets<2.18,>=2.15.0 +datasets<2.19,>=2.15.0 evaluate<0.5,>=0.4.1 -peft<0.9,>=0.6.2 +peft<0.10,>=0.6.2 safetensors<0.5,>=0.4.0 torch<2.3,>=2.1.1 trl<0.8,>=0.7.4 diff --git a/requirements/constraints.txt b/requirements/constraints.txt index af1bdaf..5949c85 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -5,6 +5,6 @@ jq<1.7,>=1.6 langchain<0.1.8,>=0.1.1 numpydoc<1.7,>=1.6 pydantic<2.7,>=2.4.2 -sentence-transformers<2.5,>=2.2.2 +sentence-transformers<2.6,>=2.2.2 transformers<4.39,>=4.36 typer<0.10,>=0.9 From 07eea2c0dc9d57186c152b53f1d8e608f76a2b9c Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 3 Mar 2024 00:26:39 +0530 Subject: [PATCH 12/26] removed duplicated dependency --- requirements/requirements.fine_tuning.txt | 1 - 1 
file changed, 1 deletion(-) diff --git a/requirements/requirements.fine_tuning.txt b/requirements/requirements.fine_tuning.txt index 39d3445..727a4ab 100644 --- a/requirements/requirements.fine_tuning.txt +++ b/requirements/requirements.fine_tuning.txt @@ -5,5 +5,4 @@ evaluate peft safetensors torch -transformers trl From 519e64fe129290651cf647f829dae8c54c23bd48 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Mon, 4 Mar 2024 00:03:42 +0530 Subject: [PATCH 13/26] added step 1 --- .../fine_tuning/v3_opt_350m/step_1.ipynb | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 src/generative_ai/fine_tuning/v3_opt_350m/step_1.ipynb diff --git a/src/generative_ai/fine_tuning/v3_opt_350m/step_1.ipynb b/src/generative_ai/fine_tuning/v3_opt_350m/step_1.ipynb new file mode 100644 index 0000000..66c3284 --- /dev/null +++ b/src/generative_ai/fine_tuning/v3_opt_350m/step_1.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import pathlib\n", + "import shutil\n", + "\n", + "from datasets import Dataset, DatasetDict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "step_identifier = pathlib.Path(\"step_1\")\n", + "\n", + "input_directory = pathlib.Path(step_identifier, \"input_directory\")\n", + "working_directory = pathlib.Path(step_identifier, \"working_directory\")\n", + "output_directory = pathlib.Path(step_identifier, \"output_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raw_dataset_path = pathlib.Path(input_directory, \"json_documents.json\")\n", + "\n", + "hugging_face_dataset_path = pathlib.Path(working_directory, \"hugging_face_dataset_directory\")\n", + "hugging_face_dataset_archive = pathlib.Path(output_directory, 
\"hugging_face_dataset_archive.zip\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with raw_dataset_path.open(encoding=\"utf-8\") as file_object:\n", + " raw_dataset = json.load(file_object)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_splits = {split_type: [] for split_type in [\"train\", \"validation\", \"test\"]}\n", + "\n", + "for document in raw_dataset[\"tuning_documents\"]:\n", + " dataset_splits[document[\"split\"]].append(document)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hugging_face_dataset = DatasetDict(\n", + " {\n", + " split_type: Dataset.from_list(split_data)\n", + " for split_type, split_data in dataset_splits.items()\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hugging_face_dataset.save_to_disk(hugging_face_dataset_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = shutil.make_archive(\n", + " str(pathlib.Path(hugging_face_dataset_archive.parent, hugging_face_dataset_archive.stem)),\n", + " hugging_face_dataset_archive.suffix[1:],\n", + " root_dir=working_directory,\n", + " base_dir=hugging_face_dataset_path.stem,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "genai", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 996add84744ee5327b0606f94636536309f4dd0a Mon Sep 17 00:00:00 2001 From: Anirban Ray 
<39331844+yarnabrina@users.noreply.github.com> Date: Sun, 10 Mar 2024 15:41:26 +0530 Subject: [PATCH 14/26] updated dependencies --- pyproject.toml | 6 ++++-- requirements/constraints.fine_tuning.txt | 5 ++++- requirements/constraints.txt | 2 +- requirements/requirements.fine_tuning.txt | 2 ++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 43b2ac5..5d9d592 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ dynamic=[ dependencies = [ "chromadb<0.5,>=0.4.15", "ctransformers<0.3,>=0.2.27", - "gradio<4.20,>=4.19.2", + "gradio<4.22,>=4.19.2", "jq<1.7,>=1.6", "langchain<0.1.8,>=0.1.1", "numpydoc<1.7,>=1.6", @@ -91,11 +91,13 @@ doc = [ ] fine-tuning = [ "accelerate<0.28,>=0.24.1", - "bitsandbytes<0.43,>=0.41.2", + "bitsandbytes<0.44,>=0.41.2", "datasets<2.19,>=2.15", "evaluate<0.5,>=0.4.1", "peft<0.10,>=0.6.2", + "rouge-score<0.2,>=0.1.2", "safetensors<0.5,>=0.4", + "scikit-learn<1.5,>=1.3", "torch<2.3,>=2.1.1", "trl<0.8,>=0.7.4", ] diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt index 9ff341c..dd26e39 100644 --- a/requirements/constraints.fine_tuning.txt +++ b/requirements/constraints.fine_tuning.txt @@ -1,8 +1,11 @@ accelerate<0.28,>=0.24.1 -bitsandbytes<0.43,>=0.41.2 +bitsandbytes<0.44,>=0.41.2 datasets<2.19,>=2.15.0 evaluate<0.5,>=0.4.1 +nltk<3.9,>=3.8.1 peft<0.10,>=0.6.2 +rouge-score<0.2,>=0.1.2 safetensors<0.5,>=0.4.0 +scikit-learn<1.5,>=1.3.0 torch<2.3,>=2.1.1 trl<0.8,>=0.7.4 diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 5949c85..604b111 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -1,6 +1,6 @@ chromadb<0.5,>=0.4.15 ctransformers<0.3,>=0.2.27 -gradio<4.20,>=4.19.2 +gradio<4.22,>=4.19.2 jq<1.7,>=1.6 langchain<0.1.8,>=0.1.1 numpydoc<1.7,>=1.6 diff --git a/requirements/requirements.fine_tuning.txt b/requirements/requirements.fine_tuning.txt index 727a4ab..27ce214 100644 --- 
a/requirements/requirements.fine_tuning.txt +++ b/requirements/requirements.fine_tuning.txt @@ -2,7 +2,9 @@ accelerate bitsandbytes datasets evaluate +nltk peft +rouge-score safetensors torch trl From 812b73b69ff1999fa0083147642864b45f7375f1 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 10 Mar 2024 21:45:43 +0530 Subject: [PATCH 15/26] added step 2 --- .../fine_tuning/v3_opt_350m/step_2.ipynb | 420 ++++++++++++++++++ 1 file changed, 420 insertions(+) create mode 100644 src/generative_ai/fine_tuning/v3_opt_350m/step_2.ipynb diff --git a/src/generative_ai/fine_tuning/v3_opt_350m/step_2.ipynb b/src/generative_ai/fine_tuning/v3_opt_350m/step_2.ipynb new file mode 100644 index 0000000..1e4c0d7 --- /dev/null +++ b/src/generative_ai/fine_tuning/v3_opt_350m/step_2.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pathlib\n", + "import shutil\n", + "\n", + "import numpy\n", + "from datasets import load_from_disk\n", + "from evaluate import load\n", + "from peft import LoraConfig, PeftType, TaskType\n", + "from sklearn.metrics import accuracy_score, f1_score, fbeta_score, precision_score, recall_score\n", + "from torch import Tensor, float16\n", + "from transformers import (\n", + " AutoModelForCausalLM,\n", + " AutoTokenizer,\n", + " BitsAndBytesConfig,\n", + " EarlyStoppingCallback,\n", + " EvalPrediction,\n", + " SchedulerType,\n", + " TrainingArguments,\n", + ")\n", + "from trl import DataCollatorForCompletionOnlyLM, SFTTrainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "step_identifier = pathlib.Path(\"step_2\")\n", + "\n", + "input_directory = pathlib.Path(step_identifier, \"input_directory\")\n", + "working_directory = pathlib.Path(step_identifier, \"working_directory\")\n", + "output_directory = pathlib.Path(step_identifier, 
\"output_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hugging_face_dataset_archive = pathlib.Path(input_directory, \"hugging_face_dataset_archive.zip\")\n", + "hugging_face_dataset_path = pathlib.Path(working_directory, \"hugging_face_dataset_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shutil.unpack_archive(hugging_face_dataset_archive, extract_dir=working_directory)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hugging_face_dataset = load_from_disk(hugging_face_dataset_path)\n", + "\n", + "train_subset = hugging_face_dataset[\"train\"]\n", + "validation_subset = hugging_face_dataset[\"validation\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_model_identifier = \"facebook/opt-350m\"\n", + "\n", + "quantisation_configuration = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_compute_dtype=float16,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_use_double_quant=True,\n", + ")\n", + "\n", + "mask_token_index = -100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tuning_checkpoints_path = pathlib.Path(working_directory, \"tuning_checkpoints_directory\")\n", + "tuning_checkpoints_archive = pathlib.Path(output_directory, \"tuning_checkpoints_archive.zip\")\n", + "\n", + "tuned_adapter_path = pathlib.Path(working_directory, \"tuned_adapter_directory\")\n", + "tuned_adapter_archive = pathlib.Path(output_directory, \"tuned_adapter_archive.zip\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = AutoModelForCausalLM.from_pretrained(\n", + " base_model_identifier,\n", + " 
quantization_config=quantisation_configuration,\n", + " device_map=\"auto\",\n", + " low_cpu_mem_usage=True,\n", + ")\n", + "\n", + "model.config.use_cache = False\n", + "model.config.pretraining_tp = 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tokeniser = AutoTokenizer.from_pretrained(base_model_identifier)\n", + "\n", + "tokeniser.pad_token = tokeniser.eos_token\n", + "tokeniser.padding_side = \"right\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "peft_configuration = LoraConfig(\n", + " peft_type=PeftType.LORA,\n", + " task_type=TaskType.CAUSAL_LM,\n", + " r=8,\n", + " lora_alpha=16,\n", + " lora_dropout=0.1,\n", + " bias=\"none\",\n", + " use_rslora=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "training_configuration = TrainingArguments(\n", + " output_dir=str(tuning_checkpoints_path),\n", + " overwrite_output_dir=True,\n", + " evaluation_strategy=\"epoch\",\n", + " gradient_accumulation_steps=1,\n", + " eval_delay=3,\n", + " learning_rate=1e-4,\n", + " weight_decay=0.001,\n", + " max_grad_norm=0.3,\n", + " num_train_epochs=50,\n", + " lr_scheduler_type=SchedulerType.REDUCE_ON_PLATEAU,\n", + " warmup_ratio=0.03,\n", + " log_level=\"error\",\n", + " logging_strategy=\"epoch\",\n", + " save_strategy=\"epoch\",\n", + " save_total_limit=5,\n", + " save_safetensors=True,\n", + " save_only_model=True,\n", + " use_cpu=False,\n", + " seed=0,\n", + " data_seed=0,\n", + " fp16=True,\n", + " half_precision_backend=\"auto\",\n", + " fp16_full_eval=False,\n", + " load_best_model_at_end=True,\n", + " metric_for_best_model=\"eval_google_bleu\",\n", + " greater_is_better=True,\n", + " optim=\"paged_adamw_32bit\",\n", + " group_by_length=True,\n", + " report_to=[\"none\"],\n", + " skip_memory_metrics=True,\n", + " push_to_hub=False,\n", + " 
auto_find_batch_size=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "context_template = \" ### Context:\"\n", + "question_template = \" ### Question:\"\n", + "answer_template = \" ### Answer:\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def format_inputs(examples: list[dict[str, str]]) -> list[str]:\n", + " return [\n", + " \"\\n\".join(\n", + " [\n", + " f\"{context_template} {examples['context'][counter]}\",\n", + " f\"{question_template} {examples['question'][counter]}\",\n", + " f\"{answer_template} {examples['answer'][counter]}\",\n", + " ]\n", + " )\n", + " for counter in range(len(examples))\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response_template_with_context = f\"\\n{answer_template}\"\n", + "response_template_token_indices = tokeniser.encode(\n", + " response_template_with_context, add_special_tokens=False\n", + ")[2:]\n", + "\n", + "collator = DataCollatorForCompletionOnlyLM(\n", + " response_template_token_indices, tokenizer=tokeniser, ignore_index=mask_token_index\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bleu_metric = load(\"bleu\", module_type=\"metric\")\n", + "google_bleu_metric = load(\"google_bleu\", module_type=\"metric\")\n", + "rouge_metric = load(\"rouge\", module_type=\"metric\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_positions_of_most_likely_token(logits: Tensor, labels: Tensor | None) -> Tensor:\n", + " del labels\n", + "\n", + " if isinstance(logits, tuple):\n", + " logits = logits[0]\n", + "\n", + " return logits.argmax(dim=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "def calculate_multi_class_classification_metrics(\n", + " y_true: numpy.ndarray, y_pred: numpy.ndarray\n", + ") -> dict[str, float]:\n", + " accuracy = accuracy_score(y_true, y_pred, normalize=False)\n", + "\n", + " precision = precision_score(y_true, y_pred, average=\"micro\", zero_division=1)\n", + " recall = recall_score(y_true, y_pred, average=\"micro\", zero_division=1)\n", + "\n", + " f1_balanced = f1_score(y_true, y_pred, average=\"micro\", zero_division=1)\n", + " f1_precision = fbeta_score(y_true, y_pred, beta=0.5, average=\"micro\", zero_division=1)\n", + " f1_recall = fbeta_score(y_true, y_pred, beta=2, average=\"micro\", zero_division=1)\n", + "\n", + " return {\n", + " \"accuracy\": accuracy,\n", + " \"precision\": precision,\n", + " \"recall\": recall,\n", + " \"f1_balanced\": f1_balanced,\n", + " \"f1_precision\": f1_precision,\n", + " \"f1_recall\": f1_recall,\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def track_validation_metrics(validation_outputs: EvalPrediction) -> dict[str, float]:\n", + " predictions = validation_outputs.predictions\n", + " labels = validation_outputs.label_ids\n", + "\n", + " if isinstance(predictions, tuple):\n", + " predictions = predictions[0]\n", + "\n", + " predictions = numpy.where(predictions != mask_token_index, predictions, tokeniser.pad_token_id)\n", + " labels = numpy.where(labels != mask_token_index, labels, tokeniser.pad_token_id)\n", + "\n", + " decoded_predictions = tokeniser.batch_decode(predictions, skip_special_tokens=True)\n", + " decoded_labels = tokeniser.batch_decode(labels, skip_special_tokens=True)\n", + "\n", + " bleu_score = bleu_metric.compute(predictions=decoded_predictions, references=decoded_labels)\n", + " google_bleu_score = google_bleu_metric.compute(\n", + " predictions=decoded_predictions, references=decoded_labels\n", + " )\n", + " rouge_score = 
rouge_metric.compute(predictions=decoded_predictions, references=decoded_labels)\n", + "\n", + " classification_scores = calculate_multi_class_classification_metrics(\n", + " labels.flatten(), predictions.flatten()\n", + " )\n", + "\n", + " return {**bleu_score, **google_bleu_score, **rouge_score, **classification_scores}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "early_stopping_callback = EarlyStoppingCallback(\n", + " early_stopping_patience=10, early_stopping_threshold=0.000001\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supervised_trainer = SFTTrainer(\n", + " model=model,\n", + " args=training_configuration,\n", + " data_collator=collator,\n", + " train_dataset=train_subset,\n", + " eval_dataset=validation_subset,\n", + " tokenizer=tokeniser,\n", + " compute_metrics=track_validation_metrics,\n", + " callbacks=[early_stopping_callback],\n", + " preprocess_logits_for_metrics=get_positions_of_most_likely_token,\n", + " peft_config=peft_configuration,\n", + " formatting_func=format_inputs,\n", + " packing=False,\n", + " max_seq_length=512,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supervised_trainer.train()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "supervised_trainer.model.save_pretrained(tuned_adapter_path, safe_serialization=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = shutil.make_archive(\n", + " str(pathlib.Path(tuned_adapter_archive.parent, tuned_adapter_archive.stem)),\n", + " tuned_adapter_archive.suffix[1:],\n", + " root_dir=working_directory,\n", + " base_dir=tuned_adapter_path.stem,\n", + ")\n", + "\n", + "_ = shutil.make_archive(\n", + " 
str(pathlib.Path(tuning_checkpoints_archive.parent, tuning_checkpoints_archive.stem)),\n", + " tuning_checkpoints_archive.suffix[1:],\n", + " root_dir=working_directory,\n", + " base_dir=tuning_checkpoints_path.stem,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "genai", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 772696d4fdf94248e26e26321efce75b43721c43 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Wed, 13 Mar 2024 09:37:32 +0530 Subject: [PATCH 16/26] updated dependencies --- pyproject.toml | 2 +- requirements/constraints.fine_tuning.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5d9d592..580c481 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,7 +90,7 @@ doc = [ "sphinx-copybutton", ] fine-tuning = [ - "accelerate<0.28,>=0.24.1", + "accelerate<0.29,>=0.24.1", "bitsandbytes<0.44,>=0.41.2", "datasets<2.19,>=2.15", "evaluate<0.5,>=0.4.1", diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt index dd26e39..4004e32 100644 --- a/requirements/constraints.fine_tuning.txt +++ b/requirements/constraints.fine_tuning.txt @@ -1,4 +1,4 @@ -accelerate<0.28,>=0.24.1 +accelerate<0.29,>=0.24.1 bitsandbytes<0.44,>=0.41.2 datasets<2.19,>=2.15.0 evaluate<0.5,>=0.4.1 From 3616a3c183192a7211773cb655f12fb34f0645b4 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Wed, 13 Mar 2024 09:40:53 +0530 Subject: [PATCH 17/26] updated dataset generation --- .../dataset_generation/step_2_generation.py | 4452 ++++++++--------- 
.../dataset_generation/utils_generation.py | 24 +- 2 files changed, 2091 insertions(+), 2385 deletions(-) diff --git a/src/generative_ai/dataset_generation/step_2_generation.py b/src/generative_ai/dataset_generation/step_2_generation.py index 0919078..94011dc 100644 --- a/src/generative_ai/dataset_generation/step_2_generation.py +++ b/src/generative_ai/dataset_generation/step_2_generation.py @@ -1,6 +1,7 @@ """Define functionalities to generate retrieval and tuning sources.""" import inspect +import itertools import logging import random @@ -9,6 +10,7 @@ from .utils_generation import ( ClassDetails, Dataset, + Document, EnumDetails, FunctionDetails, MemberDetails, @@ -32,24 +34,32 @@ @pydantic.validate_call(validate_return=True) -def allocate_tuning_pairs( - tuning_pairs: list[tuple[str, str]], +def allocate_tuning_triplets( + context: str, + questions: list[str], + answers: list[str], split_proportions: SplitProportions = DEFAULT_SPLIT_PROPORTIONS, -) -> list[tuple[str, str, SplitName]]: - """Allocate tuning pairs to different splits. +) -> list[Document]: + """Allocate tuning triplets to different splits. 
Parameters ---------- - tuning_pairs : list[tuple[str, str]] - question and answer pairs to be allocated to different splits + context : str + source of information + questions : list[str] + queries from ``context`` + answers : list[str] + responses based on ``context`` split_proportions : SplitProportions, optional chance of a pair to be allocated to different splits, by default DEFAULT_SPLIT_PROPORTIONS Returns ------- - list[tuple[str, str, SplitName]] - updated tuning pairs with split allocation + list[Document] + updated tuning triplets with split allocation """ + question_answer_pairs = list(itertools.product(questions, answers)) + allocations = random.choices( # noqa: S311 [SplitName.TRAIN, SplitName.VALIDATION, SplitName.TEST], weights=[ @@ -57,12 +67,12 @@ def allocate_tuning_pairs( split_proportions.validation_proportion, split_proportions.test_proportion, ], - k=len(tuning_pairs), + k=len(question_answer_pairs), ) return [ - (question, answer, allocation) - for (question, answer), allocation in zip(tuning_pairs, allocations, strict=True) + Document(context=context, question=question, answer=answer, split=allocation) + for (question, answer), allocation in zip(question_answer_pairs, allocations, strict=True) ] @@ -121,510 +131,444 @@ def generate_package_dataset(package_contents: PackageDetails) -> Dataset: # no package = f"'{package_name}' package" package_retrieval_chunks: list[str] = [f"'{package_name}' is a Python package."] - package_tuning_pairs: list[tuple[str, str, SplitName]] = [] + package_tuning_documents: list[Document] = [] if (parent_package := package_contents.parent_package_name) is None: - root_package_pairs = [ - ("What is the root package?", f"'{package_name}' is the root package."), - ( - "Can you tell me what the root package is?", - f"Sure, the root package is '{package_name}'.", - ), - ( - "I'm trying to find out the root package. 
Can you help?", - f"Of course, the root package is '{package_name}'.", - ), - ( - "Do you know what the root package is?", - f"Yes, the root package is '{package_name}'.", - ), - ( - "I'd like to know the root package.", - f"The root package you're asking about is '{package_name}'.", - ), - ( - "Could you identify the root package?", - f"Certainly, '{package_name}' is the root package.", - ), - ] - package_retrieval_chunks.append(f"'{package_name}' is the root package.") - package_tuning_pairs.extend(allocate_tuning_pairs(root_package_pairs)) - - parent_package_pairs = [ - ( - f"Name parent package of '{package_name}'.", - f"Being the root package, '{package_name}' has no parent package.", - ), - ( - f"What is the parent package of '{package_name}'?", - f"The root package '{package_name}' does not have a parent package.", - ), - ( - f"Can you tell me the parent package of '{package_name}'?", - f"'{package_name}' is a root package and therefore," - " it does not have a parent package.", - ), - ( - f"Could you identify the parent package of '{package_name}'?", - f"As a root package, '{package_name}' does not possess a parent package.", - ), - ( - f"I'm looking for the parent package of '{package_name}'. Can you help?", - f"Sure, '{package_name}' is a root package, so it doesn't have a parent package.", - ), - ( - f"Do you know the parent package of '{package_name}'?", - f"Yes, '{package_name}' is a root package and hence," - " it doesn't have a parent package.", - ), - ] - package_retrieval_chunks.append(f"'{package_name}' has no parent package.") - package_tuning_pairs.extend(allocate_tuning_pairs(parent_package_pairs)) + root_package_context = f"'{package_name}' is the root package." + root_package_questions = [ + "What is the root package?", + "Can you tell me what the root package is?", + "I'm trying to find out the root package. 
Can you help?", + "Do you know what the root package is?", + "I'd like to know the root package.", + "Could you identify the root package?", + ] + root_package_answers = [ + f"'{package_name}' is the root package.", + f"The root package is '{package_name}'.", + f"The root package you're asking about is '{package_name}'.", + ] + + package_retrieval_chunks.append(root_package_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + root_package_context, root_package_questions, root_package_answers + ) + ) + + parent_package_context = f"'{package_name}' has no parent package." + parent_package_questions = [ + f"Name parent package of '{package_name}'.", + f"What is the parent package of '{package_name}'?", + f"Can you tell me the parent package of '{package_name}'?", + f"Could you identify the parent package of '{package_name}'?", + f"I'm looking for the parent package of '{package_name}'. Can you help?", + f"Do you know the parent package of '{package_name}'?", + ] + parent_package_answers = [ + f"Being the root package, '{package_name}' has no parent package.", + f"The root package '{package_name}' does not have a parent package.", + f"'{package_name}' is a root package and therefore it does not have a parent package.", + f"As a root package, '{package_name}' does not possess a parent package.", + f"'{package_name}' is a root package, so it doesn't have a parent package.", + f"'{package_name}' is a root package and hence it doesn't have a parent package.", + ] + + package_retrieval_chunks.append(parent_package_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + parent_package_context, parent_package_questions, parent_package_answers + ) + ) else: - parent_package_pairs = [ - ( - f"Name parent package of '{package_name}' sub-package.", - f"'{parent_package}' is the full name of its parent package.", - ), - ( - f"What is the parent package of the '{package_name}' sub-package?", - f"The parent package of '{package_name}' is 
'{parent_package}'.", - ), - ( - f"Could you tell me the parent package of '{package_name}'?", - f"Sure, the parent package of '{package_name}' is '{parent_package}'.", - ), - ( - f"I need to know the parent package of '{package_name}'.", - f"The parent package of '{package_name}' is '{parent_package}'.", - ), - ( - f"Identify the parent package for the '{package_name}' sub-package.", - f"The parent package for '{package_name}' is identified as '{parent_package}'.", - ), - ( - f"Can you name the parent package of the '{package_name}' sub-package?", - f"Yes, the parent package of '{package_name}' is '{parent_package}'.", - ), - ] - package_retrieval_chunks.append( - f"'{package_name}' is part of parent package '{parent_package}'." - ) - package_tuning_pairs.extend(allocate_tuning_pairs(parent_package_pairs)) - - package_full_name_pairs = [ - ( - f"Tell the full name of '{package_name}' sub-package.", - f"'{package_full_name}' is the fully qualified name of '{package_name}'.", - ), - ( - f"What is the fully qualified name of the '{package_name}' sub-package?", - f"Fully qualified name of '{package_name}' sub-package is '{package_full_name}'.", - ), - ( - f"Could you provide the full name of the '{package_name}' sub-package?", - f"Sure, the full name of '{package_name}' sub-package is '{package_full_name}'.", - ), - ( - f"I need the full name of the '{package_name}' sub-package. Can you tell me?", - f"Of course, full name of '{package_name}' sub-package is '{package_full_name}'.", - ), - ( - f"Can you inform me about the full name of the '{package_name}' sub-package?", - f"Certainly, full name of '{package_name}' sub-package is '{package_full_name}'.", - ), - ( - f"Please, reveal the full name of the '{package_name}' sub-package.", - f"Absolutely, full name of '{package_name}' sub-package is '{package_full_name}'.", - ), - ] - package_retrieval_chunks.append( + parent_package_context = f"'{package_name}' is part of parent package '{parent_package}'." 
+ parent_package_questions = [ + f"Name parent package of '{package_name}' sub-package.", + f"What is the parent package of the '{package_name}' sub-package?", + f"Could you tell me the parent package of '{package_name}'?", + f"I need to know the parent package of '{package_name}'.", + f"Identify the parent package for the '{package_name}' sub-package.", + f"Can you name the parent package of the '{package_name}' sub-package?", + ] + parent_package_answers = [ + f"'{parent_package}' is the full name of its parent package.", + f"The parent package of '{package_name}' is '{parent_package}'.", + f"The parent package for '{package_name}' is identified as '{parent_package}'.", + ] + + package_retrieval_chunks.append(parent_package_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + parent_package_context, parent_package_questions, parent_package_answers + ) + ) + + package_full_name_context = ( f"Full name of '{package_name}' sub-package is '{package_full_name}'." ) - package_tuning_pairs.extend(allocate_tuning_pairs(package_full_name_pairs)) + package_full_name_questions = [ + f"Tell the full name of '{package_name}' sub-package.", + f"What is the fully qualified name of the '{package_name}' sub-package?", + f"Could you provide the full name of the '{package_name}' sub-package?", + f"I need the full name of the '{package_name}' sub-package. 
Can you tell me?", + f"Can you inform me about the full name of the '{package_name}' sub-package?", + f"Please, reveal the full name of the '{package_name}' sub-package.", + ] + package_full_name_answers = [ + f"'{package_full_name}' is the fully qualified name of '{package_name}'.", + f"Fully qualified name of '{package_name}' sub-package is '{package_full_name}'.", + f"The full name of '{package_name}' sub-package is '{package_full_name}'.", + ] + + package_retrieval_chunks.append(package_full_name_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_full_name_context, package_full_name_questions, package_full_name_answers + ) + ) package_hierarchy = enumerate_array_elements(package_contents.package_hierarchy) - package_hierarchy_pairs = [ - ( - f"What is the hierarchy of {package}?", - f"The hierarchy of {package} is as follows: {package_hierarchy}.", - ), - ( - f"Can you explain the hierarchy of the {package}?", - f"Sure, the hierarchy of the {package} is: {package_hierarchy}.", - ), - ( - f"Could you describe the structure of the {package}?", - f"Of course, the structure of {package} is: {package_hierarchy}.", - ), - ( - f"I need to understand the hierarchy of {package}. Can you help?", - f"Absolutely, the hierarchy of {package} is: {package_hierarchy}.", - ), - ( - f"Please provide the hierarchy of the {package}.", - f"The hierarchy of the {package} is: {package_hierarchy}.", - ), - ( - f"I'm interested in the structure of the {package}. What is it?", - f"The structure of {package} is as follows: {package_hierarchy}.", - ), - ] - package_retrieval_chunks.append( - f"Hierarchy of {package} is as follows: {package_hierarchy}." - ) - package_tuning_pairs.extend(allocate_tuning_pairs(package_hierarchy_pairs)) + + package_hierarchy_context = f"Hierarchy of {package} is as follows: {package_hierarchy}." 
+ package_hierarchy_questions = [ + f"What is the hierarchy of {package}?", + f"Can you explain the hierarchy of the {package}?", + f"Could you describe the structure of the {package}?", + f"I need to understand the hierarchy of {package}. Can you help?", + f"Please provide the hierarchy of the {package}.", + f"I'm interested in the structure of the {package}. What is it?", + ] + package_hierarchy_answers = [ + f"The hierarchy of {package} is as follows: {package_hierarchy}.", + f"The hierarchy of {package} is: {package_hierarchy}.", + f"The structure of {package} is: {package_hierarchy}.", + f"The structure of {package} is as follows: {package_hierarchy}.", + ] + + package_retrieval_chunks.append(package_hierarchy_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_hierarchy_context, package_hierarchy_questions, package_hierarchy_answers + ) + ) if not (children_sub_packages := package_contents.children_sub_packages_names): - package_sub_package_pairs = [ - ( - f"List the sub-packages of {package}.", - f"{package} does not have any further sub-packages.", - ), - ( - f"What are the sub-packages of the {package}?", - f"The {package} does not contain any sub-packages.", - ), - ( - f"Could you tell me the sub-packages of {package}?", - f"I'm sorry, but the {package} doesn't have any sub-packages.", - ), - ( - f"I need to know the sub-packages of {package}. Can you list them?", - f"Unfortunately, {package} doesn't include any sub-packages.", - ), - ( - f"Can you provide a list of sub-packages for the {package}?", - f"There are no sub-packages in the {package}.", - ), - ( - f"Identify the sub-packages of {package}.", - f"No sub-packages are present in the {package}.", - ), - ] - package_retrieval_chunks.append(f"{package} does not have any further sub-packages.") - package_tuning_pairs.extend(allocate_tuning_pairs(package_sub_package_pairs)) + package_sub_package_context = f"{package} does not have any further sub-packages." 
+ package_sub_package_questions = [ + f"List the sub-packages of {package}.", + f"What are the sub-packages of the {package}?", + f"Could you tell me the sub-packages of {package}?", + f"I need to know the sub-packages of {package}. Can you list them?", + f"Can you provide a list of sub-packages for the {package}?", + f"Identify the sub-packages of {package}.", + ] + package_sub_package_answers = [ + f"{package} does not have any further sub-packages.", + f"The {package} does not contain any sub-packages.", + f"The {package} doesn't have any sub-packages.", + f"{package} doesn't include any sub-packages.", + f"There are no sub-packages in the {package}.", + f"No sub-packages are present in the {package}.", + ] + + package_retrieval_chunks.append(package_sub_package_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_sub_package_context, + package_sub_package_questions, + package_sub_package_answers, + ) + ) else: children_sub_packages_count = len(children_sub_packages) - children_sub_packages_count_pairs = [ - ( - f"How many sub-packages are there in {package}?", - f"{package} has {children_sub_packages_count} many sub-packages.", - ), - ( - f"What is the count of sub-packages in {package}?", - f"The count of sub-packages in {package} is {children_sub_packages_count}.", - ), - ( - f"Could you tell me the number of sub-packages available in {package}?", - f"{package} has {children_sub_packages_count} sub-packages.", - ), - ( - f"Please provide the count of sub-packages for {package}.", - f"Number of sub-packages in {package} is {children_sub_packages_count}.", - ), - ( - f"Tell me the quantity of sub-packages present in {package}.", - f"{package} has {children_sub_packages_count} sub-packages.", - ), - ( - f"Would you mind letting me know how many sub-packages {package} contains?", - f"{package} contains {children_sub_packages_count} sub-packages.", - ), - ] - package_retrieval_chunks.append( + + children_sub_packages_count_context = ( 
f"{package} has {children_sub_packages_count} many sub-packages." ) - package_tuning_pairs.extend(allocate_tuning_pairs(children_sub_packages_count_pairs)) + children_sub_packages_count_questions = [ + f"How many sub-packages are there in {package}?", + f"What is the count of sub-packages in {package}?", + f"Could you tell me the number of sub-packages available in {package}?", + f"Please provide the count of sub-packages for {package}.", + f"Tell me the quantity of sub-packages present in {package}.", + f"Would you mind letting me know how many sub-packages {package} contains?", + ] + children_sub_packages_count_answers = [ + f"{package} has {children_sub_packages_count} many sub-packages.", + f"The count of sub-packages in {package} is {children_sub_packages_count}.", + f"{package} has {children_sub_packages_count} sub-packages.", + f"Number of sub-packages in {package} is {children_sub_packages_count}.", + f"{package} has {children_sub_packages_count} sub-packages.", + f"{package} contains {children_sub_packages_count} sub-packages.", + ] + + package_retrieval_chunks.append(children_sub_packages_count_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + children_sub_packages_count_context, + children_sub_packages_count_questions, + children_sub_packages_count_answers, + ) + ) package_sub_packages = enumerate_array_elements(children_sub_packages) - package_sub_package_pairs = [ - ( - f"List the sub-packages of {package}.", - f"Sub-packages of {package} are as follows: {package_sub_packages}.", - ), - ( - f"What are the sub-packages of the {package}?", - f"The {package} has the following sub-packages: {package_sub_packages}.", - ), - ( - f"Could you tell me the sub-packages of {package}?", - f"Sure, the sub-packages of {package} are: {package_sub_packages}.", - ), - ( - f"I need to know the sub-packages of {package}. 
Can you list them?", - f"Of course, the sub-packages of {package} are: {package_sub_packages}.", - ), - ( - f"Please provide the sub-packages of {package}.", - f"The sub-packages of {package} are: {package_sub_packages}.", - ), - ( - f"Can you enumerate the sub-packages of {package}?", - f"Certainly, the sub-packages of {package} are: {package_sub_packages}.", - ), - ] - package_retrieval_chunks.append( + + package_sub_package_context = ( f"Sub-packages of {package} are as follows: {package_sub_packages}." ) - package_tuning_pairs.extend(allocate_tuning_pairs(package_sub_package_pairs)) + package_sub_package_questions = [ + f"List the sub-packages of {package}.", + f"What are the sub-packages of the {package}?", + f"Could you tell me the sub-packages of {package}?", + f"I need to know the sub-packages of {package}. Can you list them?", + f"Please provide the sub-packages of {package}.", + f"Can you enumerate the sub-packages of {package}?", + ] + package_sub_package_answers = [ + f"Sub-packages of {package} are as follows: {package_sub_packages}.", + f"The {package} has the following sub-packages: {package_sub_packages}.", + f"The sub-packages of {package} are: {package_sub_packages}.", + ] + + package_retrieval_chunks.append(package_sub_package_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_sub_package_context, + package_sub_package_questions, + package_sub_package_answers, + ) + ) if not (children_modules := package_contents.children_modules_names): - package_module_pairs = [ - ( - f"What are the modules of {package}?", - f"{package} does not have any direct modules under itself.", - ), - ( - f"Can you list the modules under the {package}?", - f"There are no direct modules under the {package}.", - ), - ( - f"Does the {package} contain any modules?", - f"No, the {package} does not contain any direct modules.", - ), - ( - f"I'm looking for the modules of {package}. 
Can you help?", - f"I'm sorry, but {package} does not have any direct modules.", - ), - ( - f"Tell me about the modules of {package}.", - f"Actually, the {package} does not have any direct modules.", - ), - ( - f"Are there any modules under the {package}?", - f"No, there aren't any direct modules under the {package}.", - ), - ] - package_retrieval_chunks.append(f"{package} does not have any further modules.") - package_tuning_pairs.extend(allocate_tuning_pairs(package_module_pairs)) + package_module_context = f"{package} does not have any further modules." + package_module_questions = [ + f"What are the modules of {package}?", + f"Can you list the modules under the {package}?", + f"Does the {package} contain any modules?", + f"I'm looking for the modules of {package}. Can you help?", + f"Tell me about the modules of {package}.", + f"Are there any modules under the {package}?", + ] + package_module_answers = [ + f"{package} does not have any direct modules under itself.", + f"There are no direct modules under the {package}.", + f"No, the {package} does not contain any direct modules.", + f"{package} does not have any direct modules.", + f"The {package} does not have any direct modules.", + f"There aren't any direct modules under the {package}.", + ] + + package_retrieval_chunks.append(package_module_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_module_context, package_module_questions, package_module_answers + ) + ) else: children_modules_count = len(children_modules) - children_modules_count_pairs = [ - ( - f"How many modules are there in {package}?", - f"{package} has {children_modules_count} many modules.", - ), - ( - f"What is the count of modules in {package}?", - f"The count of modules in {package} is {children_modules_count}.", - ), - ( - f"Could you tell me the number of modules available in {package}?", - f"{package} has {children_modules_count} modules.", - ), - ( - f"Please provide the count of modules for {package}.", 
- f"The number of modules in {package} is {children_modules_count}.", - ), - ( - f"Tell me the quantity of modules present in {package}.", - f"{package} has {children_modules_count} modules.", - ), - ( - f"Would you mind letting me know how many modules {package} contains?", - f"{package} contains {children_modules_count} modules.", - ), - ] - package_retrieval_chunks.append(f"{package} has {children_modules_count} many modules.") - package_tuning_pairs.extend(allocate_tuning_pairs(children_modules_count_pairs)) + + children_modules_count_context = f"{package} has {children_modules_count} many modules." + children_modules_count_questions = [ + f"How many modules are there in {package}?", + f"What is the count of modules in {package}?", + f"Could you tell me the number of modules available in {package}?", + f"Please provide the count of modules for {package}.", + f"Tell me the quantity of modules present in {package}.", + f"Would you mind letting me know how many modules {package} contains?", + ] + children_modules_count_answers = [ + f"{package} has {children_modules_count} many modules.", + f"The count of modules in {package} is {children_modules_count}.", + f"{package} has {children_modules_count} modules.", + f"The number of modules in {package} is {children_modules_count}.", + f"{package} contains {children_modules_count} modules.", + ] + + package_retrieval_chunks.append(children_modules_count_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + children_modules_count_context, + children_modules_count_questions, + children_modules_count_answers, + ) + ) package_modules = enumerate_array_elements(children_modules) - package_module_pairs = [ - ( - f"What are the modules of {package}?", - f"Direct modules under {package} are as follows: {package_modules}.", - ), - ( - f"Can you list the modules of the {package}?", - f"Sure, the direct modules under {package} are: {package_modules}.", - ), - ( - f"I need to know the modules of the {package}.", 
- f"The modules you're looking for in {package} are: {package_modules}.", - ), - ( - f"Could you tell me what the modules of the {package} are?", - f"Of course, the modules under {package} are: {package_modules}.", - ), - ( - f"I'm interested in the modules of the {package}.", - f"The modules in {package} are: {package_modules}.", - ), - ( - f"What modules does the {package} contain?", - f"The {package} contains these modules: {package_modules}.", - ), - ] - package_retrieval_chunks.append(f"Modules of {package} are as follows: {package_modules}.") - package_tuning_pairs.extend(allocate_tuning_pairs(package_module_pairs)) + + package_module_context = f"Modules of {package} are as follows: {package_modules}." + package_module_questions = [ + f"What are the modules of {package}?", + f"Can you list the modules of the {package}?", + f"I need to know the modules of the {package}.", + f"Could you tell me what the modules of the {package} are?", + f"I'm interested in the modules of the {package}.", + f"What modules does the {package} contain?", + ] + package_module_answers = [ + f"Direct modules under {package} are as follows: {package_modules}.", + f"The direct modules under {package} are: {package_modules}.", + f"The modules you're looking for in {package} are: {package_modules}.", + f"The modules under {package} are: {package_modules}.", + f"The modules in {package} are: {package_modules}.", + f"The {package} contains these modules: {package_modules}.", + ] + + package_retrieval_chunks.append(package_module_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_module_context, package_module_questions, package_module_answers + ) + ) if not (package_summary := package_contents.package_summary): - package_summary_pairs = [ - (f"What does {package} do?", f"{package} does not have any documentation."), - ( - f"Can you tell me the functionality of the {package}?", - f"Unfortunately, the {package} provides no documentation.", - ), - ( - f"I'm 
curious about what the {package} does. Can you enlighten me?", - f"I'm sorry, but the {package} does not come with any documentation.", - ), - ( - f"Could you explain the purpose of the {package}?", - f"Regrettably, the {package} lacks any form of documentation.", - ), - ( - f"What's the role of the {package}?", - f"The {package} does not offer any documentation.", - ), - ( - f"What functionality does the {package} provide?", - f"The {package} does not have any available documentation.", - ), - ] - package_retrieval_chunks.append( + package_summary_context = ( f"Unfortunately, {package} currently does not have any documentation." ) - package_tuning_pairs.extend(allocate_tuning_pairs(package_summary_pairs)) + package_summary_questions = [ + f"What does {package} do?", + f"Can you tell me the functionality of the {package}?", + f"I'm curious about what the {package} does. Can you enlighten me?", + f"Could you explain the purpose of the {package}?", + f"What's the role of the {package}?", + f"What functionality does the {package} provide?", + ] + package_summary_answers = [ + f"{package} does not have any documentation.", + f"The {package} provides no documentation.", + f"The {package} does not come with any documentation.", + f"The {package} lacks any form of documentation.", + f"The {package} does not offer any documentation.", + f"The {package} does not have any available documentation.", + ] + + package_retrieval_chunks.append(package_summary_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_summary_context, package_summary_questions, package_summary_answers + ) + ) else: - package_summary_pairs = [ - (f"What does {package} do?", f"Its documentation is as follows: '{package_summary}'."), - ( - f"Can you tell me about the {package}?", - f"Sure, here is its documentation: '{package_summary}'.", - ), - ( - f"I'd like to know what the {package} does.", - f"Of course, here's the documentation for it: '{package_summary}'.", - ), - ( - 
f"Could you explain the functionality of the {package}?", - f"Absolutely, the documentation states: '{package_summary}'.", - ), - ( - f"What's the purpose of the {package}?", - f"The purpose is described in its documentation: '{package_summary}'.", - ), - ( - f"I'm curious about the {package}, what does it do?", - f"Good question, its documentation reads: '{package_summary}'.", - ), - ] - package_retrieval_chunks.append( + package_summary_context = ( f"The following is the documentation of {package}: '{package_summary}'." ) - package_tuning_pairs.extend(allocate_tuning_pairs(package_summary_pairs)) + package_summary_questions = [ + f"What does {package} do?", + f"Can you tell me about the {package}?", + f"I'd like to know what the {package} does.", + f"Could you explain the functionality of the {package}?", + f"What's the purpose of the {package}?", + f"I'm curious about the {package}, what does it do?", + ] + package_summary_answers = [ + f"Its documentation is as follows: '{package_summary}'.", + f"Here is its documentation: '{package_summary}'.", + f"Here's the documentation for it: '{package_summary}'.", + f"The documentation states: '{package_summary}'.", + f"The purpose is described in its documentation: '{package_summary}'.", + f"Its documentation reads: '{package_summary}'.", + ] + + package_retrieval_chunks.append(package_summary_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_summary_context, package_summary_questions, package_summary_answers + ) + ) if not (package_exports := package_contents.package_all_exports): - package_members_pairs = [ - ( - f"What are the public members of the {package}?", - f"{package} does not have any public member exported through '__all__'.", - ), - ( - f"Can you list the public members of the {package}?", - f"The {package} does not export any public members through '__all__'.", - ), - ( - f"Are there any public members in the {package}?", - f"No, the {package} does not have any public 
members exported through '__all__'.", - ), - ( - f"I'm looking for public members of {package}. Can you help?", - f"Sure, but the {package} does not export any public members through '__all__'.", - ), - ( - f"Could you tell me the public members of the {package}?", - f"Unfortunately, the {package} does not have any public members" - " exported through '__all__'.", - ), - ( - f"I'd like to know the public members of the {package}." - " Can you provide that information?", - f"I'm sorry, but the {package} does not have any public members" - " exported through '__all__'.", - ), - ] - package_retrieval_chunks.append( + package_members_context = ( f"{package} does not export anything publicly using __all__ variable." ) - package_tuning_pairs.extend(allocate_tuning_pairs(package_members_pairs)) + package_members_questions = [ + f"What are the public members of the {package}?", + f"Can you list the public members of the {package}?", + f"Are there any public members in the {package}?", + f"I'm looking for public members of {package}. Can you help?", + f"Could you tell me the public members of the {package}?", + f"I'd like to know the public members of the {package}." 
+ " Can you provide that information?", + ] + package_members_answers = [ + f"{package} does not have any public member exported through '__all__'.", + f"The {package} does not export any public members through '__all__'.", + f"The {package} does not have any public members exported through '__all__'.", + ] + + package_retrieval_chunks.append(package_members_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_members_context, package_members_questions, package_members_answers + ) + ) else: package_exports_count = len(package_exports) - package_exports_count_pairs = [ - ( - f"How many objects does {package} export publicly?", - f"{package} exports {package_exports_count} many objects using __all__.", - ), - ( - f"What is the count of publicly exported objects in {package}?", - f"Count of publicly exported objects in {package} is {package_exports_count}.", - ), - ( - f"Could you tell me the number of objects publicly exported by {package}?", - f"{package} exports {package_exports_count} objects using __all__.", - ), - ( - f"Please provide the count of objects publicly exported by {package}.", - f"Number of objects publicly exported by {package} is {package_exports_count}.", - ), - ( - f"Tell me the quantity of objects that {package} exports publicly.", - f"{package} exports {package_exports_count} objects using __all__.", - ), - ( - f"Would you mind letting me know how many objects {package} publicly exports?", - f"{package} publicly exports {package_exports_count} objects.", - ), - ] - package_retrieval_chunks.append( + + package_exports_count_context = ( f"{package} has {package_exports_count} many public exports." 
) - package_tuning_pairs.extend(allocate_tuning_pairs(package_exports_count_pairs)) + package_exports_count_questions = [ + f"How many objects does {package} export publicly?", + f"What is the count of publicly exported objects in {package}?", + f"Could you tell me the number of objects publicly exported by {package}?", + f"Please provide the count of objects publicly exported by {package}.", + f"Tell me the quantity of objects that {package} exports publicly.", + f"Would you mind letting me know how many objects {package} publicly exports?", + ] + package_exports_count_answers = [ + f"{package} exports {package_exports_count} many objects using __all__.", + f"Count of publicly exported objects in {package} is {package_exports_count}.", + f"{package} exports {package_exports_count} objects using __all__.", + f"Number of objects publicly exported by {package} is {package_exports_count}.", + f"{package} exports {package_exports_count} objects using __all__.", + f"{package} publicly exports {package_exports_count} objects.", + ] + + package_retrieval_chunks.append(package_exports_count_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_exports_count_context, + package_exports_count_questions, + package_exports_count_answers, + ) + ) package_public_members = enumerate_array_elements(package_exports) - package_members_pairs = [ - ( - f"What are the public members of the {package}?", - f"{package} publicly exports the following members using '__all__':" - f" {package_public_members}.", - ), - ( - f"Can you list the public members of the {package}?", - f"Sure, the {package} publicly exports these members using '__all__':" - f" {package_public_members}.", - ), - ( - f"I need to know the public members of the {package}. 
Can you tell me?", - f"Of course, the {package} publicly exports these members using '__all__':" - f" {package_public_members}.", - ), - ( - f"Could you tell me what the {package} publicly exports?", - f"The {package} publicly exports the following members using '__all__':" - f" {package_public_members}.", - ), - ( - f"I'm interested in the public members of the {package}. What are they?", - f"The {package} publicly exports these members using '__all__':" - f" {package_public_members}.", - ), - ] - package_retrieval_chunks.append( + + package_members_context = ( f"{package} exports following public members using __all__: {package_public_members}." ) - package_tuning_pairs.extend(allocate_tuning_pairs(package_members_pairs)) + package_members_questions = [ + f"What are the public members of the {package}?", + f"Can you list the public members of the {package}?", + f"I need to know the public members of the {package}. Can you tell me?", + f"Could you tell me what the {package} publicly exports?", + f"I'm interested in the public members of the {package}. 
What are they?", + ] + package_members_answers = [ + f"{package} publicly exports the following members using '__all__':" + f" {package_public_members}.", + f"The {package} publicly exports the following members using '__all__':" + f" {package_public_members}.", + f"The {package} publicly exports these members using '__all__':" + f" {package_public_members}.", + ] + + package_retrieval_chunks.append(package_members_context) + package_tuning_documents.extend( + allocate_tuning_triplets( + package_members_context, package_members_questions, package_members_answers + ) + ) package_dataset = Dataset( - retrieval_chunks=package_retrieval_chunks, tuning_pairs=package_tuning_pairs + retrieval_chunks=package_retrieval_chunks, tuning_documents=package_tuning_documents ) return package_dataset @pydantic.validate_call(validate_return=True) -def generate_module_dataset(module_contents: ModuleDetails) -> Dataset: +def generate_module_dataset(module_contents: ModuleDetails) -> Dataset: # noqa: PLR0915 """Create relevant question and answers based on module details. Parameters @@ -642,310 +586,281 @@ def generate_module_dataset(module_contents: ModuleDetails) -> Dataset: module = f"'{module_name}' module" module_retrieval_chunks: list[str] = [f"'{module_name}' is a Python module."] - module_tuning_pairs: list[tuple[str, str, SplitName]] = [] - - module_package_pairs = [ - ( - f"Can you tell the the parent package of {module}?", - f"'{module_contents.package_name}' is the parent package of {module}.", - ), - ( - f"What is the parent package of the {module}?", - f"The parent package of {module} is '{module_contents.package_name}'.", - ), - ( - f"I'm trying to find the parent package of the {module}. 
Can you help?", - f"Sure, parent package of {module} is '{module_contents.package_name}'.", - ), - ( - f"Could you inform me about the parent package of the {module}?", - f"Certainly, '{module_contents.package_name}' is the parent package of the {module}.", - ), - ( - f"I need to know the parent package of {module}. Can you provide that information?", - f"Absolutely, the parent package of the {module} is '{module_contents.package_name}'.", - ), - ( - f"Can you identify the parent package for the {module}?", - f"Yes, parent package for {module} is '{module_contents.package_name}'.", - ), - ] - module_retrieval_chunks.append( + module_tuning_documents: list[Document] = [] + + module_package_context = ( f"{module} is part of parent package '{module_contents.package_name}'." ) - module_tuning_pairs.extend(allocate_tuning_pairs(module_package_pairs)) - - module_full_name_pairs = [ - ( - f"Specify the full name of {module}?", - f"'{module_full_name}' is fully qualified name for {module}.", - ), - ( - f"What is the fully qualified name for the {module}?", - f"The fully qualified name for the {module} is '{module_full_name}'.", - ), - ( - f"Could you tell me the full name of the {module}?", - f"Sure, the full name of the {module} is '{module_full_name}'.", - ), - ( - f"I need the full name of the {module}. Can you provide it?", - f"Of course, the full name of the {module} is '{module_full_name}'.", - ), - ( - f"Can you specify the fully qualified name of the {module}?", - f"Yes, fully qualified name of the {module} is '{module_full_name}'.", - ), - ( - f"I'm looking for the full name of the {module}. What is it?", - f"Full name of the {module} you're looking for is '{module_full_name}'.", - ), + module_package_questions = [ + f"Can you tell the the parent package of {module}?", + f"What is the parent package of the {module}?", + f"I'm trying to find the parent package of the {module}. 
Can you help?", + f"Could you inform me about the parent package of the {module}?", + f"I need to know the parent package of {module}. Can you provide that information?", + f"Can you identify the parent package for the {module}?", + ] + module_package_answers = [ + f"'{module_contents.package_name}' is the parent package of {module}.", + f"The parent package of {module} is '{module_contents.package_name}'.", + f"Parent package of {module} is '{module_contents.package_name}'.", + f"'{module_contents.package_name}' is the parent package of the {module}.", + f"The parent package of the {module} is '{module_contents.package_name}'.", + f"Parent package for {module} is '{module_contents.package_name}'.", ] - module_retrieval_chunks.append(f"Full name of {module} is '{module_full_name}'.") - module_tuning_pairs.extend(allocate_tuning_pairs(module_full_name_pairs)) + + module_retrieval_chunks.append(module_package_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_package_context, module_package_questions, module_package_answers + ) + ) + + module_full_name_context = f"Full name of {module} is '{module_full_name}'." + module_full_name_questions = [ + f"Specify the full name of {module}?", + f"What is the fully qualified name for the {module}?", + f"Could you tell me the full name of the {module}?", + f"I need the full name of the {module}. Can you provide it?", + f"Can you specify the fully qualified name of the {module}?", + f"I'm looking for the full name of the {module}. 
What is it?", + ] + module_full_name_answers = [ + f"'{module_full_name}' is fully qualified name for {module}.", + f"The fully qualified name for the {module} is '{module_full_name}'.", + f"The full name of the {module} is '{module_full_name}'.", + f"Fully qualified name of the {module} is '{module_full_name}'.", + f"Full name of the {module} you're looking for is '{module_full_name}'.", + ] + + module_retrieval_chunks.append(module_full_name_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_full_name_context, module_full_name_questions, module_full_name_answers + ) + ) module_hierarchy = enumerate_array_elements(module_contents.module_hierarchy) - module_hierarchy_pairs = [ - ( - f"What is the hierarchy of {module}?", - f"The hierarchy of {module} is as follows: {module_hierarchy}.", - ), - ( - f"Can you explain the hierarchy of the {module}?", - f"Sure, the hierarchy of the {module} is: {module_hierarchy}.", - ), - ( - f"Could you describe the structure of the {module}?", - f"Of course, the structure of the {module} is: {module_hierarchy}.", - ), - ( - f"I need to understand the hierarchy of the {module}. Can you help?", - f"Absolutely, the hierarchy of the {module} is: {module_hierarchy}.", - ), - ( - f"Please provide the hierarchy of the {module}.", - f"The hierarchy of the {module} is: {module_hierarchy}.", - ), - ( - f"What does the hierarchy of the {module} look like?", - f"The hierarchy of the {module} looks like this: {module_hierarchy}.", - ), + + module_hierarchy_context = f"Hierarchy of {module} is as follows: {module_hierarchy}." + module_hierarchy_questions = [ + f"What is the hierarchy of {module}?", + f"Can you explain the hierarchy of the {module}?", + f"Could you describe the structure of the {module}?", + f"I need to understand the hierarchy of the {module}. 
Can you help?", + f"Please provide the hierarchy of the {module}.", + f"What does the hierarchy of the {module} look like?", + ] + module_hierarchy_answers = [ + f"The hierarchy of {module} is as follows: {module_hierarchy}.", + f"The hierarchy of the {module} is: {module_hierarchy}.", + f"The structure of the {module} is: {module_hierarchy}.", + f"The hierarchy of the {module} looks like this: {module_hierarchy}.", ] - module_retrieval_chunks.append(f"Hierarchy of {module} is as follows: {module_hierarchy}.") - module_tuning_pairs.extend(allocate_tuning_pairs(module_hierarchy_pairs)) + + module_retrieval_chunks.append(module_hierarchy_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_hierarchy_context, module_hierarchy_questions, module_hierarchy_answers + ) + ) module_members_count = len(module_contents.module_members) - module_members_count_pairs = [ - ( - f"How many members does {module} have?", - f"{module} has {module_members_count} many members.", - ), - ( - f"What is the count of members in {module}?", - f"The count of members in {module} is {module_members_count}.", - ), - ( - f"Could you tell me the number of members in {module}?", - f"{module} has {module_members_count} members.", - ), - ( - f"Please provide the count of members for {module}.", - f"The number of members in {module} is {module_members_count}.", - ), - ( - f"Tell me the quantity of members present in {module}.", - f"{module} has {module_members_count} members.", - ), - ( - f"Would you mind letting me know how many members {module} contains?", - f"{module} contains {module_members_count} members.", - ), + + module_members_count_context = f"{module} has {module_members_count} many members." 
+ module_members_count_questions = [ + f"How many members does {module} have?", + f"What is the count of members in {module}?", + f"Could you tell me the number of members in {module}?", + f"Please provide the count of members for {module}.", + f"Tell me the quantity of members present in {module}.", + f"Would you mind letting me know how many members {module} contains?", + ] + module_members_count_answers = [ + f"{module} has {module_members_count} many members.", + f"The count of members in {module} is {module_members_count}.", + f"{module} has {module_members_count} members.", + f"The number of members in {module} is {module_members_count}.", + f"{module} contains {module_members_count} members.", ] - module_retrieval_chunks.append(f"{module} has {module_members_count} many members.") - module_tuning_pairs.extend(allocate_tuning_pairs(module_members_count_pairs)) + + module_retrieval_chunks.append(module_members_count_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_members_count_context, + module_members_count_questions, + module_members_count_answers, + ) + ) module_member_names = enumerate_array_elements( module_contents.module_members, attribute="member_name" ) - module_members_pairs = [ - ( - f"List the members of {module}.", - f"Members of {module} are as follows: {module_member_names}.", - ), - ( - f"What are the members of the {module}?", - f"The {module} has the following members: {module_member_names}.", - ), - ( - f"Can you tell me the members of the {module}?", - f"Sure, the members of the {module} are: {module_member_names}.", - ), - ( - f"I need to know the members of the {module}.", - f"Members of {module} you asked for are: {module_member_names}.", - ), - ( - f"Could you list the members of the {module}?", - f"Of course, members of the {module} are: {module_member_names}.", - ), - ( - f"Please provide the members of the {module}.", - f"Members of {module} you requested are: {module_member_names}.", - ), + + 
module_members_context = f"Members of {module} are as follows: {module_member_names}." + module_members_questions = [ + f"List the members of {module}.", + f"What are the members of the {module}?", + f"Can you tell me the members of the {module}?", + f"I need to know the members of the {module}.", + f"Could you list the members of the {module}?", + f"Please provide the members of the {module}.", + ] + module_members_answers = [ + f"Members of {module} are as follows: {module_member_names}.", + f"The {module} has the following members: {module_member_names}.", + f"The members of the {module} are: {module_member_names}.", + f"Members of {module} you asked for are: {module_member_names}.", + f"Members of the {module} are: {module_member_names}.", + f"Members of {module} you requested are: {module_member_names}.", ] - module_retrieval_chunks.append(f"Members of {module} are as follows: {module_member_names}.") - module_tuning_pairs.extend(allocate_tuning_pairs(module_members_pairs)) + + module_retrieval_chunks.append(module_members_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_members_context, module_members_questions, module_members_answers + ) + ) if not (module_summary := module_contents.module_summary): - module_summary_pairs = [ - (f"What is the {module} for?", f"{module} does not have any documentation."), - ( - f"Can you tell me the purpose of the {module}?", - f"The {module} lacks any documentation.", - ), - ( - f"I'd like to know what the {module} is used for.", - f"Unfortunately, there is no documentation for the {module}.", - ), - ( - f"Could you explain the function of the {module}?", - f"Regrettably, the {module} doesn't come with any documentation.", - ), - (f"What does the {module} do?", f"The {module} is without any documentation."), - ] - module_retrieval_chunks.append( + module_summary_context = ( f"Unfortunately, {module} currently does not have any documentation." 
) - module_tuning_pairs.extend(allocate_tuning_pairs(module_summary_pairs)) + module_summary_questions = [ + f"What is the {module} for?", + f"Can you tell me the purpose of the {module}?", + f"I'd like to know what the {module} is used for.", + f"Could you explain the function of the {module}?", + f"What does the {module} do?", + ] + module_summary_answers = [ + f"{module} does not have any documentation.", + f"The {module} lacks any documentation.", + f"There is no documentation for the {module}.", + f"The {module} doesn't come with any documentation.", + f"The {module} is without any documentation.", + ] + + module_retrieval_chunks.append(module_summary_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_summary_context, module_summary_questions, module_summary_answers + ) + ) else: - module_summary_pairs = [ - ( - f"What is the '{module_name}' module for?", - f"{module} documents itself as follows: '{module_summary}'.", - ), - ( - f"Can you tell me the purpose of the '{module_name}' module?", - f"Purpose of {module} is documented as: '{module_summary}'.", - ), - ( - f"I'm curious about the '{module_name}' module. What does it do?", - f"The {module} is described as: '{module_summary}'.", - ), - ( - f"Could you explain the functionality of the '{module_name}' module?", - f"The functionality of the {module} is described as: '{module_summary}'.", - ), - ( - f"I'd like to know more about the '{module_name}' module. What's its role?", - f"The role of the {module} is: '{module_summary}'.", - ), - ( - f"What's the use of the '{module_name}' module?", - f"Use of the {module} is documented as: '{module_summary}'.", - ), - ] - module_retrieval_chunks.append( + module_summary_context = ( f"The following is the documentation of {module}: {module_summary}." 
) - module_tuning_pairs.extend(allocate_tuning_pairs(module_summary_pairs)) + module_summary_questions = [ + f"What is the '{module_name}' module for?", + f"Can you tell me the purpose of the '{module_name}' module?", + f"I'm curious about the '{module_name}' module. What does it do?", + f"Could you explain the functionality of the '{module_name}' module?", + f"I'd like to know more about the '{module_name}' module. What's its role?", + f"What's the use of the '{module_name}' module?", + ] + module_summary_answers = [ + f"{module} documents itself as follows: '{module_summary}'.", + f"Purpose of {module} is documented as: '{module_summary}'.", + f"The {module} is described as: '{module_summary}'.", + f"The functionality of the {module} is described as: '{module_summary}'.", + f"The role of the {module} is: '{module_summary}'.", + f"Use of the {module} is documented as: '{module_summary}'.", + ] + + module_retrieval_chunks.append(module_summary_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_summary_context, module_summary_questions, module_summary_answers + ) + ) if not (module_exports := module_contents.module_all_exports): - module_exports_pairs = [ - ( - f"Tell me the public members of the {module}.", - f"{module} lacks any public member exported through '__all__'.", - ), - ( - f"What are the public members of the {module}?", - "There are no public members exported through '__all__' in the {module}.", - ), - ( - f"Could you list the public members of the {module}?", - f"Unfortunately, {module} does not export any public members through '__all__'.", - ), - ( - f"I need to know the public members of the {module}.", - f"The {module} does not have any public members exported through '__all__'.", - ), - ( - f"Can you show me the public members of the {module}?", - f"The {module} does not contain any public members exported through '__all__'.", - ), - ( - f"I'm interested in the public members of the {module}. 
What are they?", - f"{module} does not export any public members through '__all__'.", - ), - ] - module_retrieval_chunks.append( + module_exports_context = ( f"{module} does not export anything publicly using __all__ variable." ) - module_tuning_pairs.extend(allocate_tuning_pairs(module_exports_pairs)) + module_exports_questions = [ + f"Tell me the public members of the {module}.", + f"What are the public members of the {module}?", + f"Could you list the public members of the {module}?", + f"I need to know the public members of the {module}.", + f"Can you show me the public members of the {module}?", + f"I'm interested in the public members of the {module}. What are they?", + ] + module_exports_answers = [ + f"{module} lacks any public member exported through '__all__'.", + f"There are no public members exported through '__all__' in the {module}.", + f"{module} does not export any public members through '__all__'.", + f"The {module} does not have any public members exported through '__all__'.", + f"The {module} does not contain any public members exported through '__all__'.", + f"{module} does not export any public members through '__all__'.", + ] + + module_retrieval_chunks.append(module_exports_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_exports_context, module_exports_questions, module_exports_answers + ) + ) else: module_exports_count = len(module_exports) - module_exports_count_pairs = [ - ( - f"How many objects does {module} export publicly?", - f"{module} exports {module_exports_count} many objects using __all__.", - ), - ( - f"What is the count of publicly exported objects in {module}?", - f"The count of publicly exported objects in {module} is {module_exports_count}.", - ), - ( - f"Could you tell me the number of objects publicly exported by {module}?", - f"{module} exports {module_exports_count} objects using __all__.", - ), - ( - f"Please provide the count of objects publicly exported by {module}.", - f"The number of 
objects publicly exported by {module} is {module_exports_count}.", - ), - ( - f"Tell me the quantity of objects that {module} exports publicly.", - f"{module} exports {module_exports_count} objects using __all__.", - ), - ( - f"Would you mind letting me know how many objects {module} publicly exports?", - f"{module} publicly exports {module_exports_count} objects.", - ), - ] - module_retrieval_chunks.append(f"{module} has {module_exports_count} many public exports.") - module_tuning_pairs.extend(allocate_tuning_pairs(module_exports_count_pairs)) + + module_exports_count_context = f"{module} has {module_exports_count} many public exports." + module_exports_count_questions = [ + f"How many objects does {module} export publicly?", + f"What is the count of publicly exported objects in {module}?", + f"Could you tell me the number of objects publicly exported by {module}?", + f"Please provide the count of objects publicly exported by {module}.", + f"Tell me the quantity of objects that {module} exports publicly.", + f"Would you mind letting me know how many objects {module} publicly exports?", + ] + module_exports_count_answers = [ + f"{module} exports {module_exports_count} many objects using __all__.", + f"The count of publicly exported objects in {module} is {module_exports_count}.", + f"{module} exports {module_exports_count} objects using __all__.", + f"The number of objects publicly exported by {module} is {module_exports_count}.", + f"{module} exports {module_exports_count} objects using __all__.", + f"{module} publicly exports {module_exports_count} objects.", + ] + + module_retrieval_chunks.append(module_exports_count_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_exports_count_context, + module_exports_count_questions, + module_exports_count_answers, + ) + ) module_public_exports = enumerate_array_elements(module_exports) - module_exports_pairs = [ - ( - f"Tell me the public members of the {module}.", - f"{module} publicly 
exports the following members using '__all__':" - f" {module_public_exports}.", - ), - ( - f"What are the public members of the {module}?", - f"The {module} publicly exports the following members using '__all__':" - f" {module_public_exports}.", - ), - ( - f"Could you list the public members of the {module}?", - f"Sure, the {module} publicly exports these members using '__all__':" - f" {module_public_exports}.", - ), - ( - f"I need to know the public members of the {module}.", - f"The {module} publicly exports these members using '__all__':" - f" {module_public_exports}.", - ), - ( - f"Can you show me the public members of the {module}?", - f"Of course, the {module} publicly exports the following members using '__all__':" - f" {module_public_exports}.", - ), - ] - module_retrieval_chunks.append( + + module_exports_context = ( f"{module} exports following members using __all__: {module_public_exports}." ) - module_tuning_pairs.extend(allocate_tuning_pairs(module_exports_pairs)) + module_exports_questions = [ + f"Tell me the public members of the {module}.", + f"What are the public members of the {module}?", + f"Could you list the public members of the {module}?", + f"I need to know the public members of the {module}.", + f"Can you show me the public members of the {module}?", + ] + module_exports_answers = [ + f"{module} publicly exports the following members using '__all__':" + f" {module_public_exports}.", + f"The {module} publicly exports the following members using '__all__':" + f" {module_public_exports}.", + f"The {module} publicly exports these members using '__all__':" + f" {module_public_exports}.", + ] + + module_retrieval_chunks.append(module_exports_context) + module_tuning_documents.extend( + allocate_tuning_triplets( + module_exports_context, module_exports_questions, module_exports_answers + ) + ) module_dataset = Dataset( - retrieval_chunks=module_retrieval_chunks, tuning_pairs=module_tuning_pairs + retrieval_chunks=module_retrieval_chunks, 
tuning_documents=module_tuning_documents ) return module_dataset @@ -977,139 +892,123 @@ def generate_enum_member_dataset( f"{enum_member} is a Python enum.", f"{enum_member} has following docstring: {enum_docstring}.", ] - enum_member_tuning_pairs: list[tuple[str, str, SplitName]] = [] + enum_member_tuning_documents: list[Document] = [] enum_member_count = len(member_type_details.enum_members) - enum_member_count_pairs = [ - ( - f"How many members are there in {enum_member}?", - f"{enum_member} has {enum_member_count} members.", - ), - ( - f"What is the count of members in {enum_member}?", - f"The count of members in {enum_member} is {enum_member_count}.", - ), - ( - f"Can you tell me the number of members in {enum_member}?", - f"Sure, the number of members in {enum_member} is {enum_member_count}.", - ), - ( - f"Could you provide the total number of members in {enum_member}?", - f"The total number of members in {enum_member} is {enum_member_count}.", - ), - ( - f"I need to know the quantity of members in {enum_member}.", - f"The quantity of members in {enum_member} is {enum_member_count}.", - ), - ( - f"Please inform me about the number of members in {enum_member}.", - f"The number of members in {enum_member} is {enum_member_count}.", - ), + + enum_member_count_context = f"{enum_member} has {enum_member_count} many members." 
+ enum_member_count_questions = [ + f"How many members are there in {enum_member}?", + f"What is the count of members in {enum_member}?", + f"Can you tell me the number of members in {enum_member}?", + f"Could you provide the total number of members in {enum_member}?", + f"I need to know the quantity of members in {enum_member}.", + f"Please inform me about the number of members in {enum_member}.", + ] + enum_member_count_answers = [ + f"{enum_member} has {enum_member_count} members.", + f"The count of members in {enum_member} is {enum_member_count}.", + f"The number of members in {enum_member} is {enum_member_count}.", + f"The total number of members in {enum_member} is {enum_member_count}.", + f"The quantity of members in {enum_member} is {enum_member_count}.", + f"The number of members in {enum_member} is {enum_member_count}.", ] - enum_member_retrieval_chunks.insert(-1, f"{enum_member} has {enum_member_count} many members.") - enum_member_tuning_pairs.extend(allocate_tuning_pairs(enum_member_count_pairs)) + + enum_member_retrieval_chunks.append(enum_member_count_context) + enum_member_tuning_documents.extend( + allocate_tuning_triplets( + enum_member_count_context, enum_member_count_questions, enum_member_count_answers + ) + ) enum_members = enumerate_array_elements( member_type_details.enum_members, attribute="enum_member" ) - enum_members_pairs = [ - ( - f"What are the different members of {enum_member}?", - f"Different members of {enum_member} are as follows: {enum_members}.", - ), - ( - f"Can you list the different members of {enum_member}?", - f"Sure, the different members of {enum_member} are: {enum_members}.", - ), - ( - f"Could you tell me the different members of {enum_member}?", - f"Of course, the different members of {enum_member} include: {enum_members}.", - ), - ( - f"I need to know the different members of {enum_member}.", - f"The different members of {enum_member} are: {enum_members}.", - ), - ( - f"What does {enum_member} consist of?", - 
f"{enum_member} consists of the following members: {enum_members}.", - ), + + enum_members_context = f"Members of {enum_member} are as follows: {enum_members}." + enum_members_questions = [ + f"What are the different members of {enum_member}?", + f"Can you list the different members of {enum_member}?", + f"Could you tell me the different members of {enum_member}?", + f"I need to know the different members of {enum_member}.", + f"What does {enum_member} consist of?", ] - enum_member_retrieval_chunks.insert( - -1, f"Members of {enum_member} are as follows: {enum_members}." + enum_members_answers = [ + f"Different members of {enum_member} are as follows: {enum_members}.", + f"The different members of {enum_member} include: {enum_members}.", + f"The different members of {enum_member} are: {enum_members}.", + f"{enum_member} consists of the following members: {enum_members}.", + ] + + enum_member_retrieval_chunks.append(enum_members_context) + enum_member_tuning_documents.extend( + allocate_tuning_triplets( + enum_members_context, enum_members_questions, enum_members_answers + ) ) - enum_member_tuning_pairs.extend(allocate_tuning_pairs(enum_members_pairs)) enum_member_names = enumerate_array_elements( member_type_details.enum_members, attribute="enum_member_name" ) - enum_member_names_pairs = [ - ( - f"List just the names of different members of {enum_member}.", - f"Different members of {enum_member} have the following names: {enum_member_names}.", - ), - ( - f"Can you provide the names of different members of {enum_member}?", - f"Sure, different members of {enum_member} are named as follows: {enum_member_names}.", - ), - ( - f"What are the names of different members of {enum_member}?", - f"The names of different members of {enum_member} are: {enum_member_names}.", - ), - ( - f"I need the names of different members of {enum_member}.", - f"The different members of {enum_member} have these names: {enum_member_names}.", - ), - ( - f"Could you list the names of different 
members of {enum_member}?", - f"Of course, different members of {enum_member} have these names:" - f" {enum_member_names}.", - ), - ( - f"Show me the names of different members of {enum_member}.", - f"The names of different members of {enum_member} are: {enum_member_names}.", - ), + + enum_member_names_context = ( + f"Names of different members of {enum_member} are as follows: {enum_member_names}." + ) + enum_member_names_questions = [ + f"List just the names of different members of {enum_member}.", + f"Can you provide the names of different members of {enum_member}?", + f"What are the names of different members of {enum_member}?", + f"I need the names of different members of {enum_member}.", + f"Could you list the names of different members of {enum_member}?", + f"Show me the names of different members of {enum_member}.", + ] + enum_member_names_answers = [ + f"Different members of {enum_member} have the following names: {enum_member_names}.", + f"Different members of {enum_member} are named as follows: {enum_member_names}.", + f"The names of different members of {enum_member} are: {enum_member_names}.", + f"Different members of {enum_member} have these names: {enum_member_names}.", ] - enum_member_retrieval_chunks.insert( - -1, f"Names of different members of {enum_member} are as follows: {enum_member_names}." 
+ + enum_member_retrieval_chunks.append(enum_member_names_context) + enum_member_tuning_documents.extend( + allocate_tuning_triplets( + enum_member_names_context, enum_member_names_questions, enum_member_names_answers + ) ) - enum_member_tuning_pairs.extend(allocate_tuning_pairs(enum_member_names_pairs)) enum_member_values = enumerate_array_elements( member_type_details.enum_members, attribute="enum_member_value" ) - enum_member_values_pairs = [ - ( - f"Only show the different values supported by {enum_member}.", - f"{enum_member} supports the following values: {enum_member_values}.", - ), - ( - f"What are the different values that {enum_member} supports?", - f"The different values that {enum_member} supports are: {enum_member_values}.", - ), - ( - f"Can you list the values supported by {enum_member}?", - f"Sure, {enum_member} supports these values: {enum_member_values}.", - ), - ( - f"I need to know the values supported by {enum_member}.", - f"{enum_member} supports these values: {enum_member_values}.", - ), - ( - f"Could you tell me the values that {enum_member} supports?", - f"Of course, the values that {enum_member} supports are: {enum_member_values}.", - ), - ( - f"Please provide the values supported by {enum_member}.", - f"The values supported by {enum_member} are: {enum_member_values}.", - ), + + enum_member_values_context = ( + f"Values of different members of {enum_member} are as follows: {enum_member_values}." + ) + enum_member_values_questions = [ + f"Only show the different values supported by {enum_member}.", + f"What are the different values that {enum_member} supports?", + f"Can you list the values supported by {enum_member}?", + f"I need to know the values supported by {enum_member}.", + f"Could you tell me the values that {enum_member} supports?", + f"Please provide the values supported by {enum_member}.", ] - enum_member_retrieval_chunks.insert( - -1, f"Values of different members of {enum_member} are as follows: {enum_member_values}." 
+ enum_member_values_answers = [ + f"{enum_member} supports the following values: {enum_member_values}.", + f"The different values that {enum_member} supports are: {enum_member_values}.", + f"{enum_member} supports these values: {enum_member_values}.", + f"The values that {enum_member} supports are: {enum_member_values}.", + f"The values supported by {enum_member} are: {enum_member_values}.", + ] + + enum_member_retrieval_chunks.append(enum_member_values_context) + enum_member_tuning_documents.extend( + allocate_tuning_triplets( + enum_member_values_context, enum_member_values_questions, enum_member_values_answers + ) ) - enum_member_tuning_pairs.extend(allocate_tuning_pairs(enum_member_values_pairs)) enum_member_dataset = Dataset( - retrieval_chunks=enum_member_retrieval_chunks, tuning_pairs=enum_member_tuning_pairs + retrieval_chunks=enum_member_retrieval_chunks, + tuning_documents=enum_member_tuning_documents, ) return enum_member_dataset, enum_member_retrieval_chunks @@ -1141,717 +1040,659 @@ def generate_class_member_dataset( # noqa: C901, PLR0912, PLR0915 f"{class_member} is a Python class.", f"{class_member} has following docstring: {class_docstring}.", ] - class_member_tuning_pairs: list[tuple[str, str, SplitName]] = [] + class_member_tuning_documents: list[Document] = [] if not (class_parameters := member_type_details.class_parameters): - class_parameters_pairs = [ - ( - f"What are the different parameters of {class_member}?", - f"{class_member} needs no arguments for instantiation.", - ), - ( - f"Can you tell me the parameters required for {class_member}?", - f"No parameters are required for instantiating {class_member}.", - ), - ( - f"What arguments do I need to instantiate {class_member}?", - f"You don't need any arguments to instantiate {class_member}.", - ), - ( - f"Do I need any parameters to use {class_member}?", - f"{class_member} can be used without any parameters.", - ), - ( - f"What should I pass as arguments when creating an instance of 
{class_member}?", - "There's no need to pass any arguments" - f" when creating an instance of {class_member}.", - ), - ( - f"Are there any parameters needed for the instantiation of {class_member}?", - f"The instantiation of {class_member} doesn't require any parameters.", - ), - ] - class_member_retrieval_chunks.append( - f"{class_member} requires no arguments for instantiation." - ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameters_pairs)) + class_parameters_context = f"{class_member} requires no arguments for instantiation." + class_parameters_questions = [ + f"What are the different parameters of {class_member}?", + f"Can you tell me the parameters required for {class_member}?", + f"What arguments do I need to instantiate {class_member}?", + f"Do I need any parameters to use {class_member}?", + f"What should I pass as arguments when creating an instance of {class_member}?", + f"Are there any parameters needed for the instantiation of {class_member}?", + ] + class_parameters_answers = [ + f"{class_member} needs no arguments for instantiation.", + f"No parameters are required for instantiating {class_member}.", + f"Arguments are not needed to instantiate {class_member}.", + f"{class_member} can be used without any parameters.", + f"There's no need to pass any arguments when creating an instance of {class_member}.", + f"The instantiation of {class_member} doesn't require any parameters.", + ] + + class_member_retrieval_chunks.append(class_parameters_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_parameters_context, class_parameters_questions, class_parameters_answers + ) + ) else: class_parameter_names = enumerate_array_elements( class_parameters, attribute="parameter_details" ) - class_parameters_pairs = [ - ( - f"What are the different parameters of {class_member}?", - f"{class_member} supports these arguments to initiate" - f" a new instance: {class_parameter_names}.", - ), - ( - f"Can you list the 
parameters for {class_member}?", - f"Sure, {class_member} can be initiated with these arguments:" - f" {class_parameter_names}.", - ), - ( - f"I need to know the parameters of {class_member}.", - f"The parameters to initiate a new instance of {class_member} are:" - f" {class_parameter_names}.", - ), - ( - f"Tell me the parameters that {class_member} supports.", - f"{class_member} can be initiated with these arguments: {class_parameter_names}.", - ), - ( - f"What arguments does {class_member} take for initialisation?", - f"To initialise {class_member}, you can use these arguments:" - f" {class_parameter_names}.", - ), - ] - class_member_retrieval_chunks.append( + + class_parameters_context = ( f"{class_member} requires the following arguments for initialisation:" f" {class_parameter_names}" ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameters_pairs)) + class_parameters_questions = [ + f"What are the different parameters of {class_member}?", + f"Can you list the parameters for {class_member}?", + f"I need to know the parameters of {class_member}.", + f"Tell me the parameters that {class_member} supports.", + f"What arguments does {class_member} take for initialisation?", + ] + class_parameters_answers = [ + f"{class_member} supports these arguments to initiate" + f" a new instance: {class_parameter_names}.", + f"{class_member} can be initiated with these arguments: {class_parameter_names}.", + f"The parameters to initiate a new instance of {class_member} are:" + f" {class_parameter_names}.", + f"To initialise {class_member}, you can use these arguments: {class_parameter_names}.", + ] + + class_member_retrieval_chunks.append(class_parameters_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_parameters_context, class_parameters_questions, class_parameters_answers + ) + ) for class_parameter in class_parameters: parameter_name = class_parameter.parameter_name parameter = f"'{parameter_name}' argument in 
{class_member}" if (parameter_default := class_parameter.parameter_default) is EMPTY_PARAMETER: - class_parameter_defaults_pairs = [ - ( - f"Tell default value of {parameter}.", - f"{parameter} does not have a default value.", - ), - ( - f"What is the default value of {parameter}?", - f"The {parameter} does not have a default value.", - ), - ( - f"Could you inform me about default value of {parameter}?", - f"Sure, the {parameter} does not have a default value.", - ), - ( - f"I need to know the default value of {parameter}. Can you help?", - f"Of course, the {parameter} does not have a default value.", - ), - ( - f"Can you tell me if {parameter} has default value?", - f"No, the {parameter} does not have a default value.", - ), - ( - f"I'm curious about default value of {parameter}.", - f"Well, the {parameter} does not have a default value.", - ), + class_parameter_defaults_context = f"{parameter} does not have a default value." + class_parameter_defaults_questions = [ + f"Tell default value of {parameter}.", + f"What is the default value of {parameter}?", + f"Could you inform me about default value of {parameter}?", + f"I need to know the default value of {parameter}. 
Can you help?", + f"Can you tell me if {parameter} has default value?", + f"I'm curious about default value of {parameter}.", ] - class_member_retrieval_chunks.append(f"{parameter} does not have a default value.") - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameter_defaults_pairs)) - else: - class_parameter_defaults_pairs = [ - ( - f"Tell default value of {parameter}.", - f"{parameter} takes {parameter_default} by default.", - ), - ( - f"What is the default value of {parameter}?", - f"The default value of {parameter} is {parameter_default}.", - ), - ( - f"Could you inform me about default value of {parameter}?", - f"Sure, the default value of {parameter} is {parameter_default}.", - ), - ( - f"I need to know the default value of {parameter}.", - f"The default value of {parameter} is {parameter_default}.", - ), - ( - f"Can you provide default value of {parameter}?", - f"Yes, default value of {parameter} is {parameter_default}.", - ), - ( - f"Please, disclose default value of {parameter}.", - f"Certainly, the default value of {parameter} is {parameter_default}.", - ), + class_parameter_defaults_answers = [ + f"{parameter} does not have a default value.", + f"The {parameter} does not have a default value.", ] - class_member_retrieval_chunks.append( + + class_member_retrieval_chunks.append(class_parameter_defaults_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_parameter_defaults_context, + class_parameter_defaults_questions, + class_parameter_defaults_answers, + ) + ) + else: + class_parameter_defaults_context = ( f"{parameter_default} is the default value of {parameter}." 
) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameter_defaults_pairs)) + class_parameter_defaults_questions = [ + f"Tell default value of {parameter}.", + f"What is the default value of {parameter}?", + f"Could you inform me about default value of {parameter}?", + f"I need to know the default value of {parameter}.", + f"Can you provide default value of {parameter}?", + f"Please, disclose default value of {parameter}.", + ] + class_parameter_defaults_answers = [ + f"{parameter} takes {parameter_default} by default.", + f"Default value of {parameter} is {parameter_default}.", + f"The default value of {parameter} is {parameter_default}.", + ] + + class_member_retrieval_chunks.append(class_parameter_defaults_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_parameter_defaults_context, + class_parameter_defaults_questions, + class_parameter_defaults_answers, + ) + ) if (parameter_annotation := class_parameter.parameter_annotation) is EMPTY_PARAMETER: - class_parameter_types_pairs = [ - ( - f"Name type hint for {parameter}.", - f"{parameter} does not have a type annotation.", - ), - ( - f"What is the type hint for {parameter}?", - f"There is no type annotation for the {parameter}.", - ), - ( - f"Can you tell me the type hint for {parameter}?", - f"The {parameter} is not annotated with a type.", - ), - ( - f"I'm looking for the type hint for {parameter}. Can you help?", - f"Sure, the {parameter} does not have a type annotation.", - ), - ( - f"Could you provide the type hint for {parameter}?", - f"Unfortunately, {parameter} does not have type annotation.", - ), - ( - f"I need to know the type hint for {parameter}.", - f"The {parameter} does not come with a type annotation.", - ), + class_parameter_types_context = f"Type hint for {parameter} is unavailable." 
+ class_parameter_types_questions = [ + f"Name type hint for {parameter}.", + f"What is the type hint for {parameter}?", + f"Can you tell me the type hint for {parameter}?", + f"I'm looking for the type hint for {parameter}. Can you help?", + f"Could you provide the type hint for {parameter}?", + f"I need to know the type hint for {parameter}.", ] - class_member_retrieval_chunks.append(f"Type hint for {parameter} is unavailable.") - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameter_types_pairs)) - else: - class_parameter_types_pairs = [ - ( - f"Name type hint for {parameter}.", - f"{parameter} has '{parameter_annotation}' as type hint.", - ), - ( - f"What is the type hint for {parameter}?", - f"The type hint for {parameter} is '{parameter_annotation}'.", - ), - ( - f"Could you tell me the type hint for {parameter}?", - f"Sure, the type hint for {parameter} is '{parameter_annotation}'.", - ), - ( - f"I need to know the type hint for {parameter}.", - f"The type hint for {parameter} is '{parameter_annotation}'.", - ), - ( - f"Identify the type hint for {parameter}.", - f"The type hint for {parameter} is '{parameter_annotation}'.", - ), - ( - f"Can you specify the type hint for {parameter}?", - f"Yes, the type hint for {parameter} is '{parameter_annotation}'.", - ), + class_parameter_types_answers = [ + f"{parameter} does not have a type annotation.", + f"There is no type annotation for the {parameter}.", + f"The {parameter} is not annotated with a type.", + f"The {parameter} does not have a type annotation.", + f"{parameter} does not have type annotation.", + f"The {parameter} does not come with a type annotation.", ] - class_member_retrieval_chunks.append( + + class_member_retrieval_chunks.append(class_parameter_types_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_parameter_types_context, + class_parameter_types_questions, + class_parameter_types_answers, + ) + ) + else: + class_parameter_types_context = ( 
f"{parameter} is annotated as '{parameter_annotation}' type." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameter_types_pairs)) + class_parameter_types_questions = [ + f"Name type hint for {parameter}.", + f"What is the type hint for {parameter}?", + f"Could you tell me the type hint for {parameter}?", + f"I need to know the type hint for {parameter}.", + f"Identify the type hint for {parameter}.", + f"Can you specify the type hint for {parameter}?", + ] + class_parameter_types_answers = [ + f"{parameter} has '{parameter_annotation}' as type hint.", + f"The type hint for {parameter} is '{parameter_annotation}'.", + ] + + class_member_retrieval_chunks.append(class_parameter_types_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_parameter_types_context, + class_parameter_types_questions, + class_parameter_types_answers, + ) + ) if not (parameter_summary := class_parameter.parameter_summary): - class_parameter_summary_pairs = [ - ( - f"What does {parameter} do?", - f"Docstring of {class_member} does not describe '{parameter_name}'.", - ), - ( - f"Can you explain the role of {parameter}?", - f"The docstring of {class_member} does not provide any information about" - f" '{parameter_name}'.", - ), - ( - f"I'm trying to understand what {parameter} does. Can you help?", - f"Unfortunately, the docstring of {class_member} does not mention anything" - f" about '{parameter_name}'.", - ), - ( - f"What is the function of {parameter}?", - f"There is no description of '{parameter_name}' in the docstring of" - f" {class_member}.", - ), - ( - f"Could you tell me what '{parameter_name}' does in {class_member}?", - f"The docstring of {class_member} does not contain any details about" - f" '{parameter_name}'.", - ), - ( - f"I'm curious about the purpose of {parameter}. 
Can you enlighten me?", - f"I'm sorry, but the docstring of {class_member} does not discuss" - f" '{parameter_name}'.", - ), - ] - class_member_retrieval_chunks.append( + class_parameter_summary_context = ( f"{parameter} lacks any documentation in the docstring." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameter_summary_pairs)) - else: - class_parameter_summary_pairs = [ - ( - f"What does {parameter} do?", - f"{class_member} documents role of '{parameter_name}' as follows:" - f" '{parameter_summary}'.", - ), - ( - f"Can you explain the role of {parameter}?", - f"Sure, {class_member} describes '{parameter_name}' as follows:" - f" '{parameter_summary}'.", - ), - ( - f"I'm curious about {parameter}. What does it do?", - f"In {class_member}, '{parameter_name}' is documented as follows:" - f" '{parameter_summary}'.", - ), - ( - f"Could you tell me what {parameter} does?", - f"Of course, {parameter} is described as follows: '{parameter_summary}'.", - ), - ( - f"What's the function of {parameter}?", - f"{class_member} describes the function of '{parameter_name}' as follows:" - f" '{parameter_summary}'.", - ), - ( - f"I'd like to know the purpose of {parameter}.", - f"In {class_member}, the purpose of '{parameter_name}' is defined as follows:" - f" '{parameter_summary}'.", - ), + class_parameter_summary_questions = [ + f"What does {parameter} do?", + f"Can you explain the role of {parameter}?", + f"I'm trying to understand what {parameter} does. Can you help?", + f"What is the function of {parameter}?", + f"Could you tell me what '{parameter_name}' does in {class_member}?", + f"I'm curious about the purpose of {parameter}. 
Can you enlighten me?", ] - class_member_retrieval_chunks.append( + class_parameter_summary_answers = [ + f"Docstring of {class_member} does not describe '{parameter_name}'.", + f"The docstring of {class_member} does not provide any information about" + f" '{parameter_name}'.", + f"The docstring of {class_member} does not mention anything" + f" about '{parameter_name}'.", + f"There is no description of '{parameter_name}' in the docstring of" + f" {class_member}.", + f"The docstring of {class_member} does not contain any details about" + f" '{parameter_name}'.", + f"The docstring of {class_member} does not discuss '{parameter_name}'.", + ] + + class_member_retrieval_chunks.append(class_parameter_summary_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_parameter_summary_context, + class_parameter_summary_questions, + class_parameter_summary_answers, + ) + ) + else: + class_parameter_summary_context = ( f"As per docstring, role of {parameter} is: '{parameter_summary}'." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_parameter_summary_pairs)) + class_parameter_summary_questions = [ + f"What does {parameter} do?", + f"Can you explain the role of {parameter}?", + f"I'm curious about {parameter}. 
What does it do?", + f"Could you tell me what {parameter} does?", + f"What's the function of {parameter}?", + f"I'd like to know the purpose of {parameter}.", + ] + class_parameter_summary_answers = [ + f"{class_member} documents role of '{parameter_name}' as follows:" + f" '{parameter_summary}'.", + f"{class_member} describes '{parameter_name}' as follows: '{parameter_summary}'.", + f"In {class_member}, '{parameter_name}' is documented as follows:" + f" '{parameter_summary}'.", + f"{parameter} is described as follows: '{parameter_summary}'.", + f"{class_member} describes the function of '{parameter_name}' as follows:" + f" '{parameter_summary}'.", + f"In {class_member}, the purpose of '{parameter_name}' is defined as follows:" + f" '{parameter_summary}'.", + ] + + class_member_retrieval_chunks.append(class_parameter_summary_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_parameter_summary_context, + class_parameter_summary_questions, + class_parameter_summary_answers, + ) + ) if not (class_methods := member_type_details.class_methods): - class_method_names_pairs = [ - ( - f"List names of the public methods of {class_member}.", - f"{class_member} does not have any public methods (not starting with '_').", - ), - ( - f"Can you provide the names of the public methods for {class_member}?", - f"Unfortunately, {class_member} does not have any public methods.", - ), - ( - f"What are the public methods of {class_member}?", - f"There are no public methods (not starting with '_') in {class_member}.", - ), - ( - f"I need to know the public methods of {class_member}. 
Can you list them?", - f"I'm sorry, but {class_member} does not have any public methods.", - ), - ( - f"Could you list the public methods of {class_member}?", - f"{class_member} does not contain any public methods (not starting with '_').", - ), - ( - f"Show me the public methods of {class_member}.", - f"It appears that {class_member} does not have any public methods.", - ), - ] - class_member_retrieval_chunks.append( + class_method_names_context = ( f"{class_member} has no public (without _ as the prefix) methods." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_method_names_pairs)) + class_method_names_questions = [ + f"List names of the public methods of {class_member}.", + f"Can you provide the names of the public methods for {class_member}?", + f"What are the public methods of {class_member}?", + f"I need to know the public methods of {class_member}. Can you list them?", + f"Could you list the public methods of {class_member}?", + f"Show me the public methods of {class_member}.", + ] + class_method_names_answers = [ + f"{class_member} does not have any public methods (not starting with '_').", + f"{class_member} does not have any public methods.", + f"There are no public methods (not starting with '_') in {class_member}.", + f"{class_member} does not have any public methods.", + f"{class_member} does not contain any public methods (not starting with '_').", + f"It appears that {class_member} does not have any public methods.", + ] + + class_member_retrieval_chunks.append(class_method_names_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_method_names_context, + class_method_names_questions, + class_method_names_answers, + ) + ) else: class_methods_count = len(class_methods) - class_methods_count_pairs = [ - ( - f"How many public methods does {class_member} have?", - f"{class_member} has {class_methods_count} many public methods.", - ), - ( - f"What is the count of public methods in {class_member}?", - f"The 
count of public methods in {class_member} is {class_methods_count}.", - ), - ( - f"Could you tell me the number of public methods in {class_member}?", - f"{class_member} has {class_methods_count} public methods.", - ), - ( - f"Please provide the count of public methods for {class_member}.", - f"The number of public methods in {class_member} is {class_methods_count}.", - ), - ( - f"Tell me the quantity of public methods present in {class_member}.", - f"{class_member} has {class_methods_count} public methods.", - ), - ( - f"Would you mind letting me know how many public methods {class_member} contains?", - f"{class_member} contains {class_methods_count} public methods.", - ), - ] - class_member_retrieval_chunks.append( + + class_methods_count_context = ( f"{class_member} has {class_methods_count} many public methods." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_methods_count_pairs)) + class_methods_count_questions = [ + f"How many public methods does {class_member} have?", + f"What is the count of public methods in {class_member}?", + f"Could you tell me the number of public methods in {class_member}?", + f"Please provide the count of public methods for {class_member}.", + f"Tell me the quantity of public methods present in {class_member}.", + f"Would you mind letting me know how many public methods {class_member} contains?", + ] + class_methods_count_answers = [ + f"{class_member} has {class_methods_count} many public methods.", + f"The count of public methods in {class_member} is {class_methods_count}.", + f"{class_member} has {class_methods_count} public methods.", + f"The number of public methods in {class_member} is {class_methods_count}.", + f"{class_member} has {class_methods_count} public methods.", + f"{class_member} contains {class_methods_count} public methods.", + ] + + class_member_retrieval_chunks.append(class_methods_count_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_methods_count_context, + 
class_methods_count_questions, + class_methods_count_answers, + ) + ) class_public_methods = enumerate_array_elements(class_methods, attribute="method_name") - class_method_names_pairs = [ - ( - f"List names of the public methods of {class_member}.", - f"Here are the public methods of {class_member}: {class_public_methods}.", - ), - ( - f"Can you provide the names of the public methods for {class_member}?", - f"Sure, the public methods of {class_member} that do not start with '_' are:" - f" {class_public_methods}.", - ), - ( - f"What are the public methods of {class_member}?", - f"The public methods of {class_member} (excluding those starting with '_') are:" - f" {class_public_methods}.", - ), - ( - f"I need to know the public methods of {class_member}.", - f"The public methods of {class_member} (those not starting with '_') are:" - f" {class_public_methods}.", - ), - ( - f"Could you list the public methods of {class_member}?", - f"Of course, the public methods of {class_member} (not beginning with '_') are:" - f" {class_public_methods}.", - ), - ( - f"Please show me the public methods of {class_member}.", - f"Here you go, the public methods of {class_member}" - f" (excluding those with a prefix '_') are: {class_public_methods}.", - ), - ] - class_member_retrieval_chunks.append( + + class_method_names_context = ( f"{class_member} has the following public methods: {class_public_methods}" ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_method_names_pairs)) + class_method_names_questions = [ + f"List names of the public methods of {class_member}.", + f"Can you provide the names of the public methods for {class_member}?", + f"What are the public methods of {class_member}?", + f"I need to know the public methods of {class_member}.", + f"Could you list the public methods of {class_member}?", + f"Please show me the public methods of {class_member}.", + ] + class_method_names_answers = [ + f"Here are the public methods of {class_member}: 
{class_public_methods}.", + f"The public methods of {class_member} that do not start with '_' are:" + f" {class_public_methods}.", + f"The public methods of {class_member} (excluding those starting with '_') are:" + f" {class_public_methods}.", + f"The public methods of {class_member} (those not starting with '_') are:" + f" {class_public_methods}.", + f"The public methods of {class_member} (not beginning with '_') are:" + f" {class_public_methods}.", + f"The public methods of {class_member} (excluding those with a prefix '_') are:" + f" {class_public_methods}.", + ] + + class_member_retrieval_chunks.append(class_method_names_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_method_names_context, + class_method_names_questions, + class_method_names_answers, + ) + ) for class_method in class_methods: method_name = class_method.method_name method = f"'{method_name}' method of {class_member}" if not (method_parameters := class_method.method_parameters): - class_method_parameters_pairs = [ - (f"What arguments do {method} accept?", f"{method} does not take any parameters."), - ( - f"Can you tell me the parameters that {method} requires?", - f"The {method} does not require any parameters.", - ), - ( - f"What are the inputs for the {method} in {class_member}?", - f"There are no inputs for the {method} in {class_member}.", - ), - ( - f"Does the {method} need any arguments?", - f"No, {method} does not need any arguments.", - ), - ( - f"What parameters should I pass to {method}?", - f"You don't need to pass any parameters to the {method}.", - ), - ( - f"What are required arguments for {method}?", - f"{method} does not require any arguments.", - ), + class_method_parameters_context = f"{method} takes no arguments." 
+ class_method_parameters_questions = [ + f"What arguments do {method} accept?", + f"Can you tell me the parameters that {method} requires?", + f"What are the inputs for the {method} in {class_member}?", + f"Does the {method} need any arguments?", + f"What parameters should I pass to {method}?", + f"What are required arguments for {method}?", + ] + class_method_parameters_answers = [ + f"{method} does not take any parameters.", + f"The {method} does not require any parameters.", + f"There are no inputs for the {method} in {class_member}.", + f"{method} does not need any arguments.", + f"No parameters need to be passed to the {method}.", + f"{method} does not require any arguments.", ] - class_member_retrieval_chunks.append(f"{method} takes no arguments.") - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_method_parameters_pairs)) + + class_member_retrieval_chunks.append(class_method_parameters_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_method_parameters_context, + class_method_parameters_questions, + class_method_parameters_answers, + ) + ) else: class_method_parameters = enumerate_array_elements(method_parameters) - class_method_parameters_pairs = [ - ( - f"What arguments do {method} accept?", - f"{method} takes the following parameters: {class_method_parameters}.", - ), - ( - f"Can you tell me the parameters that {method} requires?", - f"Sure, {method} requires these parameters: {class_method_parameters}.", - ), - ( - f"I need to know arguments for {method}.", - f"The {method} has these arguments: {class_method_parameters}.", - ), - ( - f"What are the parameters for '{method}'?", - f"The parameters for {method} are: {class_method_parameters}.", - ), - ( - f"Could you list the arguments that the {method} takes?", - f"Certainly, the {method} takes these arguments: {class_method_parameters}.", - ), - ] - class_member_retrieval_chunks.append( + + class_method_parameters_context = ( f"{method} accepts following 
parameters: {class_method_parameters}" ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_method_parameters_pairs)) + class_method_parameters_questions = [ + f"What arguments do {method} accept?", + f"Can you tell me the parameters that {method} requires?", + f"I need to know arguments for {method}.", + f"What are the parameters for '{method}'?", + f"Could you list the arguments that the {method} takes?", + ] + class_method_parameters_answers = [ + f"{method} takes the following parameters: {class_method_parameters}.", + f"{method} requires these parameters: {class_method_parameters}.", + f"The {method} has these arguments: {class_method_parameters}.", + f"The parameters for {method} are: {class_method_parameters}.", + f"The {method} takes these arguments: {class_method_parameters}.", + ] + + class_member_retrieval_chunks.append(class_method_parameters_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_method_parameters_context, + class_method_parameters_questions, + class_method_parameters_answers, + ) + ) if not (method_summary := class_method.method_summary): - class_method_summary_pairs = [ - (f"What does {method} do?", f"Docstring of {method} is missing."), - ( - f"Can you explain functionality of {method}?", - f"The docstring for {method} is not available.", - ), - ( - f"I'm trying to understand what {method} does. Can you help?", - f"Unfortunately, the docstring for {method} is not provided.", - ), - ( - f"Could you describe the role of {method}?", - f"There is no docstring available for {method}.", - ), - ( - f"I'm not sure what {method} does. Can you clarify?", - f"The {method} lacks a docstring.", - ), - (f"What's the purpose of {method}?", f"The {method} doesn't have a docstring."), + class_method_summary_context = f"Unfortunately, {method} is not documented." 
+ class_method_summary_questions = [ + f"What does {method} do?", + f"Can you explain functionality of {method}?", + f"I'm trying to understand what {method} does. Can you help?", + f"Could you describe the role of {method}?", + f"I'm not sure what {method} does. Can you clarify?", + f"What's the purpose of {method}?", ] - class_member_retrieval_chunks.append(f"Unfortunately, {method} is not documented.") - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_method_summary_pairs)) - else: - class_method_summary_pairs = [ - ( - f"What does {method} do?", - f"Based on method docstring, its role is to '{method_summary}'.", - ), - ( - f"Can you explain the function of {method}?", - f"Sure, according to method docstring, it is designed to '{method_summary}'.", - ), - ( - f"I'm curious about the {method}. What's its purpose?", - f"Well, if we look at the docstring of {method}, we can see that it's meant to" - f" '{method_summary}'.", - ), - ( - f"Could you tell me what the {method} does?", - f"Of course, the docstring of {method} indicates that its function is to" - f" '{method_summary}'.", - ), - ( - f"I'd like to understand role of {method}.", - f"Certainly, method docstring reveals that its job is to '{method_summary}'.", - ), - ( - f"What's the functionality of the {method}?", - f"As per the method docstring, it's designed to '{method_summary}'.", - ), + class_method_summary_answers = [ + f"Docstring of {method} is missing.", + f"The docstring for {method} is not available.", + f"The docstring for {method} is not provided.", + f"There is no docstring available for {method}.", + f"The {method} lacks a docstring.", + f"The {method} doesn't have a docstring.", ] - class_member_retrieval_chunks.append( + + class_member_retrieval_chunks.append(class_method_summary_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_method_summary_context, + class_method_summary_questions, + class_method_summary_answers, + ) + ) + else: + 
class_method_summary_context = ( f"Based on docstring, {method} has the purpose of '{method_summary}'." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_method_summary_pairs)) + class_method_summary_questions = [ + f"What does {method} do?", + f"Can you explain the function of {method}?", + f"I'm curious about the {method}. What's its purpose?", + f"Could you tell me what the {method} does?", + f"I'd like to understand role of {method}.", + f"What's the functionality of the {method}?", + ] + class_method_summary_answers = [ + f"Based on method docstring, its role is to '{method_summary}'.", + f"According to method docstring, it is designed to '{method_summary}'.", + f"If we look at the docstring of {method}, we can see that it's meant to" + f" '{method_summary}'.", + f"The docstring of {method} indicates that its function is to '{method_summary}'.", + f"Method docstring reveals that its job is to '{method_summary}'.", + f"As per the method docstring, it's designed to '{method_summary}'.", + ] + + class_member_retrieval_chunks.append(class_method_summary_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_method_summary_context, + class_method_summary_questions, + class_method_summary_answers, + ) + ) if not (class_attributes := member_type_details.class_attributes): - class_attribute_names_pairs = [ - ( - f"Are there any public attributes of {class_member}?", - f"{class_member} has no public attributes (not starting with '_').", - ), - ( - f"Does {class_member} have any public attributes?", - f"No, {class_member} does not have any public attributes.", - ), - ( - f"Can you tell me if {class_member} has any public attributes?", - f"{class_member} does not have any public attributes (not starting with '_').", - ), - ( - f"I'm looking for public attributes of {class_member}. 
Are there any?", - f"There are no public attributes (not starting with '_') for {class_member}.", - ), - ( - f"Is it possible to find any public attributes in {class_member}?", - f"It's not possible to find any public attributes in {class_member}.", - ), - ] - class_member_retrieval_chunks.append(f"{class_member} has no public attributes.") - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_attribute_names_pairs)) + class_attribute_names_context = f"{class_member} has no public attributes." + class_attribute_names_questions = [ + f"Are there any public attributes of {class_member}?", + f"Does {class_member} have any public attributes?", + f"Can you tell me if {class_member} has any public attributes?", + f"I'm looking for public attributes of {class_member}. Are there any?", + f"Is it possible to find any public attributes in {class_member}?", + ] + class_attribute_names_answers = [ + f"{class_member} has no public attributes (not starting with '_').", + f"{class_member} does not have any public attributes.", + f"{class_member} does not have any public attributes (not starting with '_').", + f"There are no public attributes (not starting with '_') for {class_member}.", + f"It's not possible to find any public attributes in {class_member}.", + ] + + class_member_retrieval_chunks.append(class_attribute_names_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_attribute_names_context, + class_attribute_names_questions, + class_attribute_names_answers, + ) + ) else: class_attributes_count = len(class_attributes) - class_attributes_count_pairs = [ - ( - f"How many public attributes does {class_member} have?", - f"{class_member} has {class_attributes_count} many public attributes.", - ), - ( - f"What is the count of public attributes in {class_member}?", - f"The count of public attributes in {class_member} is {class_attributes_count}.", - ), - ( - f"Could you tell me the number of public attributes in {class_member}?", - 
f"{class_member} has {class_attributes_count} public attributes.", - ), - ( - f"Please provide the count of public attributes for {class_member}.", - f"Number of public attributes in {class_member} is {class_attributes_count}.", - ), - ( - f"Tell me the quantity of public attributes present in {class_member}.", - f"{class_member} has {class_attributes_count} public attributes.", - ), - ( - f"Would you mind letting me know how many public attributes {class_member}" - " contains?", - f"{class_member} contains {class_attributes_count} public attributes.", - ), - ] - class_member_retrieval_chunks.append( + + class_attributes_count_context = ( f"{class_member} has {class_attributes_count} many public attributes." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_attributes_count_pairs)) + class_attributes_count_questions = [ + f"How many public attributes does {class_member} have?", + f"What is the count of public attributes in {class_member}?", + f"Could you tell me the number of public attributes in {class_member}?", + f"Please provide the count of public attributes for {class_member}.", + f"Tell me the quantity of public attributes present in {class_member}.", + f"Would you mind letting me know how many public attributes {class_member} contains?", + ] + class_attributes_count_answers = [ + f"{class_member} has {class_attributes_count} many public attributes.", + f"The count of public attributes in {class_member} is {class_attributes_count}.", + f"{class_member} has {class_attributes_count} public attributes.", + f"Number of public attributes in {class_member} is {class_attributes_count}.", + f"{class_member} has {class_attributes_count} public attributes.", + f"{class_member} contains {class_attributes_count} public attributes.", + ] + + class_member_retrieval_chunks.append(class_attributes_count_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_attributes_count_context, + class_attributes_count_questions, + 
class_attributes_count_answers, + ) + ) class_public_attributes = enumerate_array_elements( class_attributes, attribute="attribute_name" ) - class_attribute_names_pairs = [ - ( - f"Are there any public attributes of {class_member}?", - f"These are the public attributes of {class_member}: {class_public_attributes}.", - ), - ( - f"Can you list the public attributes of {class_member}?", - f"{class_member} has the following public attributes (not starting with '_'):" - f" {class_public_attributes}.", - ), - ( - f"What are the public attributes of {class_member}?", - f"The public attributes of {class_member} (those not starting with '_') are:" - f" {class_public_attributes}.", - ), - ( - f"I need to know the public attributes of {class_member}.", - f"Sure, the public attributes of {class_member} are: {class_public_attributes}.", - ), - ( - f"Could you tell me the public attributes of {class_member}?", - f"Of course, public attributes of {class_member} (not starting with '_') are:" - f" {class_public_attributes}.", - ), - ] - class_member_retrieval_chunks.append( + + class_attribute_names_context = ( f"{class_member} has following public attributes: {class_public_attributes}" ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_attribute_names_pairs)) + class_attribute_names_questions = [ + f"Are there any public attributes of {class_member}?", + f"Can you list the public attributes of {class_member}?", + f"What are the public attributes of {class_member}?", + f"I need to know the public attributes of {class_member}.", + f"Could you tell me the public attributes of {class_member}?", + ] + class_attribute_names_answers = [ + f"These are the public attributes of {class_member}: {class_public_attributes}.", + f"{class_member} has the following public attributes (not starting with '_'):" + f" {class_public_attributes}.", + f"The public attributes of {class_member} (those not starting with '_') are:" + f" {class_public_attributes}.", + f"The public attributes of 
{class_member} are: {class_public_attributes}.", + f"Public attributes of {class_member} (not starting with '_') are:" + f" {class_public_attributes}.", + ] + + class_member_retrieval_chunks.append(class_attribute_names_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_attribute_names_context, + class_attribute_names_questions, + class_attribute_names_answers, + ) + ) if not (class_summary := member_type_details.class_summary): - class_summary_pairs = [ - ( - f"What does {class_member} do in short?", - f"Docstring of {class_member} lacks a summary of its objective.", - ), - ( - f"Can you briefly explain the function of {class_member}?", - f"Docstring of {class_member} doesn't provide a concise summary of its purpose.", - ), - ( - f"Could you tell me what {class_member} is used for?", - f"Unfortunately, the docstring of {class_member} doesn't contain" - " a brief description of its function.", - ), - ( - f"I'm not sure what {class_member} does. Can you clarify?", - f"The docstring of {class_member} doesn't succinctly explain its role.", - ), - ( - f"What's the purpose of {class_member}?", - f"Docstring of {class_member} doesn't have any explanation of its objective.", - ), - ] - class_member_retrieval_chunks.append( - f"Unfortunately, {class_member} does not document its objective." - ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_summary_pairs)) + class_summary_context = f"Unfortunately, {class_member} does not document its objective." + class_summary_questions = [ + f"What does {class_member} do in short?", + f"Can you briefly explain the function of {class_member}?", + f"Could you tell me what {class_member} is used for?", + f"I'm not sure what {class_member} does. 
Can you clarify?", + f"What's the purpose of {class_member}?", + ] + class_summary_answers = [ + f"Docstring of {class_member} lacks a summary of its objective.", + f"Docstring of {class_member} doesn't provide a concise summary of its purpose.", + f"The docstring of {class_member} doesn't contain" + " a brief description of its function.", + f"The docstring of {class_member} doesn't succinctly explain its role.", + f"Docstring of {class_member} doesn't have any explanation of its objective.", + ] + + class_member_retrieval_chunks.append(class_summary_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_summary_context, class_summary_questions, class_summary_answers + ) + ) else: - class_summary_pairs = [ - ( - f"What does {class_member} do in short?", - f"Based on documentation, objective of {class_member} is to: '{class_summary}'.", - ), - ( - f"Can you briefly explain the function of {class_member}?", - f"Sure, according to the documentation, {class_member} is designed to:" - f" '{class_summary}'.", - ), - ( - f"I'm curious about {class_member}, what's its purpose?", - f"Well, as per the documentation, {class_member} aims to: '{class_summary}'.", - ), - ( - f"Could you give me a quick rundown on what {class_member} does?", - f"Absolutely, the documentation states that the role of {class_member} is to:" - f" '{class_summary}'.", - ), - ( - f"What's the role of {class_member} in a nutshell?", - f"The documentation indicates that the purpose of {class_member} is to:" - f" '{class_summary}'.", - ), - ( - f"Can you summarise the function of {class_member}?", - f"Of course, the documentation outlines that {class_member} is intended to:" - f" '{class_summary}'.", - ), - ] - class_member_retrieval_chunks.append( + class_summary_context = ( f"{class_member} documents its purpose as follows: '{class_summary}'." 
) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_summary_pairs)) + class_summary_questions = [ + f"What does {class_member} do in short?", + f"Can you briefly explain the function of {class_member}?", + f"I'm curious about {class_member}, what's its purpose?", + f"Could you give me a quick rundown on what {class_member} does?", + f"What's the role of {class_member} in a nutshell?", + f"Can you summarise the function of {class_member}?", + ] + class_summary_answers = [ + f"Based on documentation, objective of {class_member} is to: '{class_summary}'.", + f"According to the documentation, {class_member} is designed to: '{class_summary}'.", + f"As per the documentation, {class_member} aims to: '{class_summary}'.", + f"The documentation states that the role of {class_member} is to: '{class_summary}'.", + f"The documentation indicates that the purpose of {class_member} is to:" + f" '{class_summary}'.", + f"The documentation outlines that {class_member} is intended to: '{class_summary}'.", + ] + + class_member_retrieval_chunks.append(class_summary_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_summary_context, class_summary_questions, class_summary_answers + ) + ) if not (class_notes := member_type_details.class_notes): - class_notes_pairs = [ - ( - f"Mention any specific details for {class_member} to be aware of.", - f"Docstring of {class_member} does not note on specific details.", - ), - ( - f"What are the specific details to be aware of for {class_member}?", - f"There are no specific details noted in the docstring of {class_member}.", - ), - ( - f"Could you tell me any specifics for {class_member} that I should be aware of?", - f"The docstring of {class_member} doesn't highlight any details.", - ), - ( - f"Are there any specific details for {class_member} that I need to know?", - f"No specific details are mentioned in the docstring of {class_member}.", - ), - ( - f"I need to know the specific details for 
{class_member}. Can you provide them?", - f"Unfortunately, the docstring of {class_member} does not contain any details.", - ), - ( - f"Can you specify any details for {class_member} that I should be aware of?", - f"The docstring of {class_member} does not specify any details to be aware of.", - ), - ] - class_member_retrieval_chunks.append( + class_notes_context = ( f"Docstring of {class_member} has contains no specific implementation details." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_notes_pairs)) + class_notes_questions = [ + f"Mention any specific details for {class_member} to be aware of.", + f"What are the specific details to be aware of for {class_member}?", + f"Could you tell me any specifics for {class_member} that I should be aware of?", + f"Are there any specific details for {class_member} that I need to know?", + f"I need to know the specific details for {class_member}. Can you provide them?", + f"Can you specify any details for {class_member} that I should be aware of?", + ] + class_notes_answers = [ + f"Docstring of {class_member} does not note on specific details.", + f"There are no specific details noted in the docstring of {class_member}.", + f"The docstring of {class_member} doesn't highlight any details.", + f"No specific details are mentioned in the docstring of {class_member}.", + f"The docstring of {class_member} does not contain any details.", + f"The docstring of {class_member} does not specify any details to be aware of.", + ] + + class_member_retrieval_chunks.append(class_notes_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_notes_context, class_notes_questions, class_notes_answers + ) + ) else: - class_notes_pairs = [ - ( - f"Mention any specific details for {class_member} to be aware of.", - f"The {class_member} docstring highlights the following: '{class_notes}'.", - ), - ( - f"What are specifics that I should be aware of before using {class_member}?", - f"The details you 
should know to use {class_member} are highlighted in docstring:" - f" '{class_notes}'.", - ), - ( - f"Could you specify the details for {class_member} to take note of?", - f"Sure, the docstring for {class_member} specifies the following details:" - f" '{class_notes}'.", - ), - ( - f"Can you list the details for {class_member} to keep in mind?", - f"Certainly, the docstring for {class_member} lists the following details:" - f" '{class_notes}'.", - ), - ( - f"What should users of {class_member} be mindful of?", - f"The docstring for {class_member} mentions the following points to be mindful of:" - f" '{class_notes}'.", - ), - ( - f"What details does the user of {class_member} need to know?", - f"User of {class_member} needs to know the following details: '{class_notes}'.", - ), - ] - class_member_retrieval_chunks.append( + class_notes_context = ( f"In docstring, {class_member} specifies the following: '{class_notes}'." ) - class_member_tuning_pairs.extend(allocate_tuning_pairs(class_notes_pairs)) + class_notes_questions = [ + f"Mention any specific details for {class_member} to be aware of.", + f"What are specifics that I should be aware of before using {class_member}?", + f"Could you specify the details for {class_member} to take note of?", + f"Can you list the details for {class_member} to keep in mind?", + f"What should users of {class_member} be mindful of?", + f"What details does the user of {class_member} need to know?", + ] + class_notes_answers = [ + f"The {class_member} docstring highlights the following: '{class_notes}'.", + f"The details you should know to use {class_member} are highlighted in docstring:" + f" '{class_notes}'.", + f"The docstring for {class_member} specifies the following details: '{class_notes}'.", + f"The docstring for {class_member} lists the following details: '{class_notes}'.", + f"The docstring for {class_member} mentions the following points to be mindful of:" + f" '{class_notes}'.", + f"User of {class_member} needs to know the 
following details: '{class_notes}'.", + ] + + class_member_retrieval_chunks.append(class_notes_context) + class_member_tuning_documents.extend( + allocate_tuning_triplets( + class_notes_context, class_notes_questions, class_notes_answers + ) + ) class_member_dataset = Dataset( - retrieval_chunks=class_member_retrieval_chunks[:2], tuning_pairs=class_member_tuning_pairs + retrieval_chunks=class_member_retrieval_chunks[:2], + tuning_documents=class_member_tuning_documents, ) return class_member_dataset, class_member_retrieval_chunks @@ -1883,837 +1724,747 @@ def generate_function_member_dataset( # noqa: C901, PLR0912, PLR0915 f"{function_member} is a Python function.", f"{function_member} has following docstring: {function_docstring}.", ] - function_member_tuning_pairs: list[tuple[str, str, SplitName]] = [] + function_member_tuning_documents: list[Document] = [] if not (function_parameters := member_type_details.function_parameters): - function_parameters_pairs = [ - ( - f"List various parameters of {function_member}.", - f"{function_member} does not take any parameters.", - ), - ( - f"What are the parameters of {function_member}?", - f"{function_member} has no parameters.", - ), - ( - f"Could you tell me the parameters that {function_member} takes?", - f"{function_member} doesn't require any parameters.", - ), - ( - f"I need to know the parameters for {function_member}.", - f"There are no parameters for {function_member}.", - ), - ( - f"Can you list the parameters for {function_member}?", - f"Actually, {function_member} doesn't have any parameters.", - ), - ( - f"Please provide the parameters of {function_member}.", - f"Sorry, but {function_member} does not have any parameters.", - ), - ] - function_member_retrieval_chunks.append(f"{function_member} takes no parameters.") - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_parameters_pairs)) + function_parameters_context = f"{function_member} takes no parameters." 
+ function_parameters_questions = [ + f"List various parameters of {function_member}.", + f"What are the parameters of {function_member}?", + f"Could you tell me the parameters that {function_member} takes?", + f"I need to know the parameters for {function_member}.", + f"Can you list the parameters for {function_member}?", + f"Please provide the parameters of {function_member}.", + ] + function_parameters_answers = [ + f"{function_member} does not take any parameters.", + f"{function_member} has no parameters.", + f"{function_member} doesn't require any parameters.", + f"There are no parameters for {function_member}.", + f"Actually, {function_member} doesn't have any parameters.", + f"Sorry, but {function_member} does not have any parameters.", + ] + + function_member_retrieval_chunks.append(function_parameters_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_parameters_context, + function_parameters_questions, + function_parameters_answers, + ) + ) else: function_parameter_names = enumerate_array_elements( function_parameters, attribute="parameter_details" ) - function_parameters_pairs = [ - ( - f"List various parameters of {function_member}.", - f"Different parameters of {function_member} are as follows:" - f" {function_parameter_names}.", - ), - ( - f"What are the different parameters of {function_member}?", - f"{function_member} has the following parameters: {function_parameter_names}.", - ), - ( - f"Could you tell me the parameters of {function_member}?", - f"Sure, the parameters of {function_member} are: {function_parameter_names}.", - ), - ( - f"I need to know the parameters of {function_member}.", - f"The parameters of {function_member} are: {function_parameter_names}.", - ), - ( - f"Can you list the parameters for {function_member}?", - f"Yes, the parameters for {function_member} are: {function_parameter_names}.", - ), - ( - f"Please provide the parameters of {function_member}.", - f"Parameters of {function_member} 
are as follows: {function_parameter_names}.", - ), - ] - function_member_retrieval_chunks.append( + + function_parameters_context = ( f"{function_member} takes the following parameters: {function_parameter_names}" ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_parameters_pairs)) + function_parameters_questions = [ + f"List various parameters of {function_member}.", + f"What are the different parameters of {function_member}?", + f"Could you tell me the parameters of {function_member}?", + f"I need to know the parameters of {function_member}.", + f"Can you list the parameters for {function_member}?", + f"Please provide the parameters of {function_member}.", + ] + function_parameters_answers = [ + f"Different parameters of {function_member} are as follows:" + f" {function_parameter_names}.", + f"{function_member} has the following parameters: {function_parameter_names}.", + f"The parameters of {function_member} are: {function_parameter_names}.", + f"Yes, the parameters for {function_member} are: {function_parameter_names}.", + f"Parameters of {function_member} are as follows: {function_parameter_names}.", + ] + + function_member_retrieval_chunks.append(function_parameters_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_parameters_context, + function_parameters_questions, + function_parameters_answers, + ) + ) for function_parameter in function_parameters: parameter_name = function_parameter.parameter_name parameter = f"'{parameter_name}' argument in {function_member}" if (parameter_default := function_parameter.parameter_default) is EMPTY_PARAMETER: - function_parameter_defaults_pairs = [ - (f"Default value of {parameter}?", f"{parameter} does not have a default value."), - ( - f"What is the default value for {parameter}?", - f"The {parameter} does not come with a default value.", - ), - ( - f"Could you tell me default value of {parameter}?", - f"Sure, the {parameter} does not possess a default 
value.", - ), - ( - f"I'm curious about default value of {parameter}.", - f"In response to your curiosity, {parameter} is not assigned a default value.", - ), - ( - f"I'd like to know the default value of {parameter}.", - f"To answer your query, {parameter} does not hold a default value.", - ), - ( - f"Can you inform me about the default value of {parameter}?", - f"Certainly, {parameter} does not contain a default value.", - ), + function_parameter_defaults_context = f"{parameter} has no default value." + function_parameter_defaults_questions = [ + f"Default value of {parameter}?", + f"What is the default value for {parameter}?", + f"Could you tell me default value of {parameter}?", + f"I'm curious about default value of {parameter}.", + f"I'd like to know the default value of {parameter}.", + f"Can you inform me about the default value of {parameter}?", + ] + function_parameter_defaults_answers = [ + f"{parameter} does not have a default value." + f"The {parameter} does not come with a default value.", + f"The {parameter} does not possess a default value.", + f"In response to your curiosity, {parameter} is not assigned a default value.", + f"To answer your query, {parameter} does not hold a default value.", + f"{parameter} does not contain a default value.", ] - function_member_retrieval_chunks.append(f"{parameter} has no default value.") - function_member_tuning_pairs.extend( - allocate_tuning_pairs(function_parameter_defaults_pairs) + + function_member_retrieval_chunks.append(function_parameter_defaults_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_parameter_defaults_context, + function_parameter_defaults_questions, + function_parameter_defaults_answers, + ) ) else: - function_parameter_defaults_pairs = [ - ( - f"Default value of {parameter}?", - f"{parameter} has default value of {parameter_default}.", - ), - ( - f"What is the default value for {parameter}?", - f"The default value for {parameter} is 
{parameter_default}.", - ), - ( - f"Could you tell me default value of {parameter}?", - f"Sure, the default value of {parameter} is {parameter_default}.", - ), - ( - f"I would like to know the default value of {parameter}.", - f"The {parameter} has a default value of {parameter_default}.", - ), - ( - f"Can you inform me about the default value of {parameter}?", - f"Of course, the {parameter} defaults to {parameter_default}.", - ), - ( - f"I'm interested in default value of {parameter}.", - f"The default value of the {parameter} is {parameter_default}.", - ), - ] - function_member_retrieval_chunks.append( + function_parameter_defaults_context = ( f"{parameter} has the default value of {parameter_default}." ) - function_member_tuning_pairs.extend( - allocate_tuning_pairs(function_parameter_defaults_pairs) + function_parameter_defaults_questions = [ + f"Default value of {parameter}?", + f"What is the default value for {parameter}?", + f"Could you tell me default value of {parameter}?", + f"I would like to know the default value of {parameter}.", + f"Can you inform me about the default value of {parameter}?", + f"I'm interested in default value of {parameter}.", + ] + function_parameter_defaults_answers = [ + f"{parameter} has default value of {parameter_default}.", + f"The default value for {parameter} is {parameter_default}.", + f"The default value of {parameter} is {parameter_default}.", + f"The {parameter} has a default value of {parameter_default}.", + f"The {parameter} defaults to {parameter_default}.", + f"The default value of the {parameter} is {parameter_default}.", + ] + + function_member_retrieval_chunks.append(function_parameter_defaults_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_parameter_defaults_context, + function_parameter_defaults_questions, + function_parameter_defaults_answers, + ) ) if (parameter_annotation := function_parameter.parameter_annotation) is EMPTY_PARAMETER: - 
function_parameter_types_pairs = [ - ( - f"What is type annotation of {parameter}?", - f"{parameter} does not have a type annotation.", - ), - ( - f"Can you tell me type annotation of {parameter}?", - f"The {parameter} does not have a type annotation.", - ), - ( - f"I'm curious about the type annotation of {parameter}." - " Can you provide some information?", - f"Sure, the {parameter} does not have a type annotation.", - ), - ( - f"Do you have any information on the type annotation of {parameter}?", - f"Yes, the {parameter} does not have a type annotation.", - ), - ( - f"Could you inform me about the type annotation of {parameter}?", - f"Certainly, {parameter} does not have a type annotation.", - ), - ( - f"I'd like to know the type annotation of {parameter}.", - f"The {parameter} you're asking about does not have a type annotation.", - ), - ] - function_member_retrieval_chunks.append( + function_parameter_types_context = ( f"Unfortunately, type hint for {parameter} is missing." ) - function_member_tuning_pairs.extend( - allocate_tuning_pairs(function_parameter_types_pairs) + function_parameter_types_questions = [ + f"What is type annotation of {parameter}?", + f"Can you tell me type annotation of {parameter}?", + f"I'm curious about the type annotation of {parameter}." 
+ " Can you provide some information?", + f"Do you have any information on the type annotation of {parameter}?", + f"Could you inform me about the type annotation of {parameter}?", + f"I'd like to know the type annotation of {parameter}.", + ] + function_parameter_types_answers = [ + f"{parameter} does not have a type annotation.", + f"The {parameter} does not have a type annotation.", + f"{parameter} does not have a type annotation.", + f"The {parameter} you're asking about does not have a type annotation.", + ] + + function_member_retrieval_chunks.append(function_parameter_types_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_parameter_types_context, + function_parameter_types_questions, + function_parameter_types_answers, + ) ) else: - function_parameter_types_pairs = [ - ( - f"What is type annotation of {parameter}?", - f"Type annotation of {parameter} is '{parameter_annotation}'.", - ), - ( - f"Can you tell me type annotation of {parameter}?", - f"Sure, the type annotation of {parameter} is '{parameter_annotation}'.", - ), - ( - f"I'm curious about the type annotation of {parameter}. What is it?", - f"The type annotation of {parameter} is '{parameter_annotation}'.", - ), - ( - f"Do you know type annotation of {parameter}?", - f"Yes, the type annotation of {parameter} is '{parameter_annotation}'.", - ), - ( - f"Could you inform me about the type annotation of {parameter}?", - f"Of course, the type annotation of {parameter} is '{parameter_annotation}'.", - ), - ( - f"What's the type annotation for {parameter}?", - f"The type annotation for {parameter} is '{parameter_annotation}'.", - ), - ] - function_member_retrieval_chunks.append( + function_parameter_types_context = ( f"{parameter} has '{parameter_annotation}' as type annotation." 
) - function_member_tuning_pairs.extend( - allocate_tuning_pairs(function_parameter_types_pairs) + function_parameter_types_questions = [ + f"What is type annotation of {parameter}?", + f"Can you tell me type annotation of {parameter}?", + f"I'm curious about the type annotation of {parameter}. What is it?", + f"Do you know type annotation of {parameter}?", + f"Could you inform me about the type annotation of {parameter}?", + f"What's the type annotation for {parameter}?", + ] + function_parameter_types_answers = [ + f"Type annotation of {parameter} is '{parameter_annotation}'.", + f"The type annotation of {parameter} is '{parameter_annotation}'.", + f"The type annotation for {parameter} is '{parameter_annotation}'.", + ] + + function_member_retrieval_chunks.append(function_parameter_types_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_parameter_types_context, + function_parameter_types_questions, + function_parameter_types_answers, + ) ) if not (parameter_summary := function_parameter.parameter_summary): - function_parameter_summary_pairs = [ - ( - f"What is {parameter} for?", - f"Docstring of {function_member} lacks a description for '{parameter_name}'.", - ), - ( - f"Can you explain the purpose of {parameter}?", - f"The docstring of {function_member} doesn't provide a description.", - ), - ( - f"I'm not sure what {parameter} does. Can you help?", - f"Unfortunately, the docstring of {function_member} doesn't include" - " a description.", - ), - ( - f"Could you clarify the role of {parameter}?", - f"The description is missing in the docstring of {function_member}.", - ), - ( - f"I'm confused about the {parameter}. What does it do?", - f"The docstring of {function_member} doesn't contain a description.", - ), - ( - f"What does {parameter} do?", - f"There's no description in the docstring of {function_member}.", - ), + function_parameter_summary_context = f"{parameter} is not documented in the docstring." 
+ function_parameter_summary_questions = [ + f"What is {parameter} for?", + f"Can you explain the purpose of {parameter}?", + f"I'm not sure what {parameter} does. Can you help?", + f"Could you clarify the role of {parameter}?", + f"I'm confused about the {parameter}. What does it do?", + f"What does {parameter} do?", ] - function_member_retrieval_chunks.append( - f"{parameter} is not documented in the docstring." - ) - function_member_tuning_pairs.extend( - allocate_tuning_pairs(function_parameter_summary_pairs) + function_parameter_summary_answers = [ + f"Docstring of {function_member} lacks a description for '{parameter_name}'.", + f"The docstring of {function_member} doesn't provide a description.", + f"Unfortunately, the docstring of {function_member} doesn't include" + " a description.", + f"The description is missing in the docstring of {function_member}.", + f"The docstring of {function_member} doesn't contain a description.", + f"There's no description in the docstring of {function_member}.", + ] + + function_member_retrieval_chunks.append(function_parameter_summary_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_parameter_summary_context, + function_parameter_summary_questions, + function_parameter_summary_answers, + ) ) else: - function_parameter_summary_pairs = [ - ( - f"What is {parameter} for?", - f"Based on {function_member} docstring, its role is '{parameter_summary}'.", - ), - ( - f"Can you explain the role of {parameter}?", - f"Sure, according to the docstring of {function_member}," - f" '{parameter_name}' is used for '{parameter_summary}'.", - ), - ( - f"I'm curious about the {parameter}. 
What does it do?", - f"Well, if you look at the docstring of {function_member}, you'll see that" - f" '{parameter_name}' is responsible for '{parameter_summary}'.", - ), - ( - f"Could you tell me the purpose of {parameter}?", - f"Of course, the docstring of {function_member} indicates that" - f" '{parameter_name}' serves the purpose of '{parameter_summary}'.", - ), - ( - f"What's the function of {parameter}?", - f"As per the docstring of {function_member}, '{parameter_name}' functions as:" - f" '{parameter_summary}'.", - ), - ( - f"I'd like to know what '{parameter_name}' does in {function_member}.", - f"Sure thing, the docstring of {function_member} states that" - f" '{parameter_name}' does '{parameter_summary}'.", - ), - ] - function_member_retrieval_chunks.append( + function_parameter_summary_context = ( f"In the docstring, {parameter} is described as '{parameter_summary}'." ) - function_member_tuning_pairs.extend( - allocate_tuning_pairs(function_parameter_summary_pairs) + function_parameter_summary_questions = [ + f"What is {parameter} for?", + f"Can you explain the role of {parameter}?", + f"I'm curious about the {parameter}. 
What does it do?", + f"Could you tell me the purpose of {parameter}?", + f"What's the function of {parameter}?", + f"I'd like to know what '{parameter_name}' does in {function_member}.", + ] + function_parameter_summary_answers = [ + f"Based on {function_member} docstring, its role is '{parameter_summary}'.", + f"According to the docstring of {function_member}," + f"'{parameter_name}' is used for '{parameter_summary}'.", + f"If you look at the docstring of {function_member}, you'll see that" + f" '{parameter_name}' is responsible for '{parameter_summary}'.", + f"The docstring of {function_member} indicates that" + f" '{parameter_name}' serves the purpose of '{parameter_summary}'.", + f"As per the docstring of {function_member}, '{parameter_name}' functions as:" + f" '{parameter_summary}'.", + f"The docstring of {function_member} states that" + f" '{parameter_name}' does '{parameter_summary}'.", + ] + + function_member_retrieval_chunks.append(function_parameter_summary_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_parameter_summary_context, + function_parameter_summary_questions, + function_parameter_summary_answers, + ) ) if ( returns_annotation := member_type_details.function_returns.returns_annotation ) is EMPTY_SIGNATURE: - function_return_type_pairs = [ - ( - f"What is the return type annotation of {function_member}?", - f"{function_member} lacks a return type annotation. It may still return though.", - ), - ( - f"Can you tell me the return type annotation of {function_member}?", - f"The function {function_member} does not have a return type annotation." - " However, it may still return.", - ), - ( - f"I'm curious about return type annotation of {function_member}. What is it?", - f"Well, {function_member} doesn't have a return type annotation." 
- " But, it could still return.", - ), - ( - f"Do you know the return type annotation of {function_member}?", - f"Actually, {function_member} doesn't come with a return type annotation." - " It's possible that it still returns though.", - ), - ( - f"Could you inform me about the return type annotation of {function_member}?", - f"Sure, {function_member} is missing a return type annotation." - " It might still return though.", - ), - ( - f"What's the return type annotation for {function_member}?", - f"It appears that {function_member} is without a return type annotation." - " It may still have a return.", - ), - ] - function_member_retrieval_chunks.append( + function_return_type_context = ( f"{function_member} has no return annotation, but its return can still be non-null." ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_return_type_pairs)) + function_return_type_questions = [ + f"What is the return type annotation of {function_member}?", + f"Can you tell me the return type annotation of {function_member}?", + f"I'm curious about return type annotation of {function_member}. What is it?", + f"Do you know the return type annotation of {function_member}?", + f"Could you inform me about the return type annotation of {function_member}?", + f"What's the return type annotation for {function_member}?", + ] + function_return_type_answers = [ + f"{function_member} lacks a return type annotation. It may still return though.", + f"The function {function_member} does not have a return type annotation." + " However, it may still return.", + f"{function_member} doesn't have a return type annotation." + " But, it could still return.", + f"Actually, {function_member} doesn't come with a return type annotation." + " It's possible that it still returns though.", + f"Sure, {function_member} is missing a return type annotation." + " It might still return though.", + f"It appears that {function_member} is without a return type annotation." 
+ " It may still have a return.", + ] + + function_member_retrieval_chunks.append(function_return_type_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_return_type_context, + function_return_type_questions, + function_return_type_answers, + ) + ) else: - function_return_type_pairs = [ - ( - f"What is the return type annotation of {function_member}?", - f"Return type annotation for {function_member} is '{returns_annotation}'.", - ), - ( - f"Can you tell me the return type annotation of {function_member}?", - f"Sure, return type annotation for {function_member} is '{returns_annotation}'.", - ), - ( - f"I need to know the return type annotation of {function_member}.", - f"The return type annotation for {function_member} is '{returns_annotation}'.", - ), - ( - f"Do you know the return type annotation of {function_member}?", - f"Yes, return type annotation for {function_member} is '{returns_annotation}'.", - ), - ( - f"Could you inform me about the return type annotation of {function_member}?", - f"Of course, the return type for {function_member} is '{returns_annotation}'.", - ), - ( - f"I'm curious about the return type annotation of {function_member}.", - f"The return type annotation for {function_member} is '{returns_annotation}'.", - ), - ] - function_member_retrieval_chunks.append( + function_return_type_context = ( f"Return of {function_member} is annotated as '{returns_annotation}'." 
) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_return_type_pairs)) + function_return_type_questions = [ + f"What is the return type annotation of {function_member}?", + f"Can you tell me the return type annotation of {function_member}?", + f"I need to know the return type annotation of {function_member}.", + f"Do you know the return type annotation of {function_member}?", + f"Could you inform me about the return type annotation of {function_member}?", + f"I'm curious about the return type annotation of {function_member}.", + ] + function_return_type_answers = [ + f"Return type annotation for {function_member} is '{returns_annotation}'.", + f"The return type annotation for {function_member} is '{returns_annotation}'.", + f"The return type for {function_member} is '{returns_annotation}'.", + ] + + function_member_retrieval_chunks.append(function_return_type_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_return_type_context, + function_return_type_questions, + function_return_type_answers, + ) + ) if not (returns_summary := member_type_details.function_returns.returns_summary): - function_return_summary_pairs = [ - ( - f"What does {function_member} return?", - f"Docstring of {function_member} does not describe its return.", - ), - ( - f"Can you tell me what {function_member} returns?", - f"Docstring of {function_member} doesn't provide information about its return.", - ), - ( - f"Do you know the return of {function_member}?", - f"Unfortunately, docstring of {function_member} doesn't specify what it returns.", - ), - ( - f"I'm curious about what {function_member} returns. 
Can you help?", - f"I'm sorry, but the docstring of {function_member} doesn't clarify its return.", - ), - ( - f"What's the return of {function_member}?", - f"The return of {function_member} is not described in its docstring.", - ), - ( - f"Could you inform me about the return of {function_member}?", - f"Regrettably, the docstring of {function_member} doesn't detail its return.", - ), - ] - function_member_retrieval_chunks.append(f"{function_member} does not document its return.") - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_return_summary_pairs)) + function_return_summary_context = f"{function_member} does not document its return." + function_return_summary_questions = [ + f"What does {function_member} return?", + f"Can you tell me what {function_member} returns?", + f"Do you know the return of {function_member}?", + f"I'm curious about what {function_member} returns. Can you help?", + f"What's the return of {function_member}?", + f"Could you inform me about the return of {function_member}?", + ] + function_return_summary_answers = [ + f"Docstring of {function_member} does not describe its return.", + f"Docstring of {function_member} doesn't provide information about its return.", + f"Docstring of {function_member} doesn't specify what it returns.", + f"The docstring of {function_member} doesn't clarify its return.", + f"The return of {function_member} is not described in its docstring.", + f"The docstring of {function_member} doesn't detail its return.", + ] + + function_member_retrieval_chunks.append(function_return_summary_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_return_summary_context, + function_return_summary_questions, + function_return_summary_answers, + ) + ) else: - function_return_summary_pairs = [ - ( - f"What does {function_member} return?", - f"Based on {function_member} docstring, the return contains: '{returns_summary}'.", - ), - ( - f"Can you tell me what {function_member} 
returns?", - f"Sure, as per docstring of {function_member}, it returns: '{returns_summary}'.", - ), - ( - f"I'm curious about what {function_member} returns. Can you help?", - f"Absolutely! The docstring of {function_member} indicates that it returns:" - f" '{returns_summary}'.", - ), - ( - f"Do you know what {function_member} returns?", - f"Yes, the docstring of {function_member} states that it returns:" - f" '{returns_summary}'.", - ), - ( - f"I'd like to know what {function_member} returns.", - f"Of course, the docstring of {function_member} reveals that its return contains:" - f" '{returns_summary}'.", - ), - ( - f"Could you inform me about the return of {function_member}?", - f"Certainly, the docstring of {function_member} specifies that it returns:" - f" '{returns_summary}'.", - ), - ] - function_member_retrieval_chunks.append( + function_return_summary_context = ( f"Based on docstring, return of {function_member} is as follows: '{returns_summary}'." ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_return_summary_pairs)) + function_return_summary_questions = [ + f"What does {function_member} return?", + f"Can you tell me what {function_member} returns?", + f"I'm curious about what {function_member} returns. 
Can you help?", + f"Do you know what {function_member} returns?", + f"I'd like to know what {function_member} returns.", + f"Could you inform me about the return of {function_member}?", + ] + function_return_summary_answers = [ + f"Based on {function_member} docstring, the return contains: '{returns_summary}'.", + f"As per docstring of {function_member}, it returns: '{returns_summary}'.", + f"The docstring of {function_member} indicates that it returns: '{returns_summary}'.", + f"The docstring of {function_member} states that it returns: '{returns_summary}'.", + f"The docstring of {function_member} reveals that its return contains:" + f" '{returns_summary}'.", + f"The docstring of {function_member} specifies that it returns: '{returns_summary}'.", + ] + + function_member_retrieval_chunks.append(function_return_summary_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_return_summary_context, + function_return_summary_questions, + function_return_summary_answers, + ) + ) if not (function_summary := member_type_details.function_summary): - function_summary_pairs = [ - ( - f"Summarise role of {function_member} in short.", - f"{function_member} docstring lacks a summary of its objective.", - ), - ( - f"Can you briefly explain the role of {function_member}?", - f"The docstring of {function_member} doesn't provide its purpose.", - ), - ( - f"What is the purpose of {function_member} as per its docstring?", - f"The docstring of {function_member} doesn't clearly state its purpose.", - ), - ( - f"Could you provide a summary of objective of {function_member}?", - f"The objective of {function_member} is not summarised in its docstring.", - ), - ( - f"What does {function_member} do according to its docstring?", - f"According to its docstring, role of {function_member} is not summarised.", - ), - ] - function_member_retrieval_chunks.append(f"Documentation for {function_member} is missing.") - 
function_member_tuning_pairs.extend(allocate_tuning_pairs(function_summary_pairs)) + function_summary_context = f"Documentation for {function_member} is missing." + function_summary_questions = [ + f"Summarise role of {function_member} in short.", + f"Can you briefly explain the role of {function_member}?", + f"What is the purpose of {function_member} as per its docstring?", + f"Could you provide a summary of objective of {function_member}?", + f"What does {function_member} do according to its docstring?", + ] + function_summary_answers = [ + f"{function_member} docstring lacks a summary of its objective.", + f"The docstring of {function_member} doesn't provide its purpose.", + f"The docstring of {function_member} doesn't clearly state its purpose.", + f"The objective of {function_member} is not summarised in its docstring.", + f"According to its docstring, role of {function_member} is not summarised.", + ] + + function_member_retrieval_chunks.append(function_summary_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_summary_context, function_summary_questions, function_summary_answers + ) + ) else: - function_summary_pairs = [ - ( - f"Summarise role of {function_member} in short.", - f"Based on docstring, objective of {function_member} is to: '{function_summary}'.", - ), - ( - f"Can you briefly explain the role of {function_member}?", - f"Sure, according to the docstring, the purpose of {function_member} is:" - f" '{function_summary}'.", - ), - ( - f"What does {function_member} do, in a nutshell?", - f"In a nutshell, {function_member} is designed to: '{function_summary}'.", - ), - ( - f"Could you provide a short summary of role of {function_member}?", - f"Certainly, from docstring, {function_member} aims to: '{function_summary}'.", - ), - ( - f"I need a brief explanation of what {function_member} does.", - f"Of course, {function_member} is intended to: '{function_summary}'.", - ), - ( - f"In brief, what is the role of 
{function_member}?", - f"Briefly, the role of {function_member} is to: '{function_summary}'," - " according to the docstring.", - ), - ] - function_member_retrieval_chunks.append( + function_summary_context = ( f"{function_member} documents itself as follows: '{function_summary}'." ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_summary_pairs)) + function_summary_questions = [ + f"Summarise role of {function_member} in short.", + f"Can you briefly explain the role of {function_member}?", + f"What does {function_member} do, in a nutshell?", + f"Could you provide a short summary of role of {function_member}?", + f"I need a brief explanation of what {function_member} does.", + f"In brief, what is the role of {function_member}?", + ] + function_summary_answers = [ + f"Based on docstring, objective of {function_member} is to: '{function_summary}'.", + f"According to the docstring, the purpose of {function_member} is:" + f" '{function_summary}'.", + f"In a nutshell, {function_member} is designed to: '{function_summary}'.", + f"From docstring, {function_member} aims to: '{function_summary}'.", + f"{function_member} is intended to: '{function_summary}'.", + f"The role of {function_member} is to: '{function_summary}'," + " according to the docstring.", + ] + + function_member_retrieval_chunks.append(function_summary_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_summary_context, function_summary_questions, function_summary_answers + ) + ) if not (function_raises := member_type_details.function_raises): - function_raise_types_pairs = [ - ( - f"Does {function_member} raise any specific exception?", - f"Docstring of {function_member} does not mention any specific exceptions.", - ), - ( - f"Are there any specific exceptions that {function_member} raises?", - f"No specific exceptions are mentioned in the docstring of {function_member}.", - ), - ( - f"Can you tell me if {function_member} raises any specific 
exceptions?", - f"According to docstring, {function_member} does not raise exceptions.", - ), - ( - f"I want to know if {function_member} raises any specific exceptions." - " Can you confirm?", - f"I can confirm that docstring of {function_member} does not mention exceptions.", - ), - ( - f"Could {function_member} possibly raise any specific exceptions?", - f"The docstring of {function_member} does not indicate that" - " it raises any specific exceptions.", - ), - ( - f"Is it possible for {function_member} to raise any specific exceptions?", - f"The docstring of {function_member} does not suggest that" - " it raises any specific exceptions.", - ), - ] - function_member_retrieval_chunks.append( + function_raise_types_context = ( f"{function_member} does not document any specific exceptions in the docstring." ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_raise_types_pairs)) + function_raise_types_questions = [ + f"Does {function_member} raise any specific exception?", + f"Are there any specific exceptions that {function_member} raises?", + f"Can you tell me if {function_member} raises any specific exceptions?", + f"I want to know if {function_member} raises any specific exceptions." 
+ " Can you confirm?", + f"Could {function_member} possibly raise any specific exceptions?", + f"Is it possible for {function_member} to raise any specific exceptions?", + ] + function_raise_types_answers = [ + f"Docstring of {function_member} does not mention any specific exceptions.", + f"No specific exceptions are mentioned in the docstring of {function_member}.", + f"According to docstring, {function_member} does not raise exceptions.", + f"Docstring of {function_member} does not mention exceptions.", + f"The docstring of {function_member} does not indicate that" + " it raises any specific exceptions.", + f"The docstring of {function_member} does not suggest that" + " it raises any specific exceptions.", + ] + + function_member_retrieval_chunks.append(function_raise_types_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_raise_types_context, + function_raise_types_questions, + function_raise_types_answers, + ) + ) else: function_raise_types = enumerate_array_elements( function_raises, attribute="raises_details" ) - function_raise_types_pairs = [ - ( - f"Does {function_member} raise any specific exception?", - f"Based on docstring of {function_member}, it can raise the following:" - f" {function_raise_types}.", - ), - ( - f"Can you tell me if {function_member} raises any specific exceptions?", - f"Yes, according to docstring of {function_member}, it can raise these exceptions:" - f" {function_raise_types}.", - ), - ( - f"What exceptions, if any, does {function_member} raise?", - f"{function_member} can raise these exceptions as per its docstring:" - f" {function_raise_types}.", - ), - ( - f"I need to know if {function_member} throws any specific exceptions." 
- " Can you help?", - f"Sure, {function_member} can throw following exceptions according to docstring:" - f" {function_raise_types}.", - ), - ( - f"Could you inform me about any specific exceptions that" - f" {function_member} might raise?", - f"Certainly, the docstring of {function_member} indicates that" - f" it can raise these exceptions: {function_raise_types}.", - ), - ( - f"I'm curious about the exceptions that {function_member} might throw." - " Do you have any information?", - f"Yes, the docstring of {function_member} suggests that" - f" it can throw the following exceptions: {function_raise_types}.", - ), - ] - function_member_retrieval_chunks.append( + + function_raise_types_context = ( f"From docstring, {function_member} can raise the following: {function_raise_types}" ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_raise_types_pairs)) + function_raise_types_questions = [ + f"Does {function_member} raise any specific exception?", + f"Can you tell me if {function_member} raises any specific exceptions?", + f"What exceptions, if any, does {function_member} raise?", + f"I need to know if {function_member} throws any specific exceptions. Can you help?", + f"Could you inform me about any specific exceptions that" + f" {function_member} might raise?", + f"I'm curious about the exceptions that {function_member} might throw." 
+ " Do you have any information?", + ] + function_raise_types_answers = [ + f"Based on docstring of {function_member}, it can raise the following:" + f" {function_raise_types}.", + f"According to docstring of {function_member}, it can raise these exceptions:" + f" {function_raise_types}.", + f"{function_member} can raise these exceptions as per its docstring:" + f" {function_raise_types}.", + f"{function_member} can throw following exceptions according to docstring:" + f" {function_raise_types}.", + f"The docstring of {function_member} indicates that" + f" it can raise these exceptions: {function_raise_types}.", + f"The docstring of {function_member} suggests that" + f" it can throw the following exceptions: {function_raise_types}.", + ] + + function_member_retrieval_chunks.append(function_raise_types_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_raise_types_context, + function_raise_types_questions, + function_raise_types_answers, + ) + ) if not (function_warns := member_type_details.function_warns): - function_warn_types_pairs = [ - ( - f"Does {function_member} throw any specific warnings?", - f"Docstring of {function_member} lacks any mention of specific warnings.", - ), - ( - f"Are there any specific warnings that {function_member} throws?", - f"There are no specific warnings mentioned in docstring of {function_member}.", - ), - ( - f"Can you tell me if {function_member} throws any specific warnings?", - f"According to the docstring of {function_member}," - " it doesn't throw any specific warnings.", - ), - ( - f"I want to know if {function_member} throws any specific warnings." 
- " Can you help?", - f"Sure, I checked the docstring of {function_member} and" - " found no mention of specific warnings.", - ), - ( - f"Could you check if {function_member} throws any specific warnings?", - f"I've checked the docstring of {function_member} and" - " it doesn't mention any specific warnings.", - ), - ( - f"Is it possible that {function_member} throws any specific warnings?", - f"Based on the docstring of {function_member}," - " it doesn't seem to throw any specific warnings.", - ), - ] - function_member_retrieval_chunks.append( + function_warn_types_context = ( f"Mention of any warnings is missing in docstring of {function_member}." ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_warn_types_pairs)) + function_warn_types_questions = [ + f"Does {function_member} throw any specific warnings?", + f"Are there any specific warnings that {function_member} throws?", + f"Can you tell me if {function_member} throws any specific warnings?", + f"I want to know if {function_member} throws any specific warnings. 
Can you help?", + f"Could you check if {function_member} throws any specific warnings?", + f"Is it possible that {function_member} throws any specific warnings?", + ] + function_warn_types_answers = [ + f"Docstring of {function_member} lacks any mention of specific warnings.", + f"There are no specific warnings mentioned in docstring of {function_member}.", + f"According to the docstring of {function_member}," + " it doesn't throw any specific warnings.", + f"No mention of specific warnings are found in the docstring of {function_member}.", + f"Based on the docstring of {function_member}," + " it doesn't seem to throw any specific warnings.", + ] + + function_member_retrieval_chunks.append(function_warn_types_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_warn_types_context, + function_warn_types_questions, + function_warn_types_answers, + ) + ) else: function_warn_types = enumerate_array_elements(function_warns, attribute="warns_details") - function_warn_types_pairs = [ - ( - f"Does {function_member} throw any specific warnings?", - f"Based on the docstring, {function_member} can throw the following warnings:" - f" {function_warn_types}.", - ), - ( - f"Can you tell me if {function_member} throws any specific warnings?", - f"Yes, according to docstring, {function_member} may throw these warnings:" - f" {function_warn_types}.", - ), - ( - f"I'm curious, does {function_member} generate any particular warnings?", - f"Indeed, docstring indicates that {function_member} can generate these warnings:" - f" {function_warn_types}.", - ), - ( - f"What specific warnings, if any, does {function_member} throw?", - f"{function_member} throws the following warnings as per the docstring:" - f" {function_warn_types}.", - ), - ( - f"Could {function_member} possibly throw any specific warnings?", - f"Yes, it could. 
Docstring of {function_member} mentions these specific warnings:" - f" {function_warn_types}.", - ), - ( - f"Are there any specific warnings that {function_member} throws?", - f"Yes, there are. The docstring for {function_member} lists following warnings:" - f" {function_warn_types}.", - ), - ] - function_member_retrieval_chunks.append( + + function_warn_types_context = ( f"{function_member} documents the following warnings: {function_warn_types}" ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_warn_types_pairs)) + function_warn_types_questions = [ + f"Does {function_member} throw any specific warnings?", + f"Can you tell me if {function_member} throws any specific warnings?", + f"I'm curious, does {function_member} generate any particular warnings?", + f"What specific warnings, if any, does {function_member} throw?", + f"Could {function_member} possibly throw any specific warnings?", + f"Are there any specific warnings that {function_member} throws?", + ] + function_warn_types_answers = [ + f"Based on the docstring, {function_member} can throw the following warnings:" + f" {function_warn_types}.", + f"According to docstring, {function_member} may throw these warnings:" + f" {function_warn_types}.", + f"Docstring indicates that {function_member} can generate these warnings:" + f" {function_warn_types}.", + f"{function_member} throws the following warnings as per the docstring:" + f" {function_warn_types}.", + f"Docstring of {function_member} mentions these specific warnings:" + f" {function_warn_types}.", + f"The docstring for {function_member} lists following warnings:" + f" {function_warn_types}.", + ] + + function_member_retrieval_chunks.append(function_warn_types_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_warn_types_context, + function_warn_types_questions, + function_warn_types_answers, + ) + ) if not (function_notes := member_type_details.function_notes): - function_notes_pairs = [ - ( - 
f"Is there any specific details for {function_member} to be aware of?", - f"Docstring of {function_member} lacks any notes on specific details.", - ), - ( - f"Are there any particular details I should know about {function_member}?", - f"There are no specific details noted in the docstring of {function_member}.", - ), - ( - f"What should I be aware of when using {function_member}?", - f"The docstring of {function_member} does not contain any details to be aware of.", - ), - ( - f"Could you tell me if there are any specific details for {function_member}?", - f"No specific details are mentioned in the docstring of {function_member}.", - ), - ( - f"I'm curious if there are any specific details about {function_member}?", - f"The docstring of {function_member} does not provide any specific details.", - ), - ( - f"Do I need to be aware of any specific details for {function_member}?", - f"The docstring of {function_member} does not include any specific details.", - ), - ] - function_member_retrieval_chunks.append( - f"{function_member} has no specific notes in the docstring." - ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_notes_pairs)) + function_notes_context = f"{function_member} has no specific notes in the docstring." 
+ function_notes_questions = [ + f"Is there any specific details for {function_member} to be aware of?", + f"Are there any particular details I should know about {function_member}?", + f"What should I be aware of when using {function_member}?", + f"Could you tell me if there are any specific details for {function_member}?", + f"I'm curious if there are any specific details about {function_member}?", + f"Do I need to be aware of any specific details for {function_member}?", + ] + function_notes_answers = [ + f"Docstring of {function_member} lacks any notes on specific details.", + f"There are no specific details noted in the docstring of {function_member}.", + f"The docstring of {function_member} does not contain any details to be aware of.", + f"No specific details are mentioned in the docstring of {function_member}.", + f"The docstring of {function_member} does not provide any specific details.", + f"The docstring of {function_member} does not include any specific details.", + ] + + function_member_retrieval_chunks.append(function_notes_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_notes_context, function_notes_questions, function_notes_answers + ) + ) else: - function_notes_pairs = [ - ( - f"Is there any specific details for {function_member} to be aware of?", - f"Docstring of {function_member} highlights the following: '{function_notes}'.", - ), - ( - f"What should I know about {function_member}?", - "You should be aware that docstring includes the following details:" - f" '{function_notes}'.", - ), - ( - f"Could you provide some details about {function_member}?", - f"Sure, the docstring of {function_member} provides the following information:" - f" '{function_notes}'.", - ), - ( - f"What are the important details of {function_member}?", - f"The important details of {function_member} are highlighted in its docstring:" - f" '{function_notes}'.", - ), - ( - f"Can you tell me more about {function_member}?", - f"Of 
course, the docstring of {function_member} contains the following details:" - f" '{function_notes}'.", - ), - ( - f"I need information about {function_member}.", - f"The docstring of {function_member} contains the following information:" - f" '{function_notes}'.", - ), - ] - function_member_retrieval_chunks.append( + function_notes_context = ( f"Docstring for {function_member} has following notes: '{function_notes}'." ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_notes_pairs)) + function_notes_questions = [ + f"Is there any specific details for {function_member} to be aware of?", + f"What should I know about {function_member}?", + f"Could you provide some details about {function_member}?", + f"What are the important details of {function_member}?", + f"Can you tell me more about {function_member}?", + f"I need information about {function_member}.", + ] + function_notes_answers = [ + f"Docstring of {function_member} highlights the following: '{function_notes}'.", + "Users should be aware that docstring includes the following details:" + f" '{function_notes}'.", + f"The docstring of {function_member} provides the following information:" + f" '{function_notes}'.", + f"The important details of {function_member} are highlighted in its docstring:" + f" '{function_notes}'.", + f"The docstring of {function_member} contains the following details:" + f" '{function_notes}'.", + f"The docstring of {function_member} contains the following information:" + f" '{function_notes}'.", + ] + + function_member_retrieval_chunks.append(function_notes_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_notes_context, function_notes_questions, function_notes_answers + ) + ) if not (function_references := member_type_details.function_references): - function_references_pairs = [ - ( - f"Is there any reference for {function_member}?", - f"Documentation for {function_member} contains no references.", - ), - ( - f"Can I find any 
references in the documentation for {function_member}?", - f"No, the documentation for {function_member} does not contain any references.", - ), - ( - f"Does the documentation for {function_member} include any references?", - f"No, there are no references in the documentation for {function_member}.", - ), - ( - f"Are there references available in the {function_member} documentation?", - f"No, the {function_member} documentation does not include any references.", - ), - ( - f"I'm looking for references in {function_member} documentation. Are there any?", - f"Unfortunately, the documentation for {function_member} contains no references.", - ), - ( - f"Could you tell me if there are any references for {function_member}?", - f"I'm sorry, but documentation for {function_member} lacks any references.", - ), - ] - function_member_retrieval_chunks.append( + function_references_context = ( f"{function_member} documents no references in its docstring." ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_references_pairs)) + function_references_questions = [ + f"Is there any reference for {function_member}?", + f"Can I find any references in the documentation for {function_member}?", + f"Does the documentation for {function_member} include any references?", + f"Are there references available in the {function_member} documentation?", + f"I'm looking for references in {function_member} documentation. 
Are there any?", + f"Could you tell me if there are any references for {function_member}?", + ] + function_references_answers = [ + f"Documentation for {function_member} contains no references.", + f"The documentation for {function_member} does not contain any references.", + f"There are no references in the documentation for {function_member}.", + f"The {function_member} documentation does not include any references.", + f"The documentation for {function_member} contains no references.", + f"Documentation for {function_member} lacks any references.", + ] + + function_member_retrieval_chunks.append(function_references_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_references_context, + function_references_questions, + function_references_answers, + ) + ) else: - function_references_pairs = [ - ( - f"Is there any reference for {function_member}?", - f"The docstring links the following: '{function_references}'.", - ), - ( - f"Can you provide a reference for {function_member}?", - f"Sure, the docstring provides the following reference: '{function_references}'.", - ), - ( - f"Where can I find a reference for {function_member}?", - f"You can find it in the docstring, which links to: '{function_references}'.", - ), - ( - f"Could you point me to the reference for {function_member}?", - f"Of course, the docstring points to these reference: '{function_references}'.", - ), - ( - f"I'm looking for a reference for {function_member}. 
Can you help?", - f"Absolutely, the docstring links to this reference: '{function_references}'.", - ), - ( - f"What's the reference for {function_member}?", - f"The reference for that is in the docstring: '{function_references}'.", - ), - ] - function_member_retrieval_chunks.append( + function_references_context = ( f"{function_member} list the following references: {function_references}" ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_references_pairs)) + function_references_questions = [ + f"Is there any reference for {function_member}?", + f"Can you provide a reference for {function_member}?", + f"Where can I find a reference for {function_member}?", + f"Could you point me to the reference for {function_member}?", + f"I'm looking for a reference for {function_member}. Can you help?", + f"What's the reference for {function_member}?", + ] + function_references_answers = [ + f"The docstring links the following: '{function_references}'.", + f"The docstring provides the following reference: '{function_references}'.", + f"The docstring links to: '{function_references}'.", + f"The docstring points to these reference: '{function_references}'.", + f"The docstring links to this reference: '{function_references}'.", + f"The reference for that is in the docstring: '{function_references}'.", + ] + + function_member_retrieval_chunks.append(function_references_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_references_context, + function_references_questions, + function_references_answers, + ) + ) if not (function_examples := member_type_details.function_examples): - function_examples_pairs = [ - ( - f"Is there any example for {function_member}?", - f"Docstring for {function_member} lacks any examples.", - ), - ( - f"Can I find an example for {function_member} in the docstring?", - f"Unfortunately, docstring for {function_member} does not contain any examples.", - ), - ( - f"Does the docstring for 
{function_member} include any examples?", - f"No, the docstring for {function_member} does not include any examples.", - ), - ( - f"I'm looking for an example of {function_member} in docstring, is there one?", - f"I'm sorry, but docstring for {function_member} does not provide any examples.", - ), - ( - f"Are there any examples provided in the docstring for {function_member}?", - f"No examples are provided in the docstring for {function_member}.", - ), - ( - f"Could you tell me if there's an example for {function_member} in docstring?", - f"I regret to inform you that {function_member} documents no examples.", - ), - ] - function_member_retrieval_chunks.append( - f"Documentation of {function_member} lacks any examples." - ) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_examples_pairs)) + function_examples_context = f"Documentation of {function_member} lacks any examples." + function_examples_questions = [ + f"Is there any example for {function_member}?", + f"Can I find an example for {function_member} in the docstring?", + f"Does the docstring for {function_member} include any examples?", + f"I'm looking for an example of {function_member} in docstring, is there one?", + f"Are there any examples provided in the docstring for {function_member}?", + f"Could you tell me if there's an example for {function_member} in docstring?", + ] + function_examples_answers = [ + f"Docstring for {function_member} lacks any examples.", + f"Docstring for {function_member} does not contain any examples.", + f"The docstring for {function_member} does not include any examples.", + f"Docstring for {function_member} does not provide any examples.", + f"No examples are provided in the docstring for {function_member}.", + f"{function_member} documents no examples.", + ] + + function_member_retrieval_chunks.append(function_examples_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_examples_context, function_examples_questions, 
function_examples_answers + ) + ) else: - function_examples_pairs = [ - ( - f"Is there any example for {function_member}?", - f"Documentation of {function_member} contains these examples:" - f" '{function_examples}'.", - ), - ( - f"Can you provide an example of {function_member}?", - f"Sure, you can find examples of {function_member} in its documentation:" - f" '{function_examples}'.", - ), - ( - f"I'm looking for examples of {function_member}, can you help?", - f"Absolutely, examples for {function_member} are available in its documentation:" - f" '{function_examples}'.", - ), - ( - f"Where can I find examples for {function_member}?", - f"You can find examples for {function_member} in its documentation:" - f" '{function_examples}'.", - ), - ( - f"Could you show me some examples of {function_member}?", - f"Of course, the documentation of {function_member} includes these examples:" - f" '{function_examples}'.", - ), - ( - f"I need examples for {function_member}, where can I find them?", - f"You can find examples for {function_member} in its documentation:" - f" '{function_examples}'.", - ), - ] - function_member_retrieval_chunks.append( + function_examples_context = ( f"Docstring of {function_member} contains following examples: '{function_examples}'." 
) - function_member_tuning_pairs.extend(allocate_tuning_pairs(function_examples_pairs)) + function_examples_questions = [ + f"Is there any example for {function_member}?", + f"Can you provide an example of {function_member}?", + f"I'm looking for examples of {function_member}, can you help?", + f"Where can I find examples for {function_member}?", + f"Could you show me some examples of {function_member}?", + f"I need examples for {function_member}, where can I find them?", + ] + function_examples_answers = [ + f"Documentation of {function_member} contains these examples: '{function_examples}'.", + f"In documentation of {function_member}, these examples can be found:" + f" '{function_examples}'.", + f"Examples for {function_member} are available in its documentation:" + f" '{function_examples}'.", + f"In documentation for {function_member}, these examples can be found:" + f" '{function_examples}'.", + f"The documentation of {function_member} includes these examples:" + f" '{function_examples}'.", + ] + + function_member_retrieval_chunks.append(function_examples_context) + function_member_tuning_documents.extend( + allocate_tuning_triplets( + function_examples_context, function_examples_questions, function_examples_answers + ) + ) function_member_dataset = Dataset( retrieval_chunks=function_member_retrieval_chunks[:2], - tuning_pairs=function_member_tuning_pairs, + tuning_documents=function_member_tuning_documents, ) return function_member_dataset, function_member_retrieval_chunks @pydantic.validate_call(validate_return=True) -def generate_member_dataset(member_details: MemberDetails) -> tuple[Dataset, ...]: +def generate_member_dataset(member_details: MemberDetails) -> tuple[Dataset, ...]: # noqa: PLR0915 """Create a dataset for a member. Parameters @@ -2724,7 +2475,7 @@ def generate_member_dataset(member_details: MemberDetails) -> tuple[Dataset, ... Returns ------- tuple[Dataset, ...] 
- all documents for retrieval and tuning pairs for querying member documentation + all documents for retrieval and tuning for querying member documentation Raises ------ @@ -2741,188 +2492,163 @@ def generate_member_dataset(member_details: MemberDetails) -> tuple[Dataset, ... member = f"'{member_name}' object" member_retrieval_chunks: list[str] = [] - member_tuning_pairs: list[tuple[str, str, SplitName]] = [] - - module_parent_pairs = [ - ( - f"What is the parent module of {member}?", - f"'{member_details.member_module}' is the name of its parent module.", - ), - ( - f"Can you tell me the parent module of {member}?", - f"Sure, the parent module of {member} is '{member_details.member_module}'.", - ), - ( - f"I'm trying to find the parent module of {member}, can you help?", - f"Of course, parent module of {member} is '{member_details.member_module}'.", - ), - ( - f"Do you know the parent module of {member}?", - f"Yes, the parent module of {member} is '{member_details.member_module}'.", - ), - ( - f"I need to know the parent module of {member}, can you provide that?", - f"Absolutely, parent module of {member} is '{member_details.member_module}'.", - ), - ( - f"Could you inform me about the parent module of {member}?", - f"Certainly, '{member_details.member_module}' is parent module of {member}.", - ), + member_tuning_documents: list[Document] = [] + + module_parent_context = f"{member} is part of parent module {member_details.member_module}." + module_parent_questions = [ + f"What is the parent module of {member}?", + f"Can you tell me the parent module of {member}?", + f"I'm trying to find the parent module of {member}, can you help?", + f"Do you know the parent module of {member}?", + f"I need to know the parent module of {member}, can you provide that?", + f"Could you inform me about the parent module of {member}?", ] - member_retrieval_chunks.append( - f"{member} is part of parent module {member_details.member_module}." 
+ module_parent_answers = [ + f"'{member_details.member_module}' is the name of its parent module.", + f"The parent module of {member} is '{member_details.member_module}'.", + f"Parent module of {member} is '{member_details.member_module}'.", + f"'{member_details.member_module}' is parent module of {member}.", + ] + + member_retrieval_chunks.append(module_parent_context) + member_tuning_documents.extend( + allocate_tuning_triplets( + module_parent_context, module_parent_questions, module_parent_answers + ) ) - member_tuning_pairs.extend(allocate_tuning_pairs(module_parent_pairs)) - - member_full_name_pairs = [ - ( - f"What is the full name of {member}?", - f"'{member_full_name}' is its fully qualified name.", - ), - ( - f"Can you tell me the full name of the {member}?", - f"Sure, the fully qualified name of {member} is '{member_full_name}'.", - ), - ( - f"I need to know the full name of {member}. Can you help?", - f"Of course, the full name of {member} is '{member_full_name}'.", - ), - ( - f"What's the fully qualified name for the {member}?", - f"The fully qualified name for {member} is '{member_full_name}'.", - ), - ( - f"Could you provide the full name of the {member}?", - f"Certainly, the full name of the {member} is '{member_full_name}'.", - ), - ( - f"I'm looking for the full name of {member}. What is it?", - f"The full name of {member} is '{member_full_name}'.", - ), + + member_full_name_context = f"Full name of {member} is '{member_full_name}'." + member_full_name_questions = [ + f"What is the full name of {member}?", + f"Can you tell me the full name of the {member}?", + f"I need to know the full name of {member}. Can you help?", + f"What's the fully qualified name for the {member}?", + f"Could you provide the full name of the {member}?", + f"I'm looking for the full name of {member}. 
What is it?", + ] + member_full_name_answers = [ + f"'{member_full_name}' is its fully qualified name.", + f"The fully qualified name of {member} is '{member_full_name}'.", + f"The full name of {member} is '{member_full_name}'.", + f"The fully qualified name for {member} is '{member_full_name}'.", + f"The full name of the {member} is '{member_full_name}'.", ] - member_retrieval_chunks.append(f"Full name of {member} is '{member_full_name}'.") - member_tuning_pairs.extend(allocate_tuning_pairs(member_full_name_pairs)) + + member_retrieval_chunks.append(member_full_name_context) + member_tuning_documents.extend( + allocate_tuning_triplets( + member_full_name_context, member_full_name_questions, member_full_name_answers + ) + ) member_hierarchy = enumerate_array_elements(member_details.member_hierarchy) - member_hierarchy_pairs = [ - ( - f"What is the hierarchy of {member}?", - f"The hierarchy of {member} is as follows: {member_hierarchy}.", - ), - ( - f"Can you explain the hierarchy of the {member}?", - f"Sure, the hierarchy of the {member} is: {member_hierarchy}.", - ), - ( - f"Could you tell me the hierarchy of {member}?", - f"Of course, the hierarchy of {member} is: {member_hierarchy}.", - ), - ( - f"I would like to know the hierarchy of {member}. Can you provide that?", - f"Absolutely, the hierarchy of {member} is: {member_hierarchy}.", - ), - ( - f"Please provide the hierarchy of {member}.", - f"The hierarchy of {member} is: {member_hierarchy}.", - ), - ( - f"I'm interested in the hierarchy of {member}. Could you share it?", - f"Sure, the hierarchy of {member} is: {member_hierarchy}.", - ), + + member_hierarchy_context = f"Hierarchy of {member} is as follows: {member_hierarchy}." + member_hierarchy_questions = [ + f"What is the hierarchy of {member}?", + f"Can you explain the hierarchy of the {member}?", + f"Could you tell me the hierarchy of {member}?", + f"I would like to know the hierarchy of {member}. 
Can you provide that?", + f"Please provide the hierarchy of {member}.", + f"I'm interested in the hierarchy of {member}. Could you share it?", + ] + member_hierarchy_answers = [ + f"The hierarchy of {member} is as follows: {member_hierarchy}.", + f"The hierarchy of the {member} is: {member_hierarchy}.", + f"The hierarchy of {member} is: {member_hierarchy}.", ] - member_retrieval_chunks.append(f"Hierarchy of {member} is as follows: {member_hierarchy}.") - member_tuning_pairs.extend(allocate_tuning_pairs(member_hierarchy_pairs)) + + member_retrieval_chunks.append(member_hierarchy_context) + member_tuning_documents.extend( + allocate_tuning_triplets( + member_hierarchy_context, member_hierarchy_questions, member_hierarchy_answers + ) + ) if not (member_docstring := member_details.member_docstring): - member_documentation_pairs = [ - ( - f"What is the documentation of {member}?", - f"{member} does not have any documentation.", - ), - ( - f"Can you provide the documentation for the {member}?", - f"Sorry, the {member} does not have any documentation.", - ), - ( - f"Is there any documentation available for the {member}?", - f"No, there is no documentation available for the {member}.", - ), - ( - f"Could you show me the documentation of the {member}?", - f"Unfortunately, the {member} does not have any documentation.", - ), - ( - f"I'm looking for the documentation of {member}. Can you help?", - f"I'm sorry, but the {member} does not have any documentation.", - ), - ] - member_retrieval_chunks.append( + member_documentation_context = ( f"Unfortunately, {member} currently does not have any documentation." 
) - member_tuning_pairs.extend(allocate_tuning_pairs(member_documentation_pairs)) + member_documentation_questions = [ + f"What is the documentation of {member}?", + f"Can you provide the documentation for the {member}?", + f"Is there any documentation available for the {member}?", + f"Could you show me the documentation of the {member}?", + f"I'm looking for the documentation of {member}. Can you help?", + ] + member_documentation_answers = [ + f"{member} does not have any documentation.", + f"The {member} does not have any documentation.", + f"There is no documentation available for the {member}.", + ] + + member_retrieval_chunks.append(member_documentation_context) + member_tuning_documents.extend( + allocate_tuning_triplets( + member_documentation_context, + member_documentation_questions, + member_documentation_answers, + ) + ) else: - member_documentation_pairs = [ - (f"What does {member} do?", f"Its documentation is as follows: '{member_docstring}'."), - ( - f"Can you explain the function of the {member}?", - f"Sure, here is its documentation: '{member_docstring}'.", - ), - ( - f"I'm not sure what {member} does. Can you clarify?", - f"Of course, here's its documentation for clarification: '{member_docstring}'.", - ), - ( - f"Could you tell me about the {member}?", - f"Certainly, its documentation is: '{member_docstring}'.", - ), - ( - f"I need information on the {member}.", - f"Here's the documentation you need: '{member_docstring}'.", - ), - ( - f"What's the purpose of the {member}?", - f"The purpose is described in its documentation: '{member_docstring}'.", - ), - ] - member_retrieval_chunks.append( + member_documentation_context = ( f"The following is the documentation of {member}: '{member_docstring}'." ) - member_tuning_pairs.extend(allocate_tuning_pairs(member_documentation_pairs)) + member_documentation_questions = [ + f"What does {member} do?", + f"Can you explain the function of the {member}?", + f"I'm not sure what {member} does. 
Can you clarify?", + f"Could you tell me about the {member}?", + f"I need information on the {member}.", + f"What's the purpose of the {member}?", + ] + member_documentation_answers = [ + f"Its documentation is as follows: '{member_docstring}'.", + f"Here is its documentation: '{member_docstring}'.", + f"Here's its documentation for clarification: '{member_docstring}'.", + f"Its documentation is: '{member_docstring}'.", + f"Here's the documentation you need: '{member_docstring}'.", + f"The purpose is described in its documentation: '{member_docstring}'.", + ] + + member_retrieval_chunks.append(member_documentation_context) + member_tuning_documents.extend( + allocate_tuning_triplets( + member_documentation_context, + member_documentation_questions, + member_documentation_answers, + ) + ) if (member_type_details := member_details.member_type_details) is not None: member_type = member_type_details.member_type - member_type_pairs = [ - (f"What is the type of {member}?", f"{member} is of '{member_type.value}' type."), - ( - f"Can you tell me the type of the {member}?", - f"Sure, the {member} is of '{member_type.value}' type.", - ), - ( - f"I would like to know the type of {member}. Can you help?", - f"Absolutely, the {member} is of '{member_type.value}' type.", - ), - ( - f"Do you know the type of {member}?", - f"Yes, the {member} is of '{member_type.value}' type.", - ), - ( - f"Could you inform me about the type of {member}?", - f"Of course, the {member} is of '{member_type.value}' type.", - ), - ( - f"I'm curious about type of {member}. Can you provide some information?", - f"Certainly, the {member} is of '{member_type.value}' type.", - ), - ] - member_retrieval_chunks.insert(-1, f"'{member_name}' is a Python {member_type.value}.") - member_tuning_pairs.extend(allocate_tuning_pairs(member_type_pairs)) + member_type_context = f"'{member_name}' is a Python {member_type.value}." 
+ member_type_questions = [ + f"What is the type of {member}?", + f"Can you tell me the type of the {member}?", + f"I would like to know the type of {member}. Can you help?", + f"Do you know the type of {member}?", + f"Could you inform me about the type of {member}?", + f"I'm curious about type of {member}. Can you provide some information?", + ] + member_type_answers = [ + f"{member} is of '{member_type.value}' type.", + f"The {member} is of '{member_type.value}' type.", + ] + + member_retrieval_chunks.append(member_type_context) + member_tuning_documents.extend( + allocate_tuning_triplets( + member_type_context, member_type_questions, member_type_answers + ) + ) if member_type_details is None: member_retrieval_chunks.insert(0, f"'{member_name}' is a Python object.") member_dataset = Dataset( - retrieval_chunks=member_retrieval_chunks, tuning_pairs=member_tuning_pairs + retrieval_chunks=member_retrieval_chunks, tuning_documents=member_tuning_documents ) return (member_dataset,) @@ -2947,7 +2673,7 @@ def generate_member_dataset(member_details: MemberDetails) -> tuple[Dataset, ... 
member_dataset = Dataset( retrieval_chunks=member_retrieval_chunks + member_type_retrieval_chunks, - tuning_pairs=member_tuning_pairs, + tuning_documents=member_tuning_documents, ) return (member_dataset, member_type_dataset) diff --git a/src/generative_ai/dataset_generation/utils_generation.py b/src/generative_ai/dataset_generation/utils_generation.py index 43d5939..d05303d 100644 --- a/src/generative_ai/dataset_generation/utils_generation.py +++ b/src/generative_ai/dataset_generation/utils_generation.py @@ -447,32 +447,12 @@ class Dataset(pydantic.BaseModel): ---------- retrieval_chunks : list[str] chunks of text to be used for retrieval - tuning_pairs : list[tuple[str, str, SplitName]] + tuning_triplets : list[tuple[str, str, SplitName]] pairs of question and answer to be used for tuning and their split allocation """ retrieval_chunks: list[str] - tuning_pairs: list[tuple[str, str, SplitName]] - - @pydantic.computed_field - @functools.cached_property - def tuning_documents(self: "Dataset") -> list[Document]: - """Store tuning documents using concatenated retrieval sources, questions and answers. 
- - Returns - ------- - list[Document] - tuning documents using concatenated retrieval sources, questions and answers - """ - return [ - Document( - context=" ".join(self.retrieval_chunks), - question=question, - answer=answer, - split=split, - ) - for question, answer, split in self.tuning_pairs - ] + tuning_documents: list[Document] class JSONDocument(pydantic.BaseModel): From 5e9fb6ef82edacae452d3ab099d6e84d95135811 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Wed, 13 Mar 2024 09:45:59 +0530 Subject: [PATCH 18/26] updated docs --- docs/source/guidelines/CLI.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/guidelines/CLI.md b/docs/source/guidelines/CLI.md index cdfdbb0..a7bf8ed 100644 --- a/docs/source/guidelines/CLI.md +++ b/docs/source/guidelines/CLI.md @@ -35,10 +35,10 @@ Database generation complete: '/full/path/to/working/directory/vector_database'. $ docs-cli answer-query "How many modules are there in information retrieval sub-package?" --embedding-model "all-mpnet-base-v2" --database-directory "vector_database" --search-type "similarity" --number-of-documents 5 --language-model-type "standard_transformers" --standard-pipeline-type "text2text-generation" --standard-model-name "google/flan-t5-large" Query: How many modules are there in information retrieval sub-package? Answer: 5 -Duration: 2.28 seconds +Duration: 1.69 seconds Source 1: 'information_retrieval' package has 5 many modules. Source 2: Modules of 'information_retrieval' package are as follows: 1. orchestrate_retrieval 2. step_1_retrieval 3. step_2_retrieval 4. step_3_retrieval 5. utils_retrieval. -Source 3: 'information_retrieval' package has 23 many public exports. +Source 3: 'information_retrieval' package has 22 many public exports. Source 4: Hierarchy of 'information_retrieval' package is as follows: 1. generative_ai 2. information_retrieval. 
Source 5: Hierarchy of 'utils_retrieval' module is as follows: 1. generative_ai 2. information_retrieval 3. utils_retrieval. ``` @@ -50,7 +50,7 @@ $ docs-cli answer-query "What are different types of retrieval supported by this Query: What are different types of retrieval supported by this package? Answer: MMR and Similarity. -Duration: 20.75 seconds +Duration: 22.97 seconds Source 1: The following is the documentation of 'RetrievalType' object: 'Define supported retrieval types.'. Source 2: 'information_retrieval' package has 5 many modules. Source 3: Names of different members of 'RetrievalType' enum are as follows: 1. MMR 2. SIMILARITY. @@ -64,7 +64,7 @@ Source 5: The following is the documentation of 'information_retrieval' package: $ docs-cli answer-query "List public exports of dataset generation package." --embedding-model "all-mpnet-base-v2" --database-directory "vector_database" --search-type "mmr" --number-of-documents 5 --initial-number-of-documents 10 --language-model-type "quantised_ctransformers" --quantised-model-name "TheBloke/Mistral-7B-v0.1-GGUF" --quantised-model-file "mistral-7b-v0.1.Q4_K_M.gguf" --quantised-model-type "mistral" Query: List public exports of dataset generation package. Answer: The following is the list of public exports of 'dataset_generation' package: 1. JSONDataset 2. JSONDocument 3. generate_json_dataset 4. generate_member_dataset 5. generate_module_dataset 6. generate_package_dataset 7. generate_raw_datasets 8. get_all_member_details 9. get_all_module_contents 10. get_all_package_contents 11. load_json_dataset 12. store_json_dataset. -Duration: 58.37 seconds +Duration: 57.91 seconds Source 1: 'dataset_generation' package has 12 many public exports. Source 2: Documentation of 'generate_package_dataset' function lacks any examples. Source 3: The following is the documentation of 'generate_raw_datasets' object: 'Generate all retrieval and tuning documents for exploring documentation of a package. 
From b045ed7d6afd08b9cdfc1996704f32c0d3d2a432 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 24 Mar 2024 16:22:36 +0530 Subject: [PATCH 19/26] updated versions --- pyproject.toml | 16 ++++++++-------- requirements/constraints.fine_tuning.txt | 4 ++-- requirements/constraints.txt | 12 ++++++------ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 580c481..8843af8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,14 +44,14 @@ dynamic=[ dependencies = [ "chromadb<0.5,>=0.4.15", "ctransformers<0.3,>=0.2.27", - "gradio<4.22,>=4.19.2", - "jq<1.7,>=1.6", - "langchain<0.1.8,>=0.1.1", + "gradio<4.23,>=4.19.2", + "jq<1.8,>=1.6", + "langchain<0.2,>=0.1.1", "numpydoc<1.7,>=1.6", "pydantic<2.7,>=2.4.2", - "sentence-transformers<2.6,>=2.2.2", - "transformers<4.39,>=4.36", - "typer<0.10,>=0.9", + "sentence-transformers<2.7,>=2.2.2", + "transformers<4.40,>=4.36", + "typer<0.11,>=0.9", ] [project.optional-dependencies] all = [ @@ -94,12 +94,12 @@ fine-tuning = [ "bitsandbytes<0.44,>=0.41.2", "datasets<2.19,>=2.15", "evaluate<0.5,>=0.4.1", - "peft<0.10,>=0.6.2", + "peft<0.11,>=0.6.2", "rouge-score<0.2,>=0.1.2", "safetensors<0.5,>=0.4", "scikit-learn<1.5,>=1.3", "torch<2.3,>=2.1.1", - "trl<0.8,>=0.7.4", + "trl<0.9,>=0.7.4", ] format = [ "autoflake", diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt index 4004e32..1fb4d80 100644 --- a/requirements/constraints.fine_tuning.txt +++ b/requirements/constraints.fine_tuning.txt @@ -3,9 +3,9 @@ bitsandbytes<0.44,>=0.41.2 datasets<2.19,>=2.15.0 evaluate<0.5,>=0.4.1 nltk<3.9,>=3.8.1 -peft<0.10,>=0.6.2 +peft<0.11,>=0.6.2 rouge-score<0.2,>=0.1.2 safetensors<0.5,>=0.4.0 scikit-learn<1.5,>=1.3.0 torch<2.3,>=2.1.1 -trl<0.8,>=0.7.4 +trl<0.9,>=0.7.4 diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 604b111..c11c64b 100644 --- a/requirements/constraints.txt +++ 
b/requirements/constraints.txt @@ -1,10 +1,10 @@ chromadb<0.5,>=0.4.15 ctransformers<0.3,>=0.2.27 -gradio<4.22,>=4.19.2 -jq<1.7,>=1.6 -langchain<0.1.8,>=0.1.1 +gradio<4.23,>=4.19.2 +jq<1.8,>=1.6 +langchain<0.2,>=0.1.1 numpydoc<1.7,>=1.6 pydantic<2.7,>=2.4.2 -sentence-transformers<2.6,>=2.2.2 -transformers<4.39,>=4.36 -typer<0.10,>=0.9 +sentence-transformers<2.7,>=2.2.2 +transformers<4.40,>=4.36 +typer<0.11,>=0.9 From ec7951a1f91e16064cfaff4171cd2e51fec017ea Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 7 Apr 2024 14:04:09 +0530 Subject: [PATCH 20/26] pre-commit autoupdate --- .pre-commit-config.yaml | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a9f1db3..9e51f07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-ast - id: check-case-conflict @@ -28,19 +28,23 @@ repos: - id: requirements-txt-fixer - id: trailing-whitespace - repo: https://github.com/asottile/pyupgrade - rev: v3.15.1 + rev: v3.15.2 hooks: - id: pyupgrade args: - --keep-runtime-typing - --py311-plus + stages: + - manual - repo: https://github.com/pycqa/autoflake - rev: v2.3.0 + rev: v2.3.1 hooks: - id: autoflake args: - src pass_filenames: false + stages: + - manual - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: @@ -48,8 +52,10 @@ repos: args: - src pass_filenames: false + stages: + - manual - repo: https://github.com/psf/black - rev: 24.2.0 + rev: 24.3.0 hooks: - id: black additional_dependencies: @@ -57,8 +63,10 @@ repos: args: - src pass_filenames: false + stages: + - manual - repo: https://github.com/pycqa/bandit - rev: 1.7.7 + rev: 1.7.8 hooks: - id: bandit args: @@ -76,8 +84,10 @@ repos: args: - src pass_filenames: false + stages: + - manual - repo: https://github.com/PyCQA/pylint - 
rev: v3.0.3 + rev: v3.1.0 hooks: - id: pylint args: @@ -88,7 +98,7 @@ repos: stages: - manual - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.2 + rev: v0.3.5 hooks: - id: ruff args: @@ -123,14 +133,14 @@ repos: - --target-version - py311 - repo: https://github.com/econchick/interrogate - rev: 1.5.0 + rev: 1.6.0 hooks: - id: interrogate args: - src pass_filenames: false - repo: https://github.com/numpy/numpydoc - rev: v1.6.0 + rev: v1.7.0 hooks: - id: numpydoc-validation stages: @@ -154,7 +164,7 @@ repos: stages: - manual - repo: https://github.com/crate-ci/typos - rev: v1.18.2 + rev: v1.20.4 hooks: - id: typos args: From a7decb45724f865cca2d5ba3f419930a2ee39780 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 7 Apr 2024 14:10:31 +0530 Subject: [PATCH 21/26] pre-commit run --all-files --- src/generative_ai/dataset_generation/step_1_generation.py | 2 +- src/generative_ai/top_level.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/generative_ai/dataset_generation/step_1_generation.py b/src/generative_ai/dataset_generation/step_1_generation.py index d48f509..68ab8ef 100644 --- a/src/generative_ai/dataset_generation/step_1_generation.py +++ b/src/generative_ai/dataset_generation/step_1_generation.py @@ -6,7 +6,7 @@ import inspect import logging import pkgutil -import types # noqa: TCH003 +import types import typing import pydantic diff --git a/src/generative_ai/top_level.py b/src/generative_ai/top_level.py index 013166d..e7bbed5 100644 --- a/src/generative_ai/top_level.py +++ b/src/generative_ai/top_level.py @@ -1,7 +1,7 @@ """Define functionalities for top level modules.""" import logging -import pathlib # noqa: TCH003 +import pathlib import shutil import pydantic From bb91e3928b4e205eacbba52e998e2d923a47c494 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 7 Apr 2024 14:17:09 +0530 Subject: [PATCH 22/26] updated LLM prompt 
--- .../information_retrieval/step_2_retrieval.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/generative_ai/information_retrieval/step_2_retrieval.py b/src/generative_ai/information_retrieval/step_2_retrieval.py index 2e2b7bb..4c509fa 100644 --- a/src/generative_ai/information_retrieval/step_2_retrieval.py +++ b/src/generative_ai/information_retrieval/step_2_retrieval.py @@ -161,23 +161,25 @@ def generate_retrieval_chain( * The prompt template instructs the model to not answer if it is missing in the context. * It also instructs the model to keep the answer as concise as possible. """ - prompt_template = """You are a chat assistant for question answering tasks. + prompt_template = """You are an assistant for question answering tasks. -Use the following retrieved context to answer the given question. +### Instructions -If the answer is not in the context, say "I do not know.". +1. Use only the following retrieved context to answer the given question. -Keep your answer as concise as possible. +2. If the answer is not in the context, say "I do not know.". -Context +3. Keep your answer as concise as possible. 
+ +### Context {context} -Question +### Question {question} -Answer +### Answer """ From b923a0dfa9d66fe231a74500ec6d4790c5f13b35 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 7 Apr 2024 16:31:48 +0530 Subject: [PATCH 23/26] updated dependencies --- pyproject.toml | 12 ++++++------ requirements/constraints.fine_tuning.txt | 2 +- requirements/constraints.txt | 10 +++++----- requirements/requirements.txt | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8843af8..968d711 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,14 +44,14 @@ dynamic=[ dependencies = [ "chromadb<0.5,>=0.4.15", "ctransformers<0.3,>=0.2.27", - "gradio<4.23,>=4.19.2", + "gradio<4.26,>=4.19.2", "jq<1.8,>=1.6", - "langchain<0.2,>=0.1.1", - "numpydoc<1.7,>=1.6", + "langchain<0.2,>=0.1.13", + "numpydoc<1.8,>=1.6", "pydantic<2.7,>=2.4.2", "sentence-transformers<2.7,>=2.2.2", - "transformers<4.40,>=4.36", - "typer<0.11,>=0.9", + "transformers<4.39,>=4.36", + "typer-slim<0.13,>=0.12.1", ] [project.optional-dependencies] all = [ @@ -90,7 +90,7 @@ doc = [ "sphinx-copybutton", ] fine-tuning = [ - "accelerate<0.29,>=0.24.1", + "accelerate<0.30,>=0.24.1", "bitsandbytes<0.44,>=0.41.2", "datasets<2.19,>=2.15", "evaluate<0.5,>=0.4.1", diff --git a/requirements/constraints.fine_tuning.txt b/requirements/constraints.fine_tuning.txt index 1fb4d80..a3e21c0 100644 --- a/requirements/constraints.fine_tuning.txt +++ b/requirements/constraints.fine_tuning.txt @@ -1,4 +1,4 @@ -accelerate<0.29,>=0.24.1 +accelerate<0.30,>=0.24.1 bitsandbytes<0.44,>=0.41.2 datasets<2.19,>=2.15.0 evaluate<0.5,>=0.4.1 diff --git a/requirements/constraints.txt b/requirements/constraints.txt index c11c64b..e810768 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -1,10 +1,10 @@ chromadb<0.5,>=0.4.15 ctransformers<0.3,>=0.2.27 -gradio<4.23,>=4.19.2 +gradio<4.26,>=4.19.2 jq<1.8,>=1.6 
-langchain<0.2,>=0.1.1 -numpydoc<1.7,>=1.6 +langchain<0.2,>=0.1.13 +numpydoc<1.8,>=1.6 pydantic<2.7,>=2.4.2 sentence-transformers<2.7,>=2.2.2 -transformers<4.40,>=4.36 -typer<0.11,>=0.9 +transformers<4.39,>=4.36 +typer-slim<0.13,>=0.12.1 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 706c904..2e924d3 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -7,4 +7,4 @@ numpydoc pydantic sentence-transformers transformers -typer +typer-slim From 94d6750cceda5848cc89131f2256d46142041d10 Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 7 Apr 2024 17:02:46 +0530 Subject: [PATCH 24/26] updated files and notebooks --- .../fine_tuning/v3_opt_350m/step_1.ipynb | 11 +- .../fine_tuning/v3_opt_350m/step_2.ipynb | 311 +++++++++++++----- .../fine_tuning/v3_opt_350m/step_3.ipynb | 223 +++++++++++++ .../fine_tuning/v3_opt_350m/step_4.ipynb | 247 ++++++++++++++ .../information_retrieval/step_2_retrieval.py | 14 +- 5 files changed, 715 insertions(+), 91 deletions(-) create mode 100644 src/generative_ai/fine_tuning/v3_opt_350m/step_3.ipynb create mode 100644 src/generative_ai/fine_tuning/v3_opt_350m/step_4.ipynb diff --git a/src/generative_ai/fine_tuning/v3_opt_350m/step_1.ipynb b/src/generative_ai/fine_tuning/v3_opt_350m/step_1.ipynb index 66c3284..b0105e8 100644 --- a/src/generative_ai/fine_tuning/v3_opt_350m/step_1.ipynb +++ b/src/generative_ai/fine_tuning/v3_opt_350m/step_1.ipynb @@ -32,8 +32,15 @@ "metadata": {}, "outputs": [], "source": [ - "raw_dataset_path = pathlib.Path(input_directory, \"json_documents.json\")\n", - "\n", + "raw_dataset_path = pathlib.Path(input_directory, \"json_documents.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "hugging_face_dataset_path = pathlib.Path(working_directory, \"hugging_face_dataset_directory\")\n", "hugging_face_dataset_archive = 
pathlib.Path(output_directory, \"hugging_face_dataset_archive.zip\")" ] diff --git a/src/generative_ai/fine_tuning/v3_opt_350m/step_2.ipynb b/src/generative_ai/fine_tuning/v3_opt_350m/step_2.ipynb index 1e4c0d7..e5de177 100644 --- a/src/generative_ai/fine_tuning/v3_opt_350m/step_2.ipynb +++ b/src/generative_ai/fine_tuning/v3_opt_350m/step_2.ipynb @@ -6,15 +6,17 @@ "metadata": {}, "outputs": [], "source": [ + "import gc\n", "import pathlib\n", "import shutil\n", + "import time\n", "\n", "import numpy\n", "from datasets import load_from_disk\n", "from evaluate import load\n", "from peft import LoraConfig, PeftType, TaskType\n", "from sklearn.metrics import accuracy_score, f1_score, fbeta_score, precision_score, recall_score\n", - "from torch import Tensor, float16\n", + "from torch import Tensor, cuda, float16\n", "from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", @@ -23,10 +25,20 @@ " EvalPrediction,\n", " SchedulerType,\n", " TrainingArguments,\n", + " set_seed,\n", ")\n", "from trl import DataCollatorForCompletionOnlyLM, SFTTrainer" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "set_seed(0)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -56,7 +68,8 @@ "metadata": {}, "outputs": [], "source": [ - "shutil.unpack_archive(hugging_face_dataset_archive, extract_dir=working_directory)" + "tuning_checkpoints_path = pathlib.Path(working_directory, \"tuning_checkpoints_directory\")\n", + "tuning_checkpoints_archive = pathlib.Path(output_directory, \"tuning_checkpoints_archive.zip\")" ] }, { @@ -65,10 +78,8 @@ "metadata": {}, "outputs": [], "source": [ - "hugging_face_dataset = load_from_disk(hugging_face_dataset_path)\n", - "\n", - "train_subset = hugging_face_dataset[\"train\"]\n", - "validation_subset = hugging_face_dataset[\"validation\"]" + "tuned_adapter_path = pathlib.Path(working_directory, \"tuned_adapter_directory\")\n", + "tuned_adapter_archive = 
pathlib.Path(output_directory, \"tuned_adapter_archive.zip\")" ] }, { @@ -84,8 +95,15 @@ " bnb_4bit_compute_dtype=float16,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_use_double_quant=True,\n", - ")\n", - "\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "mask_token_index = -100" ] }, @@ -95,11 +113,8 @@ "metadata": {}, "outputs": [], "source": [ - "tuning_checkpoints_path = pathlib.Path(working_directory, \"tuning_checkpoints_directory\")\n", - "tuning_checkpoints_archive = pathlib.Path(output_directory, \"tuning_checkpoints_archive.zip\")\n", - "\n", - "tuned_adapter_path = pathlib.Path(working_directory, \"tuned_adapter_directory\")\n", - "tuned_adapter_archive = pathlib.Path(output_directory, \"tuned_adapter_archive.zip\")" + "maximum_sequence_length = 512\n", + "number_of_sequences = 1024" ] }, { @@ -108,15 +123,26 @@ "metadata": {}, "outputs": [], "source": [ - "model = AutoModelForCausalLM.from_pretrained(\n", - " base_model_identifier,\n", - " quantization_config=quantisation_configuration,\n", - " device_map=\"auto\",\n", - " low_cpu_mem_usage=True,\n", - ")\n", - "\n", - "model.config.use_cache = False\n", - "model.config.pretraining_tp = 1" + "shutil.unpack_archive(hugging_face_dataset_archive, extract_dir=working_directory)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hugging_face_dataset = load_from_disk(hugging_face_dataset_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_subset = hugging_face_dataset[\"train\"]\n", + "validation_subset = hugging_face_dataset[\"validation\"]" ] }, { @@ -131,6 +157,119 @@ "tokeniser.padding_side = \"right\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "instruction_template = \"\"\"You are an assistant for question answering 
tasks.\n", + "\n", + "### Instructions\n", + "\n", + "1. Use only the following retrieved context to answer the given question.\n", + "2. If the answer is not in the context, say \"I do not know.\".\n", + "3. Keep your answer as concise as possible.\"\"\"\n", + "\n", + "context_template = \"\"\"### Context:\"\"\"\n", + "question_template = \"### Question:\"\n", + "answer_template = \"### Answer:\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def format_inputs(examples):\n", + " return [\n", + " \"\\n\\n\".join(\n", + " [\n", + " instruction_template,\n", + " f\"{context_template} {examples['context'][counter]}\",\n", + " f\"{question_template} {examples['question'][counter]}\",\n", + " f\"{answer_template} {examples['answer'][counter]}\",\n", + " ]\n", + " )\n", + " for counter in range(len(examples))\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def determine_characters_per_token_ratio(dataset, tokeniser):\n", + " total_characters = 0\n", + " total_tokens = 0\n", + "\n", + " for example in iter(dataset):\n", + " example_prompt = format_inputs(example)\n", + "\n", + " total_characters += len(example_prompt)\n", + "\n", + " total_tokens += (\n", + " len(tokeniser(example_prompt).tokens())\n", + " if tokeniser.is_fast\n", + " else len(tokeniser.tokenize(example_prompt))\n", + " )\n", + "\n", + " return total_characters / total_tokens" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "characters_per_token_ratio = determine_characters_per_token_ratio(train_subset, tokeniser)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "instruction_template_with_context = f\"\\n{context_template}\"\n", + "instruction_template_token_indices = tokeniser.encode(\n", + " instruction_template_with_context, 
add_special_tokens=False\n", + ")[2:]\n", + "\n", + "response_template_with_context = f\"\\n{answer_template}\"\n", + "response_template_token_indices = tokeniser.encode(\n", + " response_template_with_context, add_special_tokens=False\n", + ")[2:]\n", + "\n", + "collator = DataCollatorForCompletionOnlyLM(\n", + " response_template_token_indices,\n", + " instruction_template=instruction_template_token_indices,\n", + " tokenizer=tokeniser,\n", + " ignore_index=mask_token_index,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = AutoModelForCausalLM.from_pretrained(\n", + " base_model_identifier,\n", + " quantization_config=quantisation_configuration,\n", + " device_map=\"auto\",\n", + " low_cpu_mem_usage=True,\n", + ")\n", + "\n", + "model.config.use_cache = False\n", + "model.config.pretraining_tp = 1" + ] + }, { "cell_type": "code", "execution_count": null, @@ -145,6 +284,7 @@ " lora_dropout=0.1,\n", " bias=\"none\",\n", " use_rslora=True,\n", + " init_lora_weights=True,\n", ")" ] }, @@ -157,18 +297,20 @@ "training_configuration = TrainingArguments(\n", " output_dir=str(tuning_checkpoints_path),\n", " overwrite_output_dir=True,\n", - " evaluation_strategy=\"epoch\",\n", + " evaluation_strategy=\"steps\",\n", " gradient_accumulation_steps=1,\n", - " eval_delay=3,\n", - " learning_rate=1e-4,\n", + " eval_delay=2,\n", + " learning_rate=1e-5,\n", " weight_decay=0.001,\n", " max_grad_norm=0.3,\n", - " num_train_epochs=50,\n", + " num_train_epochs=100,\n", " lr_scheduler_type=SchedulerType.REDUCE_ON_PLATEAU,\n", " warmup_ratio=0.03,\n", " log_level=\"error\",\n", - " logging_strategy=\"epoch\",\n", - " save_strategy=\"epoch\",\n", + " logging_strategy=\"steps\",\n", + " logging_steps=0.05,\n", + " save_strategy=\"steps\",\n", + " save_steps=0.05,\n", " save_total_limit=5,\n", " save_safetensors=True,\n", " save_only_model=True,\n", @@ -178,6 +320,7 @@ " fp16=True,\n", " 
half_precision_backend=\"auto\",\n", " fp16_full_eval=False,\n", + " eval_steps=0.05,\n", " load_best_model_at_end=True,\n", " metric_for_best_model=\"eval_google_bleu\",\n", " greater_is_better=True,\n", @@ -190,52 +333,6 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "context_template = \" ### Context:\"\n", - "question_template = \" ### Question:\"\n", - "answer_template = \" ### Answer:\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def format_inputs(examples: list[dict[str, str]]) -> list[str]:\n", - " return [\n", - " \"\\n\".join(\n", - " [\n", - " f\"{context_template} {examples['context'][counter]}\",\n", - " f\"{question_template} {examples['question'][counter]}\",\n", - " f\"{answer_template} {examples['answer'][counter]}\",\n", - " ]\n", - " )\n", - " for counter in range(len(examples))\n", - " ]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "response_template_with_context = f\"\\n{answer_template}\"\n", - "response_template_token_indices = tokeniser.encode(\n", - " response_template_with_context, add_special_tokens=False\n", - ")[2:]\n", - "\n", - "collator = DataCollatorForCompletionOnlyLM(\n", - " response_template_token_indices, tokenizer=tokeniser, ignore_index=mask_token_index\n", - ")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -352,7 +449,9 @@ " peft_config=peft_configuration,\n", " formatting_func=format_inputs,\n", " packing=False,\n", - " max_seq_length=512,\n", + " max_seq_length=maximum_sequence_length,\n", + " num_of_sequences=number_of_sequences,\n", + " chars_per_token=characters_per_token_ratio,\n", ")" ] }, @@ -362,7 +461,16 @@ "metadata": {}, "outputs": [], "source": [ - "supervised_trainer.train()" + "supervised_trainer.model.print_trainable_parameters()" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "supervised_trainer.train(resume_from_checkpoint=False)" ] }, { @@ -385,8 +493,15 @@ " tuned_adapter_archive.suffix[1:],\n", " root_dir=working_directory,\n", " base_dir=tuned_adapter_path.stem,\n", - ")\n", - "\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "_ = shutil.make_archive(\n", " str(pathlib.Path(tuning_checkpoints_archive.parent, tuning_checkpoints_archive.stem)),\n", " tuning_checkpoints_archive.suffix[1:],\n", @@ -394,6 +509,46 @@ " base_dir=tuning_checkpoints_path.stem,\n", ")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "del supervised_trainer\n", + "del tokeniser\n", + "del model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "time.sleep(30)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] } ], "metadata": { diff --git a/src/generative_ai/fine_tuning/v3_opt_350m/step_3.ipynb b/src/generative_ai/fine_tuning/v3_opt_350m/step_3.ipynb new file mode 100644 index 0000000..f6484a5 --- /dev/null +++ b/src/generative_ai/fine_tuning/v3_opt_350m/step_3.ipynb @@ -0,0 +1,223 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import gc\n", + "import pathlib\n", + "import time\n", + "\n", + "from torch import cuda, float16\n", + "from transformers import (\n", + " AutoModelForCausalLM,\n", + " AutoTokenizer,\n", + " BitsAndBytesConfig,\n", + " pipeline,\n", + " set_seed,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [], + "source": [ + "set_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "step_identifier = pathlib.Path(\"step_3\")\n", + "\n", + "input_directory = pathlib.Path(step_identifier, \"input_directory\")\n", + "working_directory = pathlib.Path(step_identifier, \"working_directory\")\n", + "output_directory = pathlib.Path(step_identifier, \"output_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_model_identifier = \"facebook/opt-350m\"\n", + "\n", + "quantisation_configuration = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_compute_dtype=float16,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_use_double_quant=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tokeniser = AutoTokenizer.from_pretrained(base_model_identifier)\n", + "\n", + "tokeniser.pad_token = tokeniser.eos_token\n", + "tokeniser.padding_side = \"right\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "untuned_model = AutoModelForCausalLM.from_pretrained(\n", + " base_model_identifier,\n", + " quantization_config=quantisation_configuration,\n", + " device_map=\"auto\",\n", + " low_cpu_mem_usage=True,\n", + ")\n", + "\n", + "_ = untuned_model.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "untuned_pipeline = pipeline(\n", + " \"text-generation\",\n", + " model=untuned_model,\n", + " tokenizer=tokeniser,\n", + " device_map=\"auto\",\n", + " torch_dtype=float16,\n", + " model_kwargs={\"low_cpu_mem_usage\": True},\n", + " max_new_tokens=256,\n", + " do_sample=True,\n", + " top_k=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "instruction_template = \"\"\"You are an assistant for question answering tasks.\n", + "\n", + "### Instructions\n", + "\n", + "1. Use only the following retrieved context to answer the given question.\n", + "2. If the answer is not in the context, say \"I do not know.\".\n", + "3. Keep your answer as concise as possible.\"\"\"\n", + "\n", + "context_template = \"\"\"### Context:\"\"\"\n", + "question_template = \"### Question:\"\n", + "answer_template = \"### Answer:\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "input_question = \"Name the root package.\"\n", + "retrieved_context = \"'package_name_to_import_with' is the root package.\"\n", + "\n", + "model_input = \"\\n\\n\".join(\n", + " [\n", + " instruction_template,\n", + " f\"{context_template} {retrieved_context}\",\n", + " f\"{question_template} {input_question}\",\n", + " f\"{answer_template} \",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "untuned_model_output = untuned_pipeline(input_question, return_full_text=False)\n", + "\n", + "untuned_output_answer = untuned_model_output[0][\"generated_text\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "untuned_output_answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "del untuned_pipeline\n", + "del untuned_model\n", + "del tokeniser" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "time.sleep(30)" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/generative_ai/fine_tuning/v3_opt_350m/step_4.ipynb b/src/generative_ai/fine_tuning/v3_opt_350m/step_4.ipynb new file mode 100644 index 0000000..02636fe --- /dev/null +++ b/src/generative_ai/fine_tuning/v3_opt_350m/step_4.ipynb @@ -0,0 +1,247 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import gc\n", + "import pathlib\n", + "import shutil\n", + "import time\n", + "\n", + "from peft import PeftModel\n", + "from torch import cuda, float16\n", + "from transformers import (\n", + " AutoModelForCausalLM,\n", + " AutoTokenizer,\n", + " BitsAndBytesConfig,\n", + " pipeline,\n", + " set_seed,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "set_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "step_identifier = pathlib.Path(\"step_4\")\n", + "\n", + "input_directory = pathlib.Path(step_identifier, \"input_directory\")\n", + "working_directory = pathlib.Path(step_identifier, \"working_directory\")\n", + "output_directory = pathlib.Path(step_identifier, \"output_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tuned_adapter_archive = pathlib.Path(input_directory, \"tuned_adapter_archive.zip\")\n", + "tuned_adapter_path = pathlib.Path(working_directory, \"tuned_adapter_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shutil.unpack_archive(tuned_adapter_archive, extract_dir=working_directory)" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "base_model_identifier = \"facebook/opt-350m\"\n", + "\n", + "quantisation_configuration = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_compute_dtype=float16,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_use_double_quant=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tokeniser = AutoTokenizer.from_pretrained(base_model_identifier)\n", + "\n", + "tokeniser.pad_token = tokeniser.eos_token\n", + "tokeniser.padding_side = \"right\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "untuned_model = AutoModelForCausalLM.from_pretrained(\n", + " base_model_identifier,\n", + " quantization_config=quantisation_configuration,\n", + " device_map=\"auto\",\n", + " low_cpu_mem_usage=True,\n", + ")\n", + "\n", + "peft_model = PeftModel.from_pretrained(untuned_model, tuned_adapter_path)\n", + "\n", + "_ = peft_model.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tuned_pipeline = pipeline(\n", + " \"text-generation\",\n", + " model=peft_model,\n", + " tokenizer=tokeniser,\n", + " device_map=\"auto\",\n", + " torch_dtype=float16,\n", + " model_kwargs={\"low_cpu_mem_usage\": True},\n", + " max_new_tokens=256,\n", + " do_sample=True,\n", + " top_k=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "instruction_template = \"\"\"You are an assistant for question answering tasks.\n", + "\n", + "### Instructions\n", + "\n", + "1. Use only the following retrieved context to answer the given question.\n", + "2. If the answer is not in the context, say \"I do not know.\".\n", + "3. 
Keep your answer as concise as possible.\"\"\"\n", + "\n", + "context_template = \"\"\"### Context:\"\"\"\n", + "question_template = \"### Question:\"\n", + "answer_template = \"### Answer:\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "input_question = \"Name the root package.\"\n", + "retrieved_context = \"'package_name_to_import_with' is the root package.\"\n", + "\n", + "model_input = \"\\n\\n\".join(\n", + " [\n", + " instruction_template,\n", + " f\"{context_template} {retrieved_context}\",\n", + " f\"{question_template} {input_question}\",\n", + " f\"{answer_template} \",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_input" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tuned_model_output = tuned_pipeline(input_question, return_full_text=False)\n", + "\n", + "tuned_output_answer = tuned_model_output[0][\"generated_text\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tuned_output_answer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "del tuned_pipeline\n", + "del peft_model\n", + "del untuned_model\n", + "del tokeniser" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "time.sleep(30)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/src/generative_ai/information_retrieval/step_2_retrieval.py b/src/generative_ai/information_retrieval/step_2_retrieval.py index 4c509fa..4a74a4b 100644 --- a/src/generative_ai/information_retrieval/step_2_retrieval.py +++ b/src/generative_ai/information_retrieval/step_2_retrieval.py @@ -166,22 +166,14 @@ def generate_retrieval_chain( ### Instructions 1. Use only the following retrieved context to answer the given question. - 2. If the answer is not in the context, say "I do not know.". - 3. Keep your answer as concise as possible. -### Context - -{context} - -### Question - -{question} +### Context: {context} -### Answer +### Question: {question} -""" +### Answer: """ prompt = PromptTemplate.from_template(prompt_template) From 664992d4c8606f1b76ac6bebcd466bcfaf6f1d9d Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 7 Apr 2024 20:55:34 +0530 Subject: [PATCH 25/26] sample end-to-end notebook --- ...b-2024-04-07-opt-350m-lora-causal-lm.ipynb | 9339 +++++++++++++++++ 1 file changed, 9339 insertions(+) create mode 100644 src/generative_ai/fine_tuning/v3_opt_350m/colab-2024-04-07-opt-350m-lora-causal-lm.ipynb diff --git a/src/generative_ai/fine_tuning/v3_opt_350m/colab-2024-04-07-opt-350m-lora-causal-lm.ipynb b/src/generative_ai/fine_tuning/v3_opt_350m/colab-2024-04-07-opt-350m-lora-causal-lm.ipynb new file mode 100644 index 0000000..530f5e7 --- /dev/null +++ b/src/generative_ai/fine_tuning/v3_opt_350m/colab-2024-04-07-opt-350m-lora-causal-lm.ipynb @@ -0,0 +1,9339 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "XamGYrd5XQXc" + }, + "source": [ + "# Overall Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wF26u9iiXQXi" + }, + "source": [ + "### Install System Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", + "colab": { 
+ "base_uri": "https://localhost:8080/" + }, + "id": "LH1jfk8qXQXk", + "outputId": "c9b0e038-7dc9-4527-a2cf-be0d3416f9a2", + "scrolled": true, + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\r0% [Working]\r \rGet:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]\n", + "\r \rGet:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]\n", + "Hit:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n", + "Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease\n", + "Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]\n", + "Hit:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n", + "Hit:7 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease\n", + "Get:8 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,081 kB]\n", + "Get:9 http://archive.ubuntu.com/ubuntu jammy-updates/restricted amd64 Packages [2,107 kB]\n", + "Get:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]\n", + "Get:11 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [1,641 kB]\n", + "Hit:12 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n", + "Get:13 http://archive.ubuntu.com/ubuntu jammy-updates/multiverse amd64 Packages [61.2 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [1,920 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,357 kB]\n", + "Hit:16 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n", + "Get:17 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy/main amd64 Packages [27.7 kB]\n", + "Fetched 8,447 kB in 1s (6,022 kB/s)\n", + "Reading package lists... Done\n", + "Reading package lists... Done\n", + "Building dependency tree... Done\n", + "Reading state information... 
Done\n", + "The following additional packages will be installed:\n", + " cuda-cccl-11-8 cuda-command-line-tools-11-8 cuda-compiler-11-8 cuda-cudart-11-8\n", + " cuda-cudart-dev-11-8 cuda-cuobjdump-11-8 cuda-cupti-11-8 cuda-cupti-dev-11-8 cuda-cuxxfilt-11-8\n", + " cuda-documentation-11-8 cuda-driver-dev-11-8 cuda-gdb-11-8 cuda-libraries-11-8\n", + " cuda-libraries-dev-11-8 cuda-memcheck-11-8 cuda-nsight-11-8 cuda-nsight-compute-11-8\n", + " cuda-nsight-systems-11-8 cuda-nvcc-11-8 cuda-nvdisasm-11-8 cuda-nvml-dev-11-8 cuda-nvprof-11-8\n", + " cuda-nvprune-11-8 cuda-nvrtc-11-8 cuda-nvrtc-dev-11-8 cuda-nvtx-11-8 cuda-nvvp-11-8\n", + " cuda-profiler-api-11-8 cuda-sanitizer-11-8 cuda-toolkit-11-8-config-common\n", + " cuda-toolkit-11-config-common cuda-tools-11-8 cuda-visual-tools-11-8 default-jre\n", + " default-jre-headless gds-tools-11-8 libcublas-11-8 libcublas-dev-11-8 libcufft-11-8\n", + " libcufft-dev-11-8 libcufile-11-8 libcufile-dev-11-8 libcurand-11-8 libcurand-dev-11-8\n", + " libcusolver-11-8 libcusolver-dev-11-8 libcusparse-11-8 libcusparse-dev-11-8 libnpp-11-8\n", + " libnpp-dev-11-8 libnvjpeg-11-8 libnvjpeg-dev-11-8 libtinfo5 libxcb-icccm4 libxcb-image0\n", + " libxcb-keysyms1 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xinput0 libxcb-xkb1\n", + " libxkbcommon-x11-0 libxtst6 nsight-compute-2022.3.0 nsight-systems-2022.4.2 openjdk-11-jre\n", + "Recommended packages:\n", + " libatk-wrapper-java-jni fonts-dejavu-extra\n", + "The following NEW packages will be installed:\n", + " cuda-cccl-11-8 cuda-command-line-tools-11-8 cuda-compiler-11-8 cuda-cudart-11-8\n", + " cuda-cudart-dev-11-8 cuda-cuobjdump-11-8 cuda-cupti-11-8 cuda-cupti-dev-11-8 cuda-cuxxfilt-11-8\n", + " cuda-documentation-11-8 cuda-driver-dev-11-8 cuda-gdb-11-8 cuda-libraries-11-8\n", + " cuda-libraries-dev-11-8 cuda-memcheck-11-8 cuda-nsight-11-8 cuda-nsight-compute-11-8\n", + " cuda-nsight-systems-11-8 cuda-nvcc-11-8 cuda-nvdisasm-11-8 cuda-nvml-dev-11-8 cuda-nvprof-11-8\n", + " 
cuda-nvprune-11-8 cuda-nvrtc-11-8 cuda-nvrtc-dev-11-8 cuda-nvtx-11-8 cuda-nvvp-11-8\n", + " cuda-profiler-api-11-8 cuda-sanitizer-11-8 cuda-toolkit-11-8 cuda-toolkit-11-8-config-common\n", + " cuda-toolkit-11-config-common cuda-tools-11-8 cuda-visual-tools-11-8 default-jre\n", + " default-jre-headless gds-tools-11-8 libcublas-11-8 libcublas-dev-11-8 libcufft-11-8\n", + " libcufft-dev-11-8 libcufile-11-8 libcufile-dev-11-8 libcurand-11-8 libcurand-dev-11-8\n", + " libcusolver-11-8 libcusolver-dev-11-8 libcusparse-11-8 libcusparse-dev-11-8 libnpp-11-8\n", + " libnpp-dev-11-8 libnvjpeg-11-8 libnvjpeg-dev-11-8 libtinfo5 libxcb-icccm4 libxcb-image0\n", + " libxcb-keysyms1 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xinput0 libxcb-xkb1\n", + " libxkbcommon-x11-0 libxtst6 nsight-compute-2022.3.0 nsight-systems-2022.4.2 openjdk-11-jre\n", + "0 upgraded, 67 newly installed, 0 to remove and 45 not upgraded.\n", + "Need to get 2,711 MB of archives.\n", + "After this operation, 6,650 MB of additional disk space will be used.\n", + "Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 default-jre-headless amd64 2:1.11-72build2 [3,042 B]\n", + "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-cccl-11-8 11.8.89-1 [1,040 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxtst6 amd64 2:1.2.3-1build4 [13.4 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 openjdk-11-jre amd64 11.0.22+7-0ubuntu2~22.04.1 [214 kB]\n", + "Get:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-cupti-11-8 11.8.87-1 [15.4 MB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu jammy/main amd64 default-jre amd64 2:1.11-72build2 [896 B]\n", + "Get:7 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 libtinfo5 amd64 6.3-2ubuntu0.1 [100 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxcb-xinerama0 amd64 1.14-3ubuntu3 [5,414 B]\n", + "Get:9 
http://archive.ubuntu.com/ubuntu jammy/main amd64 libxcb-icccm4 amd64 0.4.1-1.1build2 [11.5 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxcb-util1 amd64 0.4.0-1build2 [11.4 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxcb-image0 amd64 0.4.0-2 [11.5 kB]\n", + "Get:12 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxcb-keysyms1 amd64 0.4.0-1build3 [8,746 B]\n", + "Get:13 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxcb-render-util0 amd64 0.3.9-1build3 [10.3 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxcb-xkb1 amd64 1.14-3ubuntu3 [32.8 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxkbcommon-x11-0 amd64 1.4.0-1 [14.4 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxcb-xinput0 amd64 1.14-3ubuntu3 [34.3 kB]\n", + "Get:17 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-cupti-dev-11-8 11.8.87-1 [2,552 kB]\n", + "Get:18 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvdisasm-11-8 11.8.86-1 [50.8 MB]\n", + "Get:19 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-cuobjdump-11-8 11.8.86-1 [165 kB]\n", + "Get:20 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-gdb-11-8 11.8.86-1 [4,138 kB]\n", + "Get:21 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-memcheck-11-8 11.8.86-1 [142 kB]\n", + "Get:22 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvprof-11-8 11.8.87-1 [1,959 kB]\n", + "Get:23 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvtx-11-8 11.8.86-1 [51.3 kB]\n", + "Get:24 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-sanitizer-11-8 11.8.86-1 [8,784 kB]\n", + "Get:25 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-command-line-tools-11-8 11.8.0-1 
[2,472 B]\n", + "Get:26 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-cuxxfilt-11-8 11.8.86-1 [189 kB]\n", + "Get:27 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-toolkit-11-config-common 11.8.89-1 [16.4 kB]\n", + "Get:28 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-toolkit-11-8-config-common 11.8.89-1 [16.3 kB]\n", + "Get:29 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-cudart-11-8 11.8.89-1 [165 kB]\n", + "Get:30 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-driver-dev-11-8 11.8.89-1 [27.3 kB]\n", + "Get:31 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-cudart-dev-11-8 11.8.89-1 [820 kB]\n", + "Get:32 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvcc-11-8 11.8.89-1 [43.5 MB]\n", + "Get:33 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvprune-11-8 11.8.86-1 [58.1 kB]\n", + "Get:34 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-compiler-11-8 11.8.0-1 [2,432 B]\n", + "Get:35 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-documentation-11-8 11.8.86-1 [49.8 kB]\n", + "Get:36 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvrtc-11-8 11.8.89-1 [16.4 MB]\n", + "Get:37 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcublas-11-8 11.11.3.6-1 [248 MB]\n", + "Get:38 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcufft-11-8 10.9.0.58-1 [94.2 MB]\n", + "Get:39 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcufile-11-8 1.4.0.31-1 [474 kB]\n", + "Get:40 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcurand-11-8 10.3.0.86-1 [42.2 MB]\n", + "Get:41 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcusolver-11-8 11.4.1.48-1 [52.3 MB]\n", + "Get:42 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcusparse-11-8 11.7.5.86-1 [116 MB]\n", + "Get:43 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libnpp-11-8 11.8.0.86-1 [102 MB]\n", + "Get:44 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libnvjpeg-11-8 11.9.0.86-1 [1,865 kB]\n", + "Get:45 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-libraries-11-8 11.8.0-1 [2,518 B]\n", + "Get:46 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-profiler-api-11-8 11.8.86-1 [18.5 kB]\n", + "Get:47 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvrtc-dev-11-8 11.8.89-1 [13.5 MB]\n", + "Get:48 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcublas-dev-11-8 11.11.3.6-1 [269 MB]\n", + "Get:49 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcufft-dev-11-8 10.9.0.58-1 [189 MB]\n", + "Get:50 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcufile-dev-11-8 1.4.0.31-1 [1,062 kB]\n", + "Get:51 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcurand-dev-11-8 10.3.0.86-1 [42.9 MB]\n", + "Get:52 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcusolver-dev-11-8 11.4.1.48-1 [35.7 MB]\n", + "Get:53 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libcusparse-dev-11-8 11.7.5.86-1 [116 MB]\n", + "Get:54 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libnpp-dev-11-8 11.8.0.86-1 [100 MB]\n", + "Get:55 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 libnvjpeg-dev-11-8 11.9.0.86-1 [1,536 kB]\n", + "Get:56 
https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-libraries-dev-11-8 11.8.0-1 [2,554 B]\n", + "Get:57 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nsight-11-8 11.8.86-1 [119 MB]\n", + "Get:58 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 nsight-compute-2022.3.0 2022.3.0.22-1 [580 MB]\n", + "Get:59 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nsight-compute-11-8 11.8.0-1 [3,790 B]\n", + "Get:60 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 nsight-systems-2022.4.2 2022.4.2.50-32196742v0 [286 MB]\n", + "Get:61 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nsight-systems-11-8 11.8.0-1 [3,310 B]\n", + "Get:62 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvml-dev-11-8 11.8.86-1 [81.4 kB]\n", + "Get:63 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-nvvp-11-8 11.8.87-1 [114 MB]\n", + "Get:64 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-visual-tools-11-8 11.8.0-1 [2,870 B]\n", + "Get:65 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 gds-tools-11-8 1.4.0.31-1 [38.7 MB]\n", + "Get:66 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-tools-11-8 11.8.0-1 [2,390 B]\n", + "Get:67 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 cuda-toolkit-11-8 11.8.0-1 [3,374 B]\n", + "Fetched 2,711 MB in 33s (82.8 MB/s)\n", + "Extracting templates from packages: 100%\n", + "Selecting previously unselected package cuda-cccl-11-8.\n", + "(Reading database ... 
121753 files and directories currently installed.)\n", + "Preparing to unpack .../00-cuda-cccl-11-8_11.8.89-1_amd64.deb ...\n", + "Unpacking cuda-cccl-11-8 (11.8.89-1) ...\n", + "Selecting previously unselected package cuda-cupti-11-8.\n", + "Preparing to unpack .../01-cuda-cupti-11-8_11.8.87-1_amd64.deb ...\n", + "Unpacking cuda-cupti-11-8 (11.8.87-1) ...\n", + "Selecting previously unselected package cuda-cupti-dev-11-8.\n", + "Preparing to unpack .../02-cuda-cupti-dev-11-8_11.8.87-1_amd64.deb ...\n", + "Unpacking cuda-cupti-dev-11-8 (11.8.87-1) ...\n", + "Selecting previously unselected package cuda-nvdisasm-11-8.\n", + "Preparing to unpack .../03-cuda-nvdisasm-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-nvdisasm-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-cuobjdump-11-8.\n", + "Preparing to unpack .../04-cuda-cuobjdump-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-cuobjdump-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-gdb-11-8.\n", + "Preparing to unpack .../05-cuda-gdb-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-gdb-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-memcheck-11-8.\n", + "Preparing to unpack .../06-cuda-memcheck-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-memcheck-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-nvprof-11-8.\n", + "Preparing to unpack .../07-cuda-nvprof-11-8_11.8.87-1_amd64.deb ...\n", + "Unpacking cuda-nvprof-11-8 (11.8.87-1) ...\n", + "Selecting previously unselected package cuda-nvtx-11-8.\n", + "Preparing to unpack .../08-cuda-nvtx-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-nvtx-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-sanitizer-11-8.\n", + "Preparing to unpack .../09-cuda-sanitizer-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-sanitizer-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-command-line-tools-11-8.\n", + "Preparing to 
unpack .../10-cuda-command-line-tools-11-8_11.8.0-1_amd64.deb ...\n", + "Unpacking cuda-command-line-tools-11-8 (11.8.0-1) ...\n", + "Selecting previously unselected package cuda-cuxxfilt-11-8.\n", + "Preparing to unpack .../11-cuda-cuxxfilt-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-cuxxfilt-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-toolkit-11-config-common.\n", + "Preparing to unpack .../12-cuda-toolkit-11-config-common_11.8.89-1_all.deb ...\n", + "Unpacking cuda-toolkit-11-config-common (11.8.89-1) ...\n", + "Selecting previously unselected package cuda-toolkit-11-8-config-common.\n", + "Preparing to unpack .../13-cuda-toolkit-11-8-config-common_11.8.89-1_all.deb ...\n", + "Unpacking cuda-toolkit-11-8-config-common (11.8.89-1) ...\n", + "Selecting previously unselected package cuda-cudart-11-8.\n", + "Preparing to unpack .../14-cuda-cudart-11-8_11.8.89-1_amd64.deb ...\n", + "Unpacking cuda-cudart-11-8 (11.8.89-1) ...\n", + "Selecting previously unselected package cuda-driver-dev-11-8.\n", + "Preparing to unpack .../15-cuda-driver-dev-11-8_11.8.89-1_amd64.deb ...\n", + "Unpacking cuda-driver-dev-11-8 (11.8.89-1) ...\n", + "Selecting previously unselected package cuda-cudart-dev-11-8.\n", + "Preparing to unpack .../16-cuda-cudart-dev-11-8_11.8.89-1_amd64.deb ...\n", + "Unpacking cuda-cudart-dev-11-8 (11.8.89-1) ...\n", + "Selecting previously unselected package cuda-nvcc-11-8.\n", + "Preparing to unpack .../17-cuda-nvcc-11-8_11.8.89-1_amd64.deb ...\n", + "Unpacking cuda-nvcc-11-8 (11.8.89-1) ...\n", + "Selecting previously unselected package cuda-nvprune-11-8.\n", + "Preparing to unpack .../18-cuda-nvprune-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-nvprune-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-compiler-11-8.\n", + "Preparing to unpack .../19-cuda-compiler-11-8_11.8.0-1_amd64.deb ...\n", + "Unpacking cuda-compiler-11-8 (11.8.0-1) ...\n", + "Selecting previously unselected package 
cuda-documentation-11-8.\n", + "Preparing to unpack .../20-cuda-documentation-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-documentation-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-nvrtc-11-8.\n", + "Preparing to unpack .../21-cuda-nvrtc-11-8_11.8.89-1_amd64.deb ...\n", + "Unpacking cuda-nvrtc-11-8 (11.8.89-1) ...\n", + "Selecting previously unselected package libcublas-11-8.\n", + "Preparing to unpack .../22-libcublas-11-8_11.11.3.6-1_amd64.deb ...\n", + "Unpacking libcublas-11-8 (11.11.3.6-1) ...\n", + "Selecting previously unselected package libcufft-11-8.\n", + "Preparing to unpack .../23-libcufft-11-8_10.9.0.58-1_amd64.deb ...\n", + "Unpacking libcufft-11-8 (10.9.0.58-1) ...\n", + "Selecting previously unselected package libcufile-11-8.\n", + "Preparing to unpack .../24-libcufile-11-8_1.4.0.31-1_amd64.deb ...\n", + "Unpacking libcufile-11-8 (1.4.0.31-1) ...\n", + "Selecting previously unselected package libcurand-11-8.\n", + "Preparing to unpack .../25-libcurand-11-8_10.3.0.86-1_amd64.deb ...\n", + "Unpacking libcurand-11-8 (10.3.0.86-1) ...\n", + "Selecting previously unselected package libcusolver-11-8.\n", + "Preparing to unpack .../26-libcusolver-11-8_11.4.1.48-1_amd64.deb ...\n", + "Unpacking libcusolver-11-8 (11.4.1.48-1) ...\n", + "Selecting previously unselected package libcusparse-11-8.\n", + "Preparing to unpack .../27-libcusparse-11-8_11.7.5.86-1_amd64.deb ...\n", + "Unpacking libcusparse-11-8 (11.7.5.86-1) ...\n", + "Selecting previously unselected package libnpp-11-8.\n", + "Preparing to unpack .../28-libnpp-11-8_11.8.0.86-1_amd64.deb ...\n", + "Unpacking libnpp-11-8 (11.8.0.86-1) ...\n", + "Selecting previously unselected package libnvjpeg-11-8.\n", + "Preparing to unpack .../29-libnvjpeg-11-8_11.9.0.86-1_amd64.deb ...\n", + "Unpacking libnvjpeg-11-8 (11.9.0.86-1) ...\n", + "Selecting previously unselected package cuda-libraries-11-8.\n", + "Preparing to unpack .../30-cuda-libraries-11-8_11.8.0-1_amd64.deb 
...\n", + "Unpacking cuda-libraries-11-8 (11.8.0-1) ...\n", + "Selecting previously unselected package cuda-profiler-api-11-8.\n", + "Preparing to unpack .../31-cuda-profiler-api-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-profiler-api-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-nvrtc-dev-11-8.\n", + "Preparing to unpack .../32-cuda-nvrtc-dev-11-8_11.8.89-1_amd64.deb ...\n", + "Unpacking cuda-nvrtc-dev-11-8 (11.8.89-1) ...\n", + "Selecting previously unselected package libcublas-dev-11-8.\n", + "Preparing to unpack .../33-libcublas-dev-11-8_11.11.3.6-1_amd64.deb ...\n", + "Unpacking libcublas-dev-11-8 (11.11.3.6-1) ...\n", + "Selecting previously unselected package libcufft-dev-11-8.\n", + "Preparing to unpack .../34-libcufft-dev-11-8_10.9.0.58-1_amd64.deb ...\n", + "Unpacking libcufft-dev-11-8 (10.9.0.58-1) ...\n", + "Selecting previously unselected package libcufile-dev-11-8.\n", + "Preparing to unpack .../35-libcufile-dev-11-8_1.4.0.31-1_amd64.deb ...\n", + "Unpacking libcufile-dev-11-8 (1.4.0.31-1) ...\n", + "Selecting previously unselected package libcurand-dev-11-8.\n", + "Preparing to unpack .../36-libcurand-dev-11-8_10.3.0.86-1_amd64.deb ...\n", + "Unpacking libcurand-dev-11-8 (10.3.0.86-1) ...\n", + "Selecting previously unselected package libcusolver-dev-11-8.\n", + "Preparing to unpack .../37-libcusolver-dev-11-8_11.4.1.48-1_amd64.deb ...\n", + "Unpacking libcusolver-dev-11-8 (11.4.1.48-1) ...\n", + "Selecting previously unselected package libcusparse-dev-11-8.\n", + "Preparing to unpack .../38-libcusparse-dev-11-8_11.7.5.86-1_amd64.deb ...\n", + "Unpacking libcusparse-dev-11-8 (11.7.5.86-1) ...\n", + "Selecting previously unselected package libnpp-dev-11-8.\n", + "Preparing to unpack .../39-libnpp-dev-11-8_11.8.0.86-1_amd64.deb ...\n", + "Unpacking libnpp-dev-11-8 (11.8.0.86-1) ...\n", + "Selecting previously unselected package libnvjpeg-dev-11-8.\n", + "Preparing to unpack 
.../40-libnvjpeg-dev-11-8_11.9.0.86-1_amd64.deb ...\n", + "Unpacking libnvjpeg-dev-11-8 (11.9.0.86-1) ...\n", + "Selecting previously unselected package cuda-libraries-dev-11-8.\n", + "Preparing to unpack .../41-cuda-libraries-dev-11-8_11.8.0-1_amd64.deb ...\n", + "Unpacking cuda-libraries-dev-11-8 (11.8.0-1) ...\n", + "Selecting previously unselected package default-jre-headless.\n", + "Preparing to unpack .../42-default-jre-headless_2%3a1.11-72build2_amd64.deb ...\n", + "Unpacking default-jre-headless (2:1.11-72build2) ...\n", + "Selecting previously unselected package libxtst6:amd64.\n", + "Preparing to unpack .../43-libxtst6_2%3a1.2.3-1build4_amd64.deb ...\n", + "Unpacking libxtst6:amd64 (2:1.2.3-1build4) ...\n", + "Selecting previously unselected package openjdk-11-jre:amd64.\n", + "Preparing to unpack .../44-openjdk-11-jre_11.0.22+7-0ubuntu2~22.04.1_amd64.deb ...\n", + "Unpacking openjdk-11-jre:amd64 (11.0.22+7-0ubuntu2~22.04.1) ...\n", + "Selecting previously unselected package default-jre.\n", + "Preparing to unpack .../45-default-jre_2%3a1.11-72build2_amd64.deb ...\n", + "Unpacking default-jre (2:1.11-72build2) ...\n", + "Selecting previously unselected package cuda-nsight-11-8.\n", + "Preparing to unpack .../46-cuda-nsight-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-nsight-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package nsight-compute-2022.3.0.\n", + "Preparing to unpack .../47-nsight-compute-2022.3.0_2022.3.0.22-1_amd64.deb ...\n", + "Unpacking nsight-compute-2022.3.0 (2022.3.0.22-1) ...\n", + "Selecting previously unselected package cuda-nsight-compute-11-8.\n", + "Preparing to unpack .../48-cuda-nsight-compute-11-8_11.8.0-1_amd64.deb ...\n", + "Unpacking cuda-nsight-compute-11-8 (11.8.0-1) ...\n", + "Selecting previously unselected package libtinfo5:amd64.\n", + "Preparing to unpack .../49-libtinfo5_6.3-2ubuntu0.1_amd64.deb ...\n", + "Unpacking libtinfo5:amd64 (6.3-2ubuntu0.1) ...\n", + "Selecting previously unselected 
package libxcb-xinerama0:amd64.\n", + "Preparing to unpack .../50-libxcb-xinerama0_1.14-3ubuntu3_amd64.deb ...\n", + "Unpacking libxcb-xinerama0:amd64 (1.14-3ubuntu3) ...\n", + "Selecting previously unselected package libxcb-icccm4:amd64.\n", + "Preparing to unpack .../51-libxcb-icccm4_0.4.1-1.1build2_amd64.deb ...\n", + "Unpacking libxcb-icccm4:amd64 (0.4.1-1.1build2) ...\n", + "Selecting previously unselected package libxcb-util1:amd64.\n", + "Preparing to unpack .../52-libxcb-util1_0.4.0-1build2_amd64.deb ...\n", + "Unpacking libxcb-util1:amd64 (0.4.0-1build2) ...\n", + "Selecting previously unselected package libxcb-image0:amd64.\n", + "Preparing to unpack .../53-libxcb-image0_0.4.0-2_amd64.deb ...\n", + "Unpacking libxcb-image0:amd64 (0.4.0-2) ...\n", + "Selecting previously unselected package libxcb-keysyms1:amd64.\n", + "Preparing to unpack .../54-libxcb-keysyms1_0.4.0-1build3_amd64.deb ...\n", + "Unpacking libxcb-keysyms1:amd64 (0.4.0-1build3) ...\n", + "Selecting previously unselected package libxcb-render-util0:amd64.\n", + "Preparing to unpack .../55-libxcb-render-util0_0.3.9-1build3_amd64.deb ...\n", + "Unpacking libxcb-render-util0:amd64 (0.3.9-1build3) ...\n", + "Selecting previously unselected package libxcb-xkb1:amd64.\n", + "Preparing to unpack .../56-libxcb-xkb1_1.14-3ubuntu3_amd64.deb ...\n", + "Unpacking libxcb-xkb1:amd64 (1.14-3ubuntu3) ...\n", + "Selecting previously unselected package libxkbcommon-x11-0:amd64.\n", + "Preparing to unpack .../57-libxkbcommon-x11-0_1.4.0-1_amd64.deb ...\n", + "Unpacking libxkbcommon-x11-0:amd64 (1.4.0-1) ...\n", + "Selecting previously unselected package libxcb-xinput0:amd64.\n", + "Preparing to unpack .../58-libxcb-xinput0_1.14-3ubuntu3_amd64.deb ...\n", + "Unpacking libxcb-xinput0:amd64 (1.14-3ubuntu3) ...\n", + "Selecting previously unselected package nsight-systems-2022.4.2.\n", + "Preparing to unpack .../59-nsight-systems-2022.4.2_2022.4.2.50-32196742v0_amd64.deb ...\n", + "Unpacking nsight-systems-2022.4.2 
(2022.4.2.50-32196742v0) ...\n", + "Selecting previously unselected package cuda-nsight-systems-11-8.\n", + "Preparing to unpack .../60-cuda-nsight-systems-11-8_11.8.0-1_amd64.deb ...\n", + "Unpacking cuda-nsight-systems-11-8 (11.8.0-1) ...\n", + "Selecting previously unselected package cuda-nvml-dev-11-8.\n", + "Preparing to unpack .../61-cuda-nvml-dev-11-8_11.8.86-1_amd64.deb ...\n", + "Unpacking cuda-nvml-dev-11-8 (11.8.86-1) ...\n", + "Selecting previously unselected package cuda-nvvp-11-8.\n", + "Preparing to unpack .../62-cuda-nvvp-11-8_11.8.87-1_amd64.deb ...\n", + "Unpacking cuda-nvvp-11-8 (11.8.87-1) ...\n", + "Selecting previously unselected package cuda-visual-tools-11-8.\n", + "Preparing to unpack .../63-cuda-visual-tools-11-8_11.8.0-1_amd64.deb ...\n", + "Unpacking cuda-visual-tools-11-8 (11.8.0-1) ...\n", + "Selecting previously unselected package gds-tools-11-8.\n", + "Preparing to unpack .../64-gds-tools-11-8_1.4.0.31-1_amd64.deb ...\n", + "Unpacking gds-tools-11-8 (1.4.0.31-1) ...\n", + "Selecting previously unselected package cuda-tools-11-8.\n", + "Preparing to unpack .../65-cuda-tools-11-8_11.8.0-1_amd64.deb ...\n", + "Unpacking cuda-tools-11-8 (11.8.0-1) ...\n", + "Selecting previously unselected package cuda-toolkit-11-8.\n", + "Preparing to unpack .../66-cuda-toolkit-11-8_11.8.0-1_amd64.deb ...\n", + "Unpacking cuda-toolkit-11-8 (11.8.0-1) ...\n", + "Setting up cuda-nvml-dev-11-8 (11.8.86-1) ...\n", + "Setting up default-jre-headless (2:1.11-72build2) ...\n", + "Setting up cuda-toolkit-11-config-common (11.8.89-1) ...\n", + "Setting up libxcb-xinput0:amd64 (1.14-3ubuntu3) ...\n", + "Setting up cuda-cccl-11-8 (11.8.89-1) ...\n", + "Setting up cuda-cuobjdump-11-8 (11.8.86-1) ...\n", + "Setting up cuda-nvrtc-11-8 (11.8.89-1) ...\n", + "Setting up cuda-sanitizer-11-8 (11.8.86-1) ...\n", + "Setting up libxtst6:amd64 (2:1.2.3-1build4) ...\n", + "Setting up cuda-cupti-11-8 (11.8.87-1) ...\n", + "Setting up libxcb-keysyms1:amd64 (0.4.0-1build3) 
...\n", + "Setting up libxcb-render-util0:amd64 (0.3.9-1build3) ...\n", + "Setting up openjdk-11-jre:amd64 (11.0.22+7-0ubuntu2~22.04.1) ...\n", + "Setting up cuda-nvdisasm-11-8 (11.8.86-1) ...\n", + "Setting up libxcb-icccm4:amd64 (0.4.1-1.1build2) ...\n", + "Setting up default-jre (2:1.11-72build2) ...\n", + "Setting up cuda-cuxxfilt-11-8 (11.8.86-1) ...\n", + "Setting up libxcb-util1:amd64 (0.4.0-1build2) ...\n", + "Setting up libxcb-xkb1:amd64 (1.14-3ubuntu3) ...\n", + "Setting up libxcb-image0:amd64 (0.4.0-2) ...\n", + "Setting up libxcb-xinerama0:amd64 (1.14-3ubuntu3) ...\n", + "Setting up cuda-nvvp-11-8 (11.8.87-1) ...\n", + "Setting up cuda-nvtx-11-8 (11.8.86-1) ...\n", + "Setting up cuda-gdb-11-8 (11.8.86-1) ...\n", + "Setting up cuda-toolkit-11-8-config-common (11.8.89-1) ...\n", + "Setting alternatives\n", + "update-alternatives: using /usr/local/cuda-11.8 to provide /usr/local/cuda-11 (cuda-11) in auto mode\n", + "Setting up libxkbcommon-x11-0:amd64 (1.4.0-1) ...\n", + "Setting up libcusolver-11-8 (11.4.1.48-1) ...\n", + "Setting up cuda-nvrtc-dev-11-8 (11.8.89-1) ...\n", + "Setting up cuda-driver-dev-11-8 (11.8.89-1) ...\n", + "Setting up cuda-memcheck-11-8 (11.8.86-1) ...\n", + "Setting up gds-tools-11-8 (1.4.0.31-1) ...\n", + "Setting up cuda-nsight-11-8 (11.8.86-1) ...\n", + "Setting up cuda-profiler-api-11-8 (11.8.86-1) ...\n", + "Setting up cuda-documentation-11-8 (11.8.86-1) ...\n", + "Setting up libtinfo5:amd64 (6.3-2ubuntu0.1) ...\n", + "Setting up cuda-nvprune-11-8 (11.8.86-1) ...\n", + "Setting up cuda-cudart-11-8 (11.8.89-1) ...\n", + "Setting up libnvjpeg-11-8 (11.9.0.86-1) ...\n", + "Setting up cuda-nvprof-11-8 (11.8.87-1) ...\n", + "Setting up nsight-compute-2022.3.0 (2022.3.0.22-1) ...\n", + "Setting up nsight-systems-2022.4.2 (2022.4.2.50-32196742v0) ...\n", + "update-alternatives: using /opt/nvidia/nsight-systems/2022.4.2/target-linux-x64/nsys to provide /usr/local/bin/nsys (nsys) in auto mode\n", + "update-alternatives: using 
/opt/nvidia/nsight-systems/2022.4.2/host-linux-x64/nsys-ui to provide /usr/local/bin/nsys-ui (nsys-ui) in auto mode\n", + "Setting up libcusparse-11-8 (11.7.5.86-1) ...\n", + "Setting up libcufft-11-8 (10.9.0.58-1) ...\n", + "Setting up cuda-cupti-dev-11-8 (11.8.87-1) ...\n", + "Setting up libcufft-dev-11-8 (10.9.0.58-1) ...\n", + "Setting up cuda-cudart-dev-11-8 (11.8.89-1) ...\n", + "Setting up libnpp-11-8 (11.8.0.86-1) ...\n", + "Setting up libcusolver-dev-11-8 (11.4.1.48-1) ...\n", + "Setting up cuda-nsight-systems-11-8 (11.8.0-1) ...\n", + "Setting up cuda-command-line-tools-11-8 (11.8.0-1) ...\n", + "Setting up libcusparse-dev-11-8 (11.7.5.86-1) ...\n", + "Setting up libcurand-11-8 (10.3.0.86-1) ...\n", + "Setting up libcufile-11-8 (1.4.0.31-1) ...\n", + "Setting alternatives\n", + "Setting up libcublas-11-8 (11.11.3.6-1) ...\n", + "Setting up libnpp-dev-11-8 (11.8.0.86-1) ...\n", + "Setting up cuda-libraries-11-8 (11.8.0-1) ...\n", + "Setting up cuda-nsight-compute-11-8 (11.8.0-1) ...\n", + "Setting up libnvjpeg-dev-11-8 (11.9.0.86-1) ...\n", + "Setting up cuda-nvcc-11-8 (11.8.89-1) ...\n", + "Setting up libcublas-dev-11-8 (11.11.3.6-1) ...\n", + "Setting up libcurand-dev-11-8 (10.3.0.86-1) ...\n", + "Setting up libcufile-dev-11-8 (1.4.0.31-1) ...\n", + "Setting up cuda-compiler-11-8 (11.8.0-1) ...\n", + "Setting up cuda-libraries-dev-11-8 (11.8.0-1) ...\n", + "Setting up cuda-visual-tools-11-8 (11.8.0-1) ...\n", + "Setting up cuda-tools-11-8 (11.8.0-1) ...\n", + "Setting up cuda-toolkit-11-8 (11.8.0-1) ...\n", + "Setting alternatives\n", + "Processing triggers for libc-bin (2.35-0ubuntu3.4) ...\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbb.so.12 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_0.so.3 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbbind_2_5.so.3 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc.so.2 is not a symbolic link\n", + 
"\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbmalloc_proxy.so.2 is not a symbolic link\n", + "\n", + "/sbin/ldconfig.real: /usr/local/lib/libtbbbind.so.3 is not a symbolic link\n", + "\n", + "Processing triggers for hicolor-icon-theme (0.17-2) ...\n" + ] + } + ], + "source": [ + "!apt-get update\n", + "\n", + "!apt-get install -y --no-install-recommends cuda-toolkit-11-8" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mugVhGJXXQXm" + }, + "source": [ + "### Install Python Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "09KfzJBFXQXn", + "outputId": "f9b0c238-1e1b-47d6-9ef2-bc709cb5365c", + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu118\n", + "Collecting accelerate (from -r requirements.colab.txt (line 1))\n", + " Downloading accelerate-0.29.1-py3-none-any.whl (297 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m297.3/297.3 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting bitsandbytes (from -r requirements.colab.txt (line 2))\n", + " Downloading bitsandbytes-0.43.0-py3-none-manylinux_2_24_x86_64.whl (102.2 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m102.2/102.2 MB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting datasets 
(from -r requirements.colab.txt (line 3))\n", + " Downloading datasets-2.18.0-py3-none-any.whl (510 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m510.5/510.5 kB\u001b[0m \u001b[31m46.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting evaluate (from -r requirements.colab.txt (line 4))\n", + " Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m84.1/84.1 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from -r requirements.colab.txt (line 5)) (3.8.1)\n", + "Collecting peft (from -r requirements.colab.txt (line 6))\n", + " Downloading peft-0.10.0-py3-none-any.whl (199 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m199.1/199.1 kB\u001b[0m \u001b[31m24.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting rouge-score (from -r requirements.colab.txt (line 7))\n", + " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from -r requirements.colab.txt (line 8)) (0.4.2)\n", + "Collecting scikit-learn (from -r requirements.colab.txt (line 9))\n", + " Downloading scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m12.1/12.1 MB\u001b[0m \u001b[31m52.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from -r requirements.colab.txt (line 10)) (2.2.1+cu121)\n", + "Collecting torch (from -r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/torch-2.2.2%2Bcu118-cp310-cp310-linux_x86_64.whl (819.2 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m819.2/819.2 MB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from -r requirements.colab.txt (line 11)) (4.38.2)\n", + "Collecting trl (from -r requirements.colab.txt (line 12))\n", + " Downloading trl-0.7.11-py3-none-any.whl (155 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m 
\u001b[32m155.3/155.3 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.colab.txt (line 1)) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.colab.txt (line 1)) (24.0)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.colab.txt (line 1)) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.colab.txt (line 1)) (6.0.1)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate->-r requirements.colab.txt (line 1)) (0.20.3)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.colab.txt (line 3)) (3.13.3)\n", + "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.colab.txt (line 3)) (14.0.2)\n", + "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.colab.txt (line 3)) (0.6)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets->-r requirements.colab.txt (line 3))\n", + " Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.colab.txt 
(line 3)) (2.0.3)\n", + "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.colab.txt (line 3)) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.colab.txt (line 3)) (4.66.2)\n", + "Collecting xxhash (from datasets->-r requirements.colab.txt (line 3))\n", + " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m22.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting multiprocess (from datasets->-r requirements.colab.txt (line 3))\n", + " Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: fsspec[http]<=2024.2.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.colab.txt (line 3)) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets->-r requirements.colab.txt (line 3)) (3.9.3)\n", + "Collecting responses<0.19 (from evaluate->-r requirements.colab.txt (line 4))\n", + " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from 
nltk->-r requirements.colab.txt (line 5)) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->-r requirements.colab.txt (line 5)) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk->-r requirements.colab.txt (line 5)) (2023.12.25)\n", + "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score->-r requirements.colab.txt (line 7)) (1.4.0)\n", + "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge-score->-r requirements.colab.txt (line 7)) (1.16.0)\n", + "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->-r requirements.colab.txt (line 9)) (1.11.4)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->-r requirements.colab.txt (line 9)) (3.4.0)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.colab.txt (line 10)) (4.10.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.colab.txt (line 10)) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.colab.txt (line 10)) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.colab.txt (line 10)) (3.1.3)\n", + "Collecting nvidia-cuda-nvrtc-cu11==11.8.89 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_nvrtc_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (23.2 MB)\n", + "\u001b[2K 
\u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m23.2/23.2 MB\u001b[0m \u001b[31m66.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cuda-runtime-cu11==11.8.89 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux1_x86_64.whl (875 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m875.6/875.6 kB\u001b[0m \u001b[31m57.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cuda-cupti-cu11==11.8.87 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_cuda_cupti_cu11-11.8.87-py3-none-manylinux1_x86_64.whl (13.1 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m13.1/13.1 MB\u001b[0m \u001b[31m90.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cudnn-cu11==8.7.0.84 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_cudnn_cu11-8.7.0.84-py3-none-manylinux1_x86_64.whl (728.5 MB)\n", + "\u001b[2K 
\u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m728.5/728.5 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cublas-cu11==11.11.3.6 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_cublas_cu11-11.11.3.6-py3-none-manylinux1_x86_64.whl (417.9 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m417.9/417.9 MB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cufft-cu11==10.9.0.58 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_cufft_cu11-10.9.0.58-py3-none-manylinux1_x86_64.whl (168.4 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m168.4/168.4 MB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-curand-cu11==10.3.0.86 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_curand_cu11-10.3.0.86-py3-none-manylinux1_x86_64.whl (58.1 MB)\n", + "\u001b[2K 
\u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m58.1/58.1 MB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cusolver-cu11==11.4.1.48 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_cusolver_cu11-11.4.1.48-py3-none-manylinux1_x86_64.whl (128.2 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m128.2/128.2 MB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-cusparse-cu11==11.7.5.86 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_cusparse_cu11-11.7.5.86-py3-none-manylinux1_x86_64.whl (204.1 MB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m204.1/204.1 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-nccl-cu11==2.19.3 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_nccl_cu11-2.19.3-py3-none-manylinux1_x86_64.whl (135.3 MB)\n", + "\u001b[2K 
\u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m135.3/135.3 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting nvidia-nvtx-cu11==11.8.86 (from torch->-r requirements.colab.txt (line 10))\n", + " Downloading https://download.pytorch.org/whl/cu118/nvidia_nvtx_cu11-11.8.86-py3-none-manylinux1_x86_64.whl (99 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m99.1/99.1 kB\u001b[0m \u001b[31m14.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: triton==2.2.0 in /usr/local/lib/python3.10/dist-packages (from torch->-r requirements.colab.txt (line 10)) (2.2.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers->-r requirements.colab.txt (line 11)) (0.15.2)\n", + "Collecting tyro>=0.5.11 (from trl->-r requirements.colab.txt (line 12))\n", + " Downloading tyro-0.8.2-py3-none-any.whl (80 kB)\n", + "\u001b[2K \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m80.1/80.1 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.colab.txt (line 3)) (1.3.1)\n", + "Requirement already 
satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.colab.txt (line 3)) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.colab.txt (line 3)) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.colab.txt (line 3)) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.colab.txt (line 3)) (1.9.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets->-r requirements.colab.txt (line 3)) (4.0.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.colab.txt (line 3)) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.colab.txt (line 3)) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.colab.txt (line 3)) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets->-r requirements.colab.txt (line 3)) (2024.2.2)\n", + "Requirement already satisfied: docstring-parser>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl->-r requirements.colab.txt (line 12)) (0.16)\n", + "Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl->-r requirements.colab.txt (line 12)) (13.7.1)\n", + "Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl->-r requirements.colab.txt (line 12))\n", + " Downloading 
shtab-1.7.1-py3-none-any.whl (14 kB)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->-r requirements.colab.txt (line 10)) (2.1.5)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r requirements.colab.txt (line 3)) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r requirements.colab.txt (line 3)) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets->-r requirements.colab.txt (line 3)) (2024.1)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->-r requirements.colab.txt (line 10)) (1.3.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.colab.txt (line 12)) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.colab.txt (line 12)) (2.16.1)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.11->trl->-r requirements.colab.txt (line 12)) (0.1.2)\n", + "Building wheels for collected packages: rouge-score\n", + " Building wheel for rouge-score (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=66753c5667dafe9eb1ab717835a90b806ba9c6d2f8fbebd6d697d5ed6dee98ca\n", + " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n", + "Successfully built rouge-score\n", + "Installing collected packages: xxhash, shtab, nvidia-nvtx-cu11, nvidia-nccl-cu11, nvidia-cusparse-cu11, nvidia-curand-cu11, nvidia-cufft-cu11, nvidia-cuda-runtime-cu11, nvidia-cuda-nvrtc-cu11, nvidia-cuda-cupti-cu11, nvidia-cublas-cu11, dill, scikit-learn, rouge-score, responses, nvidia-cusolver-cu11, nvidia-cudnn-cu11, multiprocess, tyro, torch, datasets, bitsandbytes, accelerate, trl, peft, evaluate\n", + " Attempting uninstall: scikit-learn\n", + " Found existing installation: scikit-learn 1.2.2\n", + " Uninstalling scikit-learn-1.2.2:\n", + " Successfully uninstalled scikit-learn-1.2.2\n", + " Attempting uninstall: torch\n", + " Found existing installation: torch 2.2.1+cu121\n", + " Uninstalling torch-2.2.1+cu121:\n", + " Successfully uninstalled torch-2.2.1+cu121\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "torchaudio 2.2.1+cu121 requires torch==2.2.1, but you have torch 2.2.2+cu118 which is incompatible.\n", + "torchtext 0.17.1 requires torch==2.2.1, but you have torch 2.2.2+cu118 which is incompatible.\n", + "torchvision 0.17.1+cu121 requires torch==2.2.1, but you have torch 2.2.2+cu118 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed accelerate-0.29.1 bitsandbytes-0.43.0 datasets-2.18.0 dill-0.3.8 evaluate-0.4.1 multiprocess-0.70.16 nvidia-cublas-cu11-11.11.3.6 nvidia-cuda-cupti-cu11-11.8.87 nvidia-cuda-nvrtc-cu11-11.8.89 nvidia-cuda-runtime-cu11-11.8.89 nvidia-cudnn-cu11-8.7.0.84 nvidia-cufft-cu11-10.9.0.58 nvidia-curand-cu11-10.3.0.86 nvidia-cusolver-cu11-11.4.1.48 nvidia-cusparse-cu11-11.7.5.86 nvidia-nccl-cu11-2.19.3 nvidia-nvtx-cu11-11.8.86 peft-0.10.0 responses-0.18.0 rouge-score-0.1.2 scikit-learn-1.4.1.post1 shtab-1.7.1 torch-2.2.2+cu118 trl-0.7.11 tyro-0.8.2 xxhash-3.4.1\n" + ] + } + ], + "source": [ + "!python3 -m pip install \\\n", + " --upgrade \\\n", + " --requirement \"requirements.colab.txt\" \\\n", + " --constraint \"constraints.colab.txt\" \\\n", + " --extra-index-url \"https://download.pytorch.org/whl/cu118\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "execution": { + "iopub.execute_input": "2024-03-02T19:28:53.340375Z", + "iopub.status.busy": "2024-03-02T19:28:53.339923Z", + "iopub.status.idle": "2024-03-02T19:28:53.345431Z", + "shell.execute_reply": "2024-03-02T19:28:53.344654Z", + "shell.execute_reply.started": "2024-03-02T19:28:53.340339Z" + }, + "id": "Qhhc05VzXQXo" + }, + "source": [ + "### CUDA Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "0J8QrYw_XQXp", + "trusted": true + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"LD_LIBRARY_PATH\"] += \":/usr/local/cuda-11/lib64\"\n", + "os.environ[\"LD_LIBRARY_PATH\"] += 
\":/usr/local/cuda-11.8/lib64\"\n", + "os.environ[\"LD_LIBRARY_PATH\"] += \":/usr/lib64-nvidia\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iWXqj1yVXQXq" + }, + "source": [ + "### Working Directory" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "JESllH64XQXq", + "trusted": true + }, + "outputs": [], + "source": [ + "!mkdir step_1\n", + "\n", + "!mkdir step_1/input_directory\n", + "!mkdir step_1/working_directory\n", + "!mkdir step_1/output_directory\n", + "\n", + "!mkdir step_2\n", + "\n", + "!mkdir step_2/input_directory\n", + "!mkdir step_2/working_directory\n", + "!mkdir step_2/output_directory\n", + "\n", + "!mkdir step_3\n", + "\n", + "!mkdir step_3/input_directory\n", + "!mkdir step_3/working_directory\n", + "!mkdir step_3/output_directory\n", + "\n", + "!mkdir step_4\n", + "\n", + "!mkdir step_4/input_directory\n", + "!mkdir step_4/working_directory\n", + "!mkdir step_4/output_directory" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1zOkgtKfXQXr" + }, + "source": [ + "### Raw Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "e5xUOlOdXQXs", + "trusted": true + }, + "outputs": [], + "source": [ + "!cp json_documents.json step_1/input_directory" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l7XpJsdUXQXs" + }, + "source": [ + "# Step 1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "1FcGQPDBXQXt", + "trusted": true + }, + "outputs": [], + "source": [ + "import json\n", + "import pathlib\n", + "import shutil\n", + "\n", + "from datasets import Dataset, DatasetDict" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "h9tCYVorXQXu", + "trusted": true + }, + "outputs": [], + "source": [ + "step_identifier = pathlib.Path(\"step_1\")\n", + "\n", + "input_directory = pathlib.Path(step_identifier, \"input_directory\")\n", + "working_directory = 
pathlib.Path(step_identifier, \"working_directory\")\n", + "output_directory = pathlib.Path(step_identifier, \"output_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "qH4TNIkuXQXu", + "trusted": true + }, + "outputs": [], + "source": [ + "raw_dataset_path = pathlib.Path(input_directory, \"json_documents.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "fbMcjcGBXQXu", + "trusted": true + }, + "outputs": [], + "source": [ + "hugging_face_dataset_path = pathlib.Path(working_directory, \"hugging_face_dataset_directory\")\n", + "hugging_face_dataset_archive = pathlib.Path(output_directory, \"hugging_face_dataset_archive.zip\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "4IBoZajeXQXv", + "trusted": true + }, + "outputs": [], + "source": [ + "with raw_dataset_path.open(encoding=\"utf-8\") as file_object:\n", + " raw_dataset = json.load(file_object)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "cWBs6ecIXQXv", + "trusted": true + }, + "outputs": [], + "source": [ + "dataset_splits = {split_type: [] for split_type in [\"train\", \"validation\", \"test\"]}\n", + "\n", + "for document in raw_dataset[\"tuning_documents\"]:\n", + " dataset_splits[document[\"split\"]].append(document)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "Qg4rALP5XQXw", + "trusted": true + }, + "outputs": [], + "source": [ + "hugging_face_dataset = DatasetDict(\n", + " {\n", + " split_type: Dataset.from_list(split_data)\n", + " for split_type, split_data in dataset_splits.items()\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 113, + "referenced_widgets": [ + "b7c9053262c346beaa4a74ceb168a2bd", + "ad686be5b46b43929180427f1e453c43", + "9074ca1bec1e493fa6fac34616748967", + 
"881a955bd4f645d594c171840d66357e", + "be2e551f1ae6408186d73c8d6c8bf18f", + "17dcf9c82bbf4d0b8b6ee4051fa232f5", + "129cad9a109a44009c5613a0b32542a4", + "488119b3ddad4772aa367eb100c11a7f", + "ce6fefdd423b4cceb3501a1713a1bd41", + "c5bbf2f491314e6eb754003bb632e96e", + "eb9ac8d3463142a0b3861825643d305b", + "f8befaa4f4ff4c588f62378140fab443", + "d2da4136ebe945b0a9f82949f2ce9d4a", + "7dc4c4206c764db5ad9951d341fbbed1", + "549d75b7456744ba9bcbf622a1973e8a", + "935b0330e479477a96747532dcc134e1", + "31f0ece671fe4c598cabcbc7332a6559", + "78b34924973842e0859b2d5a92d50eae", + "5b8e42d784e74e4ebd1e467476525ca5", + "b3f9de876a2146799481c4d4fe7c8cf3", + "da22fce4515a44f6908debfe571d60a5", + "595f2051e65d45d5a9113654507d9ea0", + "c6103230fe384c8e830b68dd938334ab", + "f5e1eeb1c26e41b5b0aaf258726868d3", + "031031fe1ed44efabd7edfc5b4f24f83", + "af89da4aa17c4c26a420960015b61efe", + "2d6defc70a594af49333055f176ee539", + "44086aa7573d44d2b3af7f238e845f38", + "68222ff0df0a4f2a8c09f7f8ffe679b6", + "3dec45537fbd44c9a7cc62d429260c3d", + "0edad2f47ce744da920575e28074adca", + "21a64701aa0d439d8833439d30a89d78", + "a1030ae14d1247edaa1486a8f12ae337" + ] + }, + "id": "W2V52NuGXQXw", + "outputId": "890fb594-8b62-402f-fb10-a4bc46d11a2a", + "trusted": true + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b7c9053262c346beaa4a74ceb168a2bd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Saving the dataset (0/1 shards): 0%| | 0/8234 [00:00 Tensor:\n", + " del labels\n", + "\n", + " if isinstance(logits, tuple):\n", + " logits = logits[0]\n", + "\n", + " return logits.argmax(dim=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "id": "1dQOZBXkXQYH", + "trusted": true + }, + "outputs": [], + "source": [ + "def calculate_multi_class_classification_metrics(\n", + " y_true: numpy.ndarray, y_pred: numpy.ndarray\n", + ") -> dict[str, float]:\n", + " accuracy = accuracy_score(y_true, y_pred, 
normalize=False)\n", + "\n", + " precision = precision_score(y_true, y_pred, average=\"micro\", zero_division=1)\n", + " recall = recall_score(y_true, y_pred, average=\"micro\", zero_division=1)\n", + "\n", + " f1_balanced = f1_score(y_true, y_pred, average=\"micro\", zero_division=1)\n", + " f1_precision = fbeta_score(y_true, y_pred, beta=0.5, average=\"micro\", zero_division=1)\n", + " f1_recall = fbeta_score(y_true, y_pred, beta=2, average=\"micro\", zero_division=1)\n", + "\n", + " return {\n", + " \"accuracy\": accuracy,\n", + " \"precision\": precision,\n", + " \"recall\": recall,\n", + " \"f1_balanced\": f1_balanced,\n", + " \"f1_precision\": f1_precision,\n", + " \"f1_recall\": f1_recall,\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "id": "3PTO-v8dXQYH", + "trusted": true + }, + "outputs": [], + "source": [ + "def track_validation_metrics(validation_outputs: EvalPrediction) -> dict[str, float]:\n", + " predictions = validation_outputs.predictions\n", + " labels = validation_outputs.label_ids\n", + "\n", + " if isinstance(predictions, tuple):\n", + " predictions = predictions[0]\n", + "\n", + " predictions = numpy.where(predictions != mask_token_index, predictions, tokeniser.pad_token_id)\n", + " labels = numpy.where(labels != mask_token_index, labels, tokeniser.pad_token_id)\n", + "\n", + " decoded_predictions = tokeniser.batch_decode(predictions, skip_special_tokens=True)\n", + " decoded_labels = tokeniser.batch_decode(labels, skip_special_tokens=True)\n", + "\n", + " bleu_score = bleu_metric.compute(predictions=decoded_predictions, references=decoded_labels)\n", + " google_bleu_score = google_bleu_metric.compute(\n", + " predictions=decoded_predictions, references=decoded_labels\n", + " )\n", + " rouge_score = rouge_metric.compute(predictions=decoded_predictions, references=decoded_labels)\n", + "\n", + " classification_scores = calculate_multi_class_classification_metrics(\n", + " labels.flatten(), 
predictions.flatten()\n", + " )\n", + "\n", + " return {**bleu_score, **google_bleu_score, **rouge_score, **classification_scores}" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "id": "djAESXXQXQYH", + "trusted": true + }, + "outputs": [], + "source": [ + "early_stopping_callback = EarlyStoppingCallback(\n", + " early_stopping_patience=10, early_stopping_threshold=0.000001\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 153, + "referenced_widgets": [ + "f4f6a459b78b4548befbe8f1d177ed8a", + "ec63434e1083407ebf3857ce1930b90f", + "dc2d530a7a8d4620a05e0fdd13fb6872", + "2b2217d1a2ab4563a2a6d83212651cc0", + "c539698bf85840418b1dd95baa327a79", + "849e3031e27249ee9be4a8d3cecd90d2", + "dc1ac2672b69478a8ddaed8767cce498", + "1dc1689ca038477fa0609735a10d8b86", + "5fdab2ce6e1d4f44aa9910fee06302b0", + "32ffcba4e05446829c0340f5d7503678", + "60759a513e9d4a349b71e25d6bf4d694", + "2987f8662e594291994ddbfefd4cff53", + "aa4afde379e14921861730ab665906c6", + "e72e7e8ec3c44667a7b8b994894dbb52", + "c5af5d5f00134021bfef5a3f8c74dad3", + "bcce971da32c4130a8ee31f928c1aea4", + "973f01e20c864c2a898ad69034f814cd", + "8df57e75a59846bb9b02616602c9f735", + "94d8dcf389364a97aa270ce6b4d00246", + "f07e92cd93054ca4bda03ebb960f6352", + "59e35119a590431c9cc489e4f40092cd", + "3ef39ebca2cc417b811b8fd078454fc1" + ] + }, + "id": "ZMl-YS9CXQYI", + "outputId": "6a0ac3d2-712c-4d27-da40-b67b2ea7e1fe", + "trusted": true + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f4f6a459b78b4548befbe8f1d177ed8a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Map: 0%| | 0/8234 [00:00\n", + " \n", + " \n", + " [630/900 04:45 < 02:02, 2.20 it/s, Epoch 70/100]\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation LossBleuPrecisionsBrevity PenaltyLength RatioTranslation LengthReference LengthGoogle BleuRouge1Rouge2RougelRougelsumAccuracyPrecisionRecallF1 BalancedF1 PrecisionF1 Recall
450.7841000.7283720.165267[0.20063291139240505, 0.1747448979591837, 0.15424164524421594, 0.13795336787564766]1.0000004.47592115803530.1670930.2447820.1836960.2282520.2346851532.0000000.3743890.3743890.3743890.3743890.374389
900.6530000.6097890.169963[0.20063091482649842, 0.17800381436745072, 0.1601537475976938, 0.1459005810200129]1.0000004.49008515853530.1713470.2447200.1887150.2323380.2387771532.0000000.3743890.3743890.3743890.3743890.374389
1350.5396000.5344950.175646[0.20819778789850357, 0.18295081967213114, 0.16523463317911435, 0.15123251165889406]1.0000004.35410815373530.1770900.2491500.1941830.2366060.2447031532.0000000.3743890.3743890.3743890.3743890.374389
1800.4534000.4763110.180623[0.21202327084680025, 0.18827361563517916, 0.17071569271175313, 0.15618795499669094]1.0000004.38243615473530.1819820.2521670.2038270.2421900.2522891532.0000000.3743890.3743890.3743890.3743890.374389
2250.4263000.4366150.180423[0.20933165195460277, 0.18742058449809404, 0.17157490396927016, 0.15741935483870967]1.0000004.49291815863530.1816010.2545980.2081850.2457250.2558561532.0000000.3743890.3743890.3743890.3743890.374389
2700.3676000.4079050.179700[0.2078332280480101, 0.18650541056651815, 0.17126363053239255, 0.1570782159017453]1.0000004.48441915833530.1808310.2559870.2071090.2482110.2543461532.0000000.3743890.3743890.3743890.3743890.374389
3150.3333000.3876230.179436[0.20559006211180125, 0.1858573216520651, 0.17150063051702397, 0.15819567979669633]1.0000004.56090716103530.1804330.2628740.2124210.2566740.2593671532.0000000.3743890.3743890.3743890.3743890.374389
3600.3117000.3768510.178866[0.2053349875930521, 0.185625, 0.17065491183879095, 0.15736040609137056]1.0000004.56657216123530.1798930.2646280.2135940.2565740.2608781532.0000000.3743890.3743890.3743890.3743890.374389
4050.2795000.3666340.179960[0.2054794520547945, 0.18569636135508155, 0.17193426042983564, 0.15987261146496815]1.0000004.54957516063530.1808880.2609820.2105090.2527670.2571941532.0000000.3743890.3743890.3743890.3743890.374389
4500.2627000.3575240.180187[0.2057356608478803, 0.18592964824120603, 0.17215189873417722, 0.1600765306122449]1.0000004.54390916043530.1811160.2614050.2107550.2531870.2576091532.0000000.3743890.3743890.3743890.3743890.374389
4950.2413000.3518200.180457[0.2054794520547945, 0.1863237139272271, 0.17256637168141592, 0.16050955414012738]1.0000004.54957516063530.1813600.2619070.2129750.2535860.2562061532.0000000.3743890.3743890.3743890.3743890.374389
5400.2241000.3514730.177330[0.20434782608695654, 0.18397997496871088, 0.16897856242118536, 0.1556543837357052]1.0000004.56090716103530.1783920.2564610.2080520.2482040.2509961533.0000000.3746330.3746330.3746330.3746330.374633
5850.2206000.3516920.177330[0.20434782608695654, 0.18397997496871088, 0.16897856242118536, 0.1556543837357052]1.0000004.56090716103530.1783920.2564610.2080520.2482040.2509961533.0000000.3746330.3746330.3746330.3746330.374633
6300.2185000.3524170.176665[0.20358910891089108, 0.18329177057356608, 0.16834170854271358, 0.1550632911392405]1.0000004.57790416163530.1777220.2583720.2093890.2500810.2531121533.0000000.3746330.3746330.3746330.3746330.374633

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "TrainOutput(global_step=630, training_loss=0.3796993512955923, metrics={'train_runtime': 286.8433, 'train_samples_per_second': 12.55, 'train_steps_per_second': 3.138, 'total_flos': 1423899380219904.0, 'train_loss': 0.3796993512955923, 'epoch': 70.0})" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "supervised_trainer.train()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "id": "_OsS49mKXQYJ", + "trusted": true + }, + "outputs": [], + "source": [ + "supervised_trainer.model.save_pretrained(tuned_adapter_path, safe_serialization=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "id": "qRh6uS9vXQYK", + "trusted": true + }, + "outputs": [], + "source": [ + "_ = shutil.make_archive(\n", + " str(pathlib.Path(tuned_adapter_archive.parent, tuned_adapter_archive.stem)),\n", + " tuned_adapter_archive.suffix[1:],\n", + " root_dir=working_directory,\n", + " base_dir=tuned_adapter_path.stem,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "id": "VbUb8MQHXQYK", + "trusted": true + }, + "outputs": [], + "source": [ + "_ = shutil.make_archive(\n", + " str(pathlib.Path(tuning_checkpoints_archive.parent, tuning_checkpoints_archive.stem)),\n", + " tuning_checkpoints_archive.suffix[1:],\n", + " root_dir=working_directory,\n", + " base_dir=tuning_checkpoints_path.stem,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "id": "M_TmNq3XXQYK", + "trusted": true + }, + "outputs": [], + "source": [ + "del supervised_trainer\n", + "del tokeniser\n", + "del model" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "id": "Dymbzq53XQYK", + "trusted": true + }, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + 
"cuda.empty_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "id": "797zZL3sXQYL", + "trusted": true + }, + "outputs": [], + "source": [ + "time.sleep(30)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "id": "f-fcM2V2XQYL", + "trusted": true + }, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TTMAYe_fXQYM" + }, + "source": [ + "## Artefacts" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "id": "iFLiTdslXQYM", + "trusted": true + }, + "outputs": [], + "source": [ + "!cp step_2/output_directory/* step_4/input_directory" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GgsiPOQmXQYN" + }, + "source": [ + "# Step 3" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "id": "ptx_LihcXQYN", + "trusted": true + }, + "outputs": [], + "source": [ + "import gc\n", + "import pathlib\n", + "import time\n", + "\n", + "from torch import cuda, float16\n", + "from transformers import (\n", + " AutoModelForCausalLM,\n", + " AutoTokenizer,\n", + " BitsAndBytesConfig,\n", + " pipeline,\n", + " set_seed,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "id": "UnrDQdS_XQYO", + "trusted": true + }, + "outputs": [], + "source": [ + "set_seed(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "id": "9WrjBzNLXQYP", + "trusted": true + }, + "outputs": [], + "source": [ + "step_identifier = pathlib.Path(\"step_3\")\n", + "\n", + "input_directory = pathlib.Path(step_identifier, \"input_directory\")\n", + "working_directory = pathlib.Path(step_identifier, \"working_directory\")\n", + "output_directory = pathlib.Path(step_identifier, \"output_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "id": "MwCFz1qpXQYP", + "trusted": true + }, + 
"outputs": [], + "source": [ + "base_model_identifier = \"facebook/opt-350m\"\n", + "\n", + "quantisation_configuration = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_compute_dtype=float16,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_use_double_quant=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "id": "i11QhVX5XQYQ", + "trusted": true + }, + "outputs": [], + "source": [ + "tokeniser = AutoTokenizer.from_pretrained(base_model_identifier)\n", + "\n", + "tokeniser.pad_token = tokeniser.eos_token\n", + "tokeniser.padding_side = \"right\"" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "id": "W9AXG4uqXQYQ", + "trusted": true + }, + "outputs": [], + "source": [ + "untuned_model = AutoModelForCausalLM.from_pretrained(\n", + " base_model_identifier,\n", + " quantization_config=quantisation_configuration,\n", + " device_map=\"auto\",\n", + " low_cpu_mem_usage=True,\n", + ")\n", + "\n", + "_ = untuned_model.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "id": "5NpuNOmzXQYQ", + "trusted": true + }, + "outputs": [], + "source": [ + "untuned_pipeline = pipeline(\n", + " \"text-generation\",\n", + " model=untuned_model,\n", + " tokenizer=tokeniser,\n", + " device_map=\"auto\",\n", + " torch_dtype=float16,\n", + " model_kwargs={\"low_cpu_mem_usage\": True},\n", + " max_new_tokens=256,\n", + " do_sample=True,\n", + " top_k=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "id": "t28J3RynXQYR", + "trusted": true + }, + "outputs": [], + "source": [ + "instruction_template = \"\"\"You are an assistant for question answering tasks.\n", + "\n", + "### Instructions\n", + "\n", + "1. Use only the following retrieved context to answer the given question.\n", + "2. If the answer is not in the context, say \"I do not know.\".\n", + "3. 
Keep your answer as concise as possible.\"\"\"\n", + "\n", + "context_template = \"\"\"### Context:\"\"\"\n", + "question_template = \"### Question:\"\n", + "answer_template = \"### Answer:\"" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "id": "FXbt49krXQYR", + "trusted": true + }, + "outputs": [], + "source": [ + "input_question = \"Name the root package.\"\n", + "retrieved_context = \"'package_name_to_import_with' is the root package.\"\n", + "\n", + "model_input = \"\\n\\n\".join(\n", + " [\n", + " instruction_template,\n", + " f\"{context_template} {retrieved_context}\",\n", + " f\"{question_template} {input_question}\",\n", + " f\"{answer_template} \",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "W9dim0yaXQYR", + "outputId": "f7663572-3b4e-428d-a2e2-37e2e3a89151", + "trusted": true + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'You are an assistant for question answering tasks.\\n\\n### Instructions\\n\\n1. Use only the following retrieved context to answer the given question.\\n2. If the answer is not in the context, say \"I do not know.\".\\n3. 
Keep your answer as concise as possible.\\n\\n### Context: \\'package_name_to_import_with\\' is the root package.\\n\\n### Question: Name the root package.\\n\\n### Answer: '" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_input" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "id": "hrzonoDcXQYR", + "trusted": true + }, + "outputs": [], + "source": [ + "untuned_model_output = untuned_pipeline(input_question, return_full_text=False)\n", + "\n", + "untuned_output_answer = untuned_model_output[0][\"generated_text\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "id": "fNs1X640XQYS", + "outputId": "0818bf3e-9e71-4d6e-fba6-33c444c1138a", + "trusted": true + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the 
system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n\\nThe root package is the package that is installed on the system.\\n'" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "untuned_output_answer" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "id": "c6hpJXXNXQYS", + "trusted": true + }, + "outputs": [], + "source": [ + "del untuned_pipeline\n", + "del untuned_model\n", + "del tokeniser" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "id": "2KDUqvPNXQYS", + "trusted": true + }, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "id": "Hj8QicxVXQYS", + "trusted": true + }, + "outputs": [], + "source": [ + "time.sleep(30)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "id": "zTbpLXyvXQYT", + "trusted": true + }, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7z_3FaTaXQYT" + }, + "source": [ + "# Step 4" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "id": "rkl7jkkFXQYT", + "trusted": true + }, + "outputs": [], + "source": [ + "import gc\n", + "import pathlib\n", + "import shutil\n", + "import time\n", + "\n", + "from peft import PeftModel\n", + "from torch import cuda, float16\n", + "from transformers import (\n", + " AutoModelForCausalLM,\n", + " AutoTokenizer,\n", + " BitsAndBytesConfig,\n", + " pipeline,\n", + " set_seed,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "id": "ReuUx5iXXQYU", + "trusted": true + }, + "outputs": [], + "source": [ + "step_identifier = pathlib.Path(\"step_4\")\n", + "\n", + "input_directory = 
pathlib.Path(step_identifier, \"input_directory\")\n", + "working_directory = pathlib.Path(step_identifier, \"working_directory\")\n", + "output_directory = pathlib.Path(step_identifier, \"output_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "id": "Bqg-zi_PXQYV", + "trusted": true + }, + "outputs": [], + "source": [ + "tuned_adapter_archive = pathlib.Path(input_directory, \"tuned_adapter_archive.zip\")\n", + "tuned_adapter_path = pathlib.Path(working_directory, \"tuned_adapter_directory\")" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "id": "BMVSMgc7XQYV", + "trusted": true + }, + "outputs": [], + "source": [ + "shutil.unpack_archive(tuned_adapter_archive, extract_dir=working_directory)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "id": "uzCiIgj5XQYV", + "trusted": true + }, + "outputs": [], + "source": [ + "base_model_identifier = \"facebook/opt-350m\"\n", + "\n", + "quantisation_configuration = BitsAndBytesConfig(\n", + " load_in_4bit=True,\n", + " bnb_4bit_compute_dtype=float16,\n", + " bnb_4bit_quant_type=\"nf4\",\n", + " bnb_4bit_use_double_quant=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "id": "eraZKNIkXQYW", + "trusted": true + }, + "outputs": [], + "source": [ + "tokeniser = AutoTokenizer.from_pretrained(base_model_identifier)\n", + "\n", + "tokeniser.pad_token = tokeniser.eos_token\n", + "tokeniser.padding_side = \"right\"" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "id": "Olzr55qDXQYW", + "trusted": true + }, + "outputs": [], + "source": [ + "untuned_model = AutoModelForCausalLM.from_pretrained(\n", + " base_model_identifier,\n", + " quantization_config=quantisation_configuration,\n", + " device_map=\"auto\",\n", + " low_cpu_mem_usage=True,\n", + ")\n", + "\n", + "peft_model = PeftModel.from_pretrained(untuned_model, tuned_adapter_path)\n", + "\n", + "_ = 
peft_model.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "voSsxEqKXQYX", + "outputId": "52a73543-eaee-4a85-fc06-3adbe67a4bb0", + "trusted": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PhiForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'Qwen2ForCausalLM', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForCausalLM', 'RoCBertForCausalLM', 'RoFormerForCausalLM', 'RwkvForCausalLM', 'Speech2Text2ForCausalLM', 'StableLmForCausalLM', 'TransfoXLLMHeadModel', 'TrOCRForCausalLM', 'WhisperForCausalLM', 'XGLMForCausalLM', 'XLMWithLMHeadModel', 'XLMProphetNetForCausalLM', 'XLMRobertaForCausalLM', 'XLMRobertaXLForCausalLM', 'XLNetLMHeadModel', 'XmodForCausalLM'].\n" + ] + } + ], + "source": [ + "tuned_pipeline = pipeline(\n", + " 
\"text-generation\",\n", + " model=peft_model,\n", + " tokenizer=tokeniser,\n", + " device_map=\"auto\",\n", + " torch_dtype=float16,\n", + " model_kwargs={\"low_cpu_mem_usage\": True},\n", + " max_new_tokens=256,\n", + " do_sample=True,\n", + " top_k=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "id": "bUfuiUjlXQYX", + "trusted": true + }, + "outputs": [], + "source": [ + "instruction_template = \"\"\"You are an assistant for question answering tasks.\n", + "\n", + "### Instructions\n", + "\n", + "1. Use only the following retrieved context to answer the given question.\n", + "2. If the answer is not in the context, say \"I do not know.\".\n", + "3. Keep your answer as concise as possible.\"\"\"\n", + "\n", + "context_template = \"\"\"### Context:\"\"\"\n", + "question_template = \"### Question:\"\n", + "answer_template = \"### Answer:\"" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "id": "Yt76SrmUXQYX", + "trusted": true + }, + "outputs": [], + "source": [ + "input_question = \"Name the root package.\"\n", + "retrieved_context = \"'package_name_to_import_with' is the root package.\"\n", + "\n", + "model_input = \"\\n\\n\".join(\n", + " [\n", + " instruction_template,\n", + " f\"{context_template} {retrieved_context}\",\n", + " f\"{question_template} {input_question}\",\n", + " f\"{answer_template} \",\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "Z33Y7WnHXQYY", + "outputId": "a84f6995-7004-48c6-8309-ae5e538c4428", + "trusted": true + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'You are an assistant for question answering tasks.\\n\\n### Instructions\\n\\n1. Use only the following retrieved context to answer the given question.\\n2. 
If the answer is not in the context, say \"I do not know.\".\\n3. Keep your answer as concise as possible.\\n\\n### Context: \\'package_name_to_import_with\\' is the root package.\\n\\n### Question: Name the root package.\\n\\n### Answer: '" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_input" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "id": "1IEEd7oqXQYY", + "trusted": true + }, + "outputs": [], + "source": [ + "tuned_model_output = tuned_pipeline(input_question, return_full_text=False)\n", + "\n", + "tuned_output_answer = tuned_model_output[0][\"generated_text\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "id": "ih2bnJAdXQYZ", + "outputId": "9996b89a-f011-4a35-f37d-a234f86a584d", + "trusted": true + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + }, + "text/plain": [ + "'\\n\\nThe root package is the one that you need to install on your computer.\\n\\nThe root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. 
The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on your computer. The root package is the one that you need to install on'" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tuned_output_answer" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "id": "O0064yF_XQYZ", + "trusted": true + }, + "outputs": [], + "source": [ + "del tuned_pipeline\n", + "del peft_model\n", + "del untuned_model\n", + "del tokeniser" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "id": "-5JJJWNVXQYa" + }, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "id": "iLPR88vgXQYa" + }, + "outputs": [], + "source": [ + "time.sleep(30)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": { + "id": "3Y6nYccsXQYa", + "trusted": true + }, + "outputs": [], + "source": [ + "_ = gc.collect()\n", + "cuda.empty_cache()" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kaggle": { + "accelerator": "gpu", + "dataSources": [ + { + "datasetId": 4528009, + "sourceId": 8053191, + "sourceType": "datasetVersion" + } + ], + "dockerImageVersionId": 30665, + "isGpuEnabled": true, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook" + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + 
"widgets": { + "application/vnd.jupyter.widget-state+json": { + "00d8bf97416b4a00b74ce1541bd87ff0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "011d2f74b30941829a36163889b086d7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c9f536755ec94f09828f44251f1cc6ef", + "IPY_MODEL_d8d397e3e12e4a6485972e47b8357b46", + "IPY_MODEL_abc17f25befa4484ad16965cca1b9dc7" + ], + "layout": "IPY_MODEL_457941ba594b4a69b87a15ee0b226c73" + } + }, + "031031fe1ed44efabd7edfc5b4f24f83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3dec45537fbd44c9a7cc62d429260c3d", + "max": 2701, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0edad2f47ce744da920575e28074adca", + "value": 2701 + } + }, + "03712b4568914e0582f3a5bbee97afca": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "03c7fc4c0fb148bf84a80a02ea720615": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_50ae210afda04e19ad2a13f41633c5f4", + "placeholder": "\u200b", + "style": "IPY_MODEL_1902b2e8552b4f6daebf545fbfac24a0", + "value": "\u2007441/441\u2007[00:00<00:00,\u20078.24kB/s]" + } + }, + "041465c9011a4505920986105afd81b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_95088e48d2cb4ce39da9a53c1c3c9862", + "placeholder": "\u200b", + "style": "IPY_MODEL_03712b4568914e0582f3a5bbee97afca", + "value": "\u20073.34k/3.34k\u2007[00:00<00:00,\u2007230kB/s]" + } + }, + "056b81aa4d254966ba79cc5700862cbb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "06894ccf881a476daf8bc9513dd8079f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "06b729359fcf4c4183e46c6bf577685d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + 
"align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0c5df199e2504cbcb1ff94f034662124": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bf4bcd04704b4fab82006adfd4cde82b", + "max": 685, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b97fb1ad284f4e8284627db8da269bff", + "value": 685 + } + }, + "0d2707f351cf4ad1b6c46551f66b09fa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"0e264e99aa774dc6b8cd1ce6b26f2d3c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bcffc6f9e1804a8f87489bf9ed0f133f", + "placeholder": "\u200b", + "style": "IPY_MODEL_2ef86b2eca9743ceb14fea8cefeb40a9", + "value": "generation_config.json:\u2007100%" + } + }, + "0edad2f47ce744da920575e28074adca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0f5d2cdc342c40fd95b0e1a3c26fd080": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2475e8a098f34f28ab279a16ef8a67cc", + "placeholder": "\u200b", + "style": "IPY_MODEL_9e276df965ad45678563a9354604eb6b", + "value": "Downloading\u2007extra\u2007modules:\u2007" + } + }, + "129cad9a109a44009c5613a0b32542a4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": 
"DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "17dcf9c82bbf4d0b8b6ee4051fa232f5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1902b2e8552b4f6daebf545fbfac24a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "description_width": "" + } + }, + "1b1d8564c6944ac389e8b2fc0bd73432": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1c58c010a14e4a98b1d80858044e9c10": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1dc1689ca038477fa0609735a10d8b86": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": 
"1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1e2e472fdfa64a829cce9014c683c134": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1ef56d1d8ff94f049cb618ffcb4914d0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + 
"display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "21a64701aa0d439d8833439d30a89d78": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"228b57bf99af4de48040327ee93d998a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2475e8a098f34f28ab279a16ef8a67cc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2477a84331d14f2b92664949c452eef9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8466e904761b40efb3667ee7aad783cd", + "IPY_MODEL_f9023c4b7298434d8702b1b7bba1bd1e", + "IPY_MODEL_c2a3652b1dda49e1b6cc3af1e7e3a163" + ], + "layout": "IPY_MODEL_6f78f3d1283f4e2f9535c7d56d4d9672" + } + }, + "247cd95babae4f9fabd66657bbb62b0e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "24915cd927be4ad6bc6d5707f21b425f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "25f197bb23ea400ebf75ebe10c1a14a7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", 
+ "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "286cc99f2c194d44b3ca8736359245dd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "2987f8662e594291994ddbfefd4cff53": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_aa4afde379e14921861730ab665906c6", + "IPY_MODEL_e72e7e8ec3c44667a7b8b994894dbb52", + "IPY_MODEL_c5af5d5f00134021bfef5a3f8c74dad3" + 
], + "layout": "IPY_MODEL_bcce971da32c4130a8ee31f928c1aea4" + } + }, + "2b2217d1a2ab4563a2a6d83212651cc0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_32ffcba4e05446829c0340f5d7503678", + "placeholder": "\u200b", + "style": "IPY_MODEL_60759a513e9d4a349b71e25d6bf4d694", + "value": "\u20078234/8234\u2007[00:00<00:00,\u200797535.07\u2007examples/s]" + } + }, + "2c8a2920ec9b42628d9fa0c3a22d78d7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9c9caee9ebb64ec19b6c71defc6e86ad", + "max": 441, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_37a3d6690b0d4283a23fd9efc98a2204", + "value": 441 + } + }, + "2cdb8f82f31e467dbba686351dd4b7f8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_3c5869e2f1ea4834b71405c13479f56d", + "IPY_MODEL_0c5df199e2504cbcb1ff94f034662124", + "IPY_MODEL_c160b9ee1c6d4d5eb2efac5fcd3820d0" + ], + "layout": "IPY_MODEL_228b57bf99af4de48040327ee93d998a" + } + }, + "2d6defc70a594af49333055f176ee539": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2ef86b2eca9743ceb14fea8cefeb40a9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "31f0ece671fe4c598cabcbc7332a6559": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "320fabcc13644184ac81742415689110": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "32178c3652f643418e1ba9293dd2235b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "32ffcba4e05446829c0340f5d7503678": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "35f19a445d8e41e588d78169ea1c4d08": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": 
null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "37a3d6690b0d4283a23fd9efc98a2204": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3a09a7d232064621981bf7dd2e99ca31": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3a34223dbe61448d8e5fd84ddfbba770": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + 
"_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_64330bfa9bc74e8db8e3549b2c0d38ed", + "placeholder": "\u200b", + "style": "IPY_MODEL_00d8bf97416b4a00b74ce1541bd87ff0", + "value": "\u20074.07k/?\u2007[00:00<00:00,\u2007215kB/s]" + } + }, + "3c5869e2f1ea4834b71405c13479f56d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c89f7f5d00c94a92a7b3618781ef1e9f", + "placeholder": "\u200b", + "style": "IPY_MODEL_ea4f5bfd947249bf8785e1c45f0db81a", + "value": "tokenizer_config.json:\u2007100%" + } + }, + "3dec45537fbd44c9a7cc62d429260c3d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + 
"object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3ef39ebca2cc417b811b8fd078454fc1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "40f72d8956f94c05943df787b837f9a6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "43b68689256b4ef1ab0b7a1fd24ba2bb": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "44086aa7573d44d2b3af7f238e845f38": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "457941ba594b4a69b87a15ee0b226c73": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "46ccb18dfe1a4d7a9f3f98b4b4744b3d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ecf5160a15d54189b8de039b2e5cee49", + "placeholder": "\u200b", + "style": "IPY_MODEL_c3c5a83fa913453ba88c84867e259402", + "value": "\u20073.34k/3.34k\u2007[00:00<00:00,\u2007181kB/s]" + } + }, + "486b19e9c56f4dc3bc34f41bf98eb567": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "488119b3ddad4772aa367eb100c11a7f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4dafdd16073a432a8ef22f3a3b657a8e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4ff36c15154941b294f1b1026b3dc9dd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7ad272484af94a37a9d56140da5f68ec", + "max": 8645, + "min": 0, + 
"orientation": "horizontal", + "style": "IPY_MODEL_d799588b0e514f879101fb6f94de9209", + "value": 8645 + } + }, + "50ae210afda04e19ad2a13f41633c5f4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "549d75b7456744ba9bcbf622a1973e8a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_da22fce4515a44f6908debfe571d60a5", + "placeholder": "\u200b", + "style": "IPY_MODEL_595f2051e65d45d5a9113654507d9ea0", + "value": 
"\u20072772/2772\u2007[00:00<00:00,\u200770210.27\u2007examples/s]" + } + }, + "549ecf02da704441bc252264ac090bb7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "55c49f667a274b1f8b7228ddd704e36b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b1d8564c6944ac389e8b2fc0bd73432", + "placeholder": "\u200b", + "style": "IPY_MODEL_caf57cebe64747898368119773b9824f", + "value": 
"Downloading\u2007extra\u2007modules:\u2007100%" + } + }, + "57bdeb5ecacd44f4bd92de036eff8f81": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "57fb00266e094ffb8efaafb2fff3cd22": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "595f2051e65d45d5a9113654507d9ea0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "59e35119a590431c9cc489e4f40092cd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + 
"min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5b8e42d784e74e4ebd1e467476525ca5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5d74ea41be9f40a7b115cb4be3452172": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + 
"5f19fcc123e945e6abd8c958b0d0a644": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_61a48153341a47e99d10e3e85aeaf1b1", + "max": 3344, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7a34212f4f0944958f123904ab2d146b", + "value": 3344 + } + }, + "5f752bda665d4adda4502572c3e6894d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"5fdab2ce6e1d4f44aa9910fee06302b0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "60759a513e9d4a349b71e25d6bf4d694": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "60bd086b1b324ffabbfd2190a30c5013": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6e918d56a43d4dac9b1ea17ca860fc2c", + "max": 6270, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a6b9abd58c9e4279a3516160a47d4dff", + "value": 6270 + } + }, + "61a48153341a47e99d10e3e85aeaf1b1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "64330bfa9bc74e8db8e3549b2c0d38ed": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + 
"order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6616e75902f745249e8513ebbec1976a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e89846a46c51418db6b3487ecb05245a", + "placeholder": "\u200b", + "style": "IPY_MODEL_320fabcc13644184ac81742415689110", + "value": "pytorch_model.bin:\u2007100%" + } + }, + "67a7909fc74a4b8ab11c699096f5fdaa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6616e75902f745249e8513ebbec1976a", + "IPY_MODEL_b2872743e69846a69b9a461a2afab141", + "IPY_MODEL_c387729014c64e2b8256083af30712d9" + ], + "layout": "IPY_MODEL_f284e02f425e4a88949a7b2836a92f72" + } + }, + "68222ff0df0a4f2a8c09f7f8ffe679b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } 
+ }, + "6ca29e483dc84e03904b722b9df7d50b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6e918d56a43d4dac9b1ea17ca860fc2c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "6f78f3d1283f4e2f9535c7d56d4d9672": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "78ac29cd1755426e976cfb0a8f912a42": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + 
"_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "78b34924973842e0859b2d5a92d50eae": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "799259ec533d4e1599e5168c41661b1e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7a34212f4f0944958f123904ab2d146b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7ad272484af94a37a9d56140da5f68ec": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7d33731a6b194e318db13015c1126092": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b4c4823ec7794824baca02995b18e9b6", + "IPY_MODEL_5f19fcc123e945e6abd8c958b0d0a644", + "IPY_MODEL_041465c9011a4505920986105afd81b6" + ], + "layout": "IPY_MODEL_6ca29e483dc84e03904b722b9df7d50b" + } + }, + "7d4d60ef08b44415b5797c794aef9d2f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "7dc4c4206c764db5ad9951d341fbbed1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5b8e42d784e74e4ebd1e467476525ca5", + "max": 2772, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b3f9de876a2146799481c4d4fe7c8cf3", + "value": 2772 + } + }, + "80710a748e41413a98d93dd7f7b0e996": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "81e9803dbf814d8f8543d3460f73d41d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8466e904761b40efb3667ee7aad783cd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9f9a2f7a152146e1a9320da46283f69a", + "placeholder": "\u200b", + "style": "IPY_MODEL_f1d2a20c160c40b386b0d239841cbc2d", + "value": "vocab.json:\u2007100%" + } + }, + "849e3031e27249ee9be4a8d3cecd90d2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": 
null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "881a955bd4f645d594c171840d66357e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c5bbf2f491314e6eb754003bb632e96e", + "placeholder": "\u200b", + "style": "IPY_MODEL_eb9ac8d3463142a0b3861825643d305b", + "value": "\u20078234/8234\u2007[00:00<00:00,\u2007251982.02\u2007examples/s]" + } + }, + "8d7743368d5c43b989aef855993b020a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8df57e75a59846bb9b02616602c9f735": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9074ca1bec1e493fa6fac34616748967": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_488119b3ddad4772aa367eb100c11a7f", + "max": 8234, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ce6fefdd423b4cceb3501a1713a1bd41", + "value": 8234 + } + }, + "911adc8103c043f5934cbee88918f8e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_9c45c993f95e46618de552d7219d3999", + "IPY_MODEL_edf453b360404d30a3ed620acf8c0a98", + "IPY_MODEL_a261bac6eba94fdcbd5a136333164c87" + ], + "layout": "IPY_MODEL_25f197bb23ea400ebf75ebe10c1a14a7" + } + }, + "935b0330e479477a96747532dcc134e1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "94d8dcf389364a97aa270ce6b4d00246": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "950624f2765f4d339774cd30dc8696d6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "95088e48d2cb4ce39da9a53c1c3c9862": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": 
null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9614ab86c04a4c0ebd2d3cd63bc25e88": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5f752bda665d4adda4502572c3e6894d", + "placeholder": "\u200b", + "style": "IPY_MODEL_d00060b25c9041aa85d5c7c1073cc0f7", + "value": "\u2007644/644\u2007[00:00<00:00,\u200723.7kB/s]" + } + }, + "964bbaef8d1b471d9cbfca1b92d62ff7": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + 
"border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "973f01e20c864c2a898ad69034f814cd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": 
null + } + }, + "983e4aafd987437d80165650c7dc1b1a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1ef56d1d8ff94f049cb618ffcb4914d0", + "max": 137, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_32178c3652f643418e1ba9293dd2235b", + "value": 137 + } + }, + "9c2059dcd75143c28f38c4d3464ea248": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a9a9e37caaea4eec93e2422428513584", + "placeholder": "\u200b", + "style": "IPY_MODEL_0d2707f351cf4ad1b6c46551f66b09fa", + "value": "\u20076.27k/6.27k\u2007[00:00<00:00,\u2007363kB/s]" + } + }, + "9c45c993f95e46618de552d7219d3999": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e3cd428ab0c1472d9bede352c28adc76", + 
"placeholder": "\u200b", + "style": "IPY_MODEL_e11d58ae8d3a41208561455a2c63e5fc", + "value": "Downloading\u2007builder\u2007script:\u2007100%" + } + }, + "9c9caee9ebb64ec19b6c71defc6e86ad": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9d921b2b56684047bd5c2b543d64b2c9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f5742774f122465e915baaa25f7d7688", + "IPY_MODEL_2c8a2920ec9b42628d9fa0c3a22d78d7", + 
"IPY_MODEL_03c7fc4c0fb148bf84a80a02ea720615" + ], + "layout": "IPY_MODEL_b85905a68afc4e96882b98ed7bb9b95d" + } + }, + "9e276df965ad45678563a9354604eb6b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9f9a2f7a152146e1a9320da46283f69a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a1030ae14d1247edaa1486a8f12ae337": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + 
"state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a261bac6eba94fdcbd5a136333164c87": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_06b729359fcf4c4183e46c6bf577685d", + "placeholder": "\u200b", + "style": "IPY_MODEL_fa017fe211ee4ab28b191f54110f38d5", + "value": "\u20075.94k/5.94k\u2007[00:00<00:00,\u2007389kB/s]" + } + }, + "a26f1104699a45129692ae4e4e096970": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a4a8284dd4574fcc8ed5dbac424cbfae": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a600b578edce421c83d89d60bc330412": { + "model_module": 
"@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a651f7eccf64405eaf4f8da3fe99c684": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0e264e99aa774dc6b8cd1ce6b26f2d3c", + "IPY_MODEL_983e4aafd987437d80165650c7dc1b1a", + "IPY_MODEL_eec74b2fe5374897808045fe334140a7" + ], + "layout": "IPY_MODEL_80710a748e41413a98d93dd7f7b0e996" + } + }, + "a6b9abd58c9e4279a3516160a47d4dff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a9a9e37caaea4eec93e2422428513584": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa4afde379e14921861730ab665906c6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": 
"1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_973f01e20c864c2a898ad69034f814cd", + "placeholder": "\u200b", + "style": "IPY_MODEL_8df57e75a59846bb9b02616602c9f735", + "value": "Map:\u2007100%" + } + }, + "aae026074d484c1da55774e73db18b6f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "abc17f25befa4484ad16965cca1b9dc7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_06894ccf881a476daf8bc9513dd8079f", + "placeholder": "\u200b", + "style": "IPY_MODEL_1c58c010a14e4a98b1d80858044e9c10", + "value": "\u2007456k/456k\u2007[00:00<00:00,\u20076.32MB/s]" + } + }, + "ad686be5b46b43929180427f1e453c43": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_17dcf9c82bbf4d0b8b6ee4051fa232f5", + "placeholder": 
"\u200b", + "style": "IPY_MODEL_129cad9a109a44009c5613a0b32542a4", + "value": "Saving\u2007the\u2007dataset\u2007(1/1\u2007shards):\u2007100%" + } + }, + "ae423cd6cdc444a98309e44660528a7d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_81e9803dbf814d8f8543d3460f73d41d", + "max": 644, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ee1062ad196f4ac8aad1faf92019460b", + "value": 644 + } + }, + "aeddecf41fed433fb613eb17938abf9a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": 
null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "af89da4aa17c4c26a420960015b61efe": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_21a64701aa0d439d8833439d30a89d78", + "placeholder": "\u200b", + "style": "IPY_MODEL_a1030ae14d1247edaa1486a8f12ae337", + "value": "\u20072701/2701\u2007[00:00<00:00,\u200784113.41\u2007examples/s]" + } + }, + "b2872743e69846a69b9a461a2afab141": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_cec32a675a72415c8a94cfe65bdc4040", + "max": 662513657, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_486b19e9c56f4dc3bc34f41bf98eb567", + "value": 662513657 + } + }, + "b3f9de876a2146799481c4d4fe7c8cf3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b4c4823ec7794824baca02995b18e9b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_57bdeb5ecacd44f4bd92de036eff8f81", + "placeholder": "\u200b", + "style": "IPY_MODEL_c2c14a3c2f0545c0b8f47759437f7d40", + "value": "Downloading\u2007extra\u2007modules:\u2007100%" + } + }, + "b5f1438fd68d4772906f780f1252587b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null 
+ } + }, + "b7c9053262c346beaa4a74ceb168a2bd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ad686be5b46b43929180427f1e453c43", + "IPY_MODEL_9074ca1bec1e493fa6fac34616748967", + "IPY_MODEL_881a955bd4f645d594c171840d66357e" + ], + "layout": "IPY_MODEL_be2e551f1ae6408186d73c8d6c8bf18f" + } + }, + "b85905a68afc4e96882b98ed7bb9b95d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b8e7ebb61d06461d95fa5711c28cfcb8": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a600b578edce421c83d89d60bc330412", + "placeholder": "\u200b", + "style": "IPY_MODEL_1e2e472fdfa64a829cce9014c683c134", + "value": "Downloading\u2007builder\u2007script:\u2007100%" + } + }, + "b97fb1ad284f4e8284627db8da269bff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bcce971da32c4130a8ee31f928c1aea4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bcffc6f9e1804a8f87489bf9ed0f133f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "be2e551f1ae6408186d73c8d6c8bf18f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + 
"_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bed285f4916945ef89635fb1c9b52b0d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bf4bcd04704b4fab82006adfd4cde82b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c160b9ee1c6d4d5eb2efac5fcd3820d0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_799259ec533d4e1599e5168c41661b1e", + "placeholder": "\u200b", + "style": "IPY_MODEL_a4a8284dd4574fcc8ed5dbac424cbfae", + "value": 
"\u2007685/685\u2007[00:00<00:00,\u200715.4kB/s]" + } + }, + "c2a3652b1dda49e1b6cc3af1e7e3a163": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c4cb859ecd7c4c7f88b922a479914a6c", + "placeholder": "\u200b", + "style": "IPY_MODEL_ee51d79fcf794d6b9302c4438343edb6", + "value": "\u2007899k/899k\u2007[00:00<00:00,\u20072.54MB/s]" + } + }, + "c2c14a3c2f0545c0b8f47759437f7d40": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c387729014c64e2b8256083af30712d9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aeddecf41fed433fb613eb17938abf9a", + "placeholder": "\u200b", + "style": "IPY_MODEL_e768492a94cf431884ffa82f8be937e8", + "value": "\u2007663M/663M\u2007[00:06<00:00,\u2007153MB/s]" + } + }, + "c3c5a83fa913453ba88c84867e259402": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c486c52285c44862a10ee6b4f1fde01f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "c4cb859ecd7c4c7f88b922a479914a6c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + 
"overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c539698bf85840418b1dd95baa327a79": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c59ad1df213b4051958fb22369def230": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_57fb00266e094ffb8efaafb2fff3cd22", + "max": 3344, + "min": 
0, + "orientation": "horizontal", + "style": "IPY_MODEL_c486c52285c44862a10ee6b4f1fde01f", + "value": 3344 + } + }, + "c5af5d5f00134021bfef5a3f8c74dad3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_59e35119a590431c9cc489e4f40092cd", + "placeholder": "\u200b", + "style": "IPY_MODEL_3ef39ebca2cc417b811b8fd078454fc1", + "value": "\u20072772/2772\u2007[00:00<00:00,\u200753699.37\u2007examples/s]" + } + }, + "c5bbf2f491314e6eb754003bb632e96e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + 
"right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c6103230fe384c8e830b68dd938334ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f5e1eeb1c26e41b5b0aaf258726868d3", + "IPY_MODEL_031031fe1ed44efabd7edfc5b4f24f83", + "IPY_MODEL_af89da4aa17c4c26a420960015b61efe" + ], + "layout": "IPY_MODEL_2d6defc70a594af49333055f176ee539" + } + }, + "c89f7f5d00c94a92a7b3618781ef1e9f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + 
}, + "c9dd594f76f14ab48eebc085ecbdc148": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_55c49f667a274b1f8b7228ddd704e36b", + "IPY_MODEL_c59ad1df213b4051958fb22369def230", + "IPY_MODEL_46ccb18dfe1a4d7a9f3f98b4b4744b3d" + ], + "layout": "IPY_MODEL_78ac29cd1755426e976cfb0a8f912a42" + } + }, + "c9f536755ec94f09828f44251f1cc6ef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_950624f2765f4d339774cd30dc8696d6", + "placeholder": "\u200b", + "style": "IPY_MODEL_8d7743368d5c43b989aef855993b020a", + "value": "merges.txt:\u2007100%" + } + }, + "ca7e925e04554fa397ca43eecb0267a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b5f1438fd68d4772906f780f1252587b", + "placeholder": "\u200b", + "style": "IPY_MODEL_3a09a7d232064621981bf7dd2e99ca31", + "value": 
"Downloading\u2007builder\u2007script:\u2007100%" + } + }, + "caf57cebe64747898368119773b9824f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cc309f0f267842d1b79613f4fa341d34": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ce6fefdd423b4cceb3501a1713a1bd41": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "cec32a675a72415c8a94cfe65bdc4040": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": 
null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d00060b25c9041aa85d5c7c1073cc0f7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d220677af57f4b47b16765be332bb9d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d487721513d64095b5ec291185dfb6d3", + "placeholder": "\u200b", + "style": "IPY_MODEL_a26f1104699a45129692ae4e4e096970", + "value": "\u20078.64k/8.64k\u2007[00:00<00:00,\u2007566kB/s]" + } + }, + "d2da4136ebe945b0a9f82949f2ce9d4a": { + "model_module": "@jupyter-widgets/controls", + 
"model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_31f0ece671fe4c598cabcbc7332a6559", + "placeholder": "\u200b", + "style": "IPY_MODEL_78b34924973842e0859b2d5a92d50eae", + "value": "Saving\u2007the\u2007dataset\u2007(1/1\u2007shards):\u2007100%" + } + }, + "d487721513d64095b5ec291185dfb6d3": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d77d6f084ff143f3b3f3fd8dffdf09e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + 
"model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_35f19a445d8e41e588d78169ea1c4d08", + "placeholder": "\u200b", + "style": "IPY_MODEL_5d74ea41be9f40a7b115cb4be3452172", + "value": "config.json:\u2007100%" + } + }, + "d799588b0e514f879101fb6f94de9209": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d89cebb4831746c6a7f51b293ad14221": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0f5d2cdc342c40fd95b0e1a3c26fd080", + "IPY_MODEL_ee4e94bba36a43ff84c859445b645a83", + "IPY_MODEL_3a34223dbe61448d8e5fd84ddfbba770" + ], + "layout": "IPY_MODEL_bed285f4916945ef89635fb1c9b52b0d" + } + }, + "d8d397e3e12e4a6485972e47b8357b46": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d8f4e85ed1e54ecebfb26f06f67d0d1c", + "max": 456318, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7d4d60ef08b44415b5797c794aef9d2f", + "value": 456318 + } + }, + "d8f4e85ed1e54ecebfb26f06f67d0d1c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "da22fce4515a44f6908debfe571d60a5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dc1ac2672b69478a8ddaed8767cce498": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dc2d530a7a8d4620a05e0fdd13fb6872": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_1dc1689ca038477fa0609735a10d8b86", + "max": 8234, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_5fdab2ce6e1d4f44aa9910fee06302b0", + "value": 8234 + } + }, + "ded69f7777594e1fba512577d30533be": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "e11d58ae8d3a41208561455a2c63e5fc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e3cd428ab0c1472d9bede352c28adc76": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": 
null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e72e7e8ec3c44667a7b8b994894dbb52": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_94d8dcf389364a97aa270ce6b4d00246", + "max": 2772, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f07e92cd93054ca4bda03ebb960f6352", + "value": 2772 + } + }, + "e768492a94cf431884ffa82f8be937e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e89846a46c51418db6b3487ecb05245a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ea4f5bfd947249bf8785e1c45f0db81a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "eb9ac8d3463142a0b3861825643d305b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ec63434e1083407ebf3857ce1930b90f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_849e3031e27249ee9be4a8d3cecd90d2", + "placeholder": "\u200b", + "style": "IPY_MODEL_dc1ac2672b69478a8ddaed8767cce498", + "value": "Map:\u2007100%" + } + }, + "ecf5160a15d54189b8de039b2e5cee49": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "edf453b360404d30a3ed620acf8c0a98": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4dafdd16073a432a8ef22f3a3b657a8e", + "max": 5937, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ded69f7777594e1fba512577d30533be", + "value": 5937 + } + }, + "ee1062ad196f4ac8aad1faf92019460b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ee4e94bba36a43ff84c859445b645a83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_40f72d8956f94c05943df787b837f9a6", + "max": 1554, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_cc309f0f267842d1b79613f4fa341d34", + "value": 1554 + } + }, + "ee51d79fcf794d6b9302c4438343edb6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "eec74b2fe5374897808045fe334140a7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_247cd95babae4f9fabd66657bbb62b0e", + "placeholder": "\u200b", + "style": "IPY_MODEL_aae026074d484c1da55774e73db18b6f", + "value": "\u2007137/137\u2007[00:00<00:00,\u20078.48kB/s]" + } + }, + "f07e92cd93054ca4bda03ebb960f6352": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f1d2a20c160c40b386b0d239841cbc2d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f284e02f425e4a88949a7b2836a92f72": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f4f6a459b78b4548befbe8f1d177ed8a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ec63434e1083407ebf3857ce1930b90f", + "IPY_MODEL_dc2d530a7a8d4620a05e0fdd13fb6872", + "IPY_MODEL_2b2217d1a2ab4563a2a6d83212651cc0" + ], + "layout": "IPY_MODEL_c539698bf85840418b1dd95baa327a79" + } + }, + "f5742774f122465e915baaa25f7d7688": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_549ecf02da704441bc252264ac090bb7", + "placeholder": "\u200b", + "style": "IPY_MODEL_056b81aa4d254966ba79cc5700862cbb", + "value": "special_tokens_map.json:\u2007100%" + } + }, + "f5e1eeb1c26e41b5b0aaf258726868d3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_44086aa7573d44d2b3af7f238e845f38", + "placeholder": "\u200b", + "style": "IPY_MODEL_68222ff0df0a4f2a8c09f7f8ffe679b6", + "value": "Saving\u2007the\u2007dataset\u2007(1/1\u2007shards):\u2007100%" + } + }, + "f7770274cfa84ec7b10c807ebb807c06": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f8befaa4f4ff4c588f62378140fab443": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d2da4136ebe945b0a9f82949f2ce9d4a", + "IPY_MODEL_7dc4c4206c764db5ad9951d341fbbed1", + "IPY_MODEL_549d75b7456744ba9bcbf622a1973e8a" + ], + "layout": "IPY_MODEL_935b0330e479477a96747532dcc134e1" + } + }, + "f9023c4b7298434d8702b1b7bba1bd1e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f7770274cfa84ec7b10c807ebb807c06", + "max": 898822, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_286cc99f2c194d44b3ca8736359245dd", + "value": 898822 + } + }, + "fa017fe211ee4ab28b191f54110f38d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "fa188b218ed14306b39fb1ba4908945b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b8e7ebb61d06461d95fa5711c28cfcb8", + "IPY_MODEL_60bd086b1b324ffabbfd2190a30c5013", + "IPY_MODEL_9c2059dcd75143c28f38c4d3464ea248" + ], + "layout": "IPY_MODEL_43b68689256b4ef1ab0b7a1fd24ba2bb" + } + }, + "fae7e1d66a8b4ab6978427f12bc8174b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d77d6f084ff143f3b3f3fd8dffdf09e6", + "IPY_MODEL_ae423cd6cdc444a98309e44660528a7d", + "IPY_MODEL_9614ab86c04a4c0ebd2d3cd63bc25e88" + ], + "layout": "IPY_MODEL_964bbaef8d1b471d9cbfca1b92d62ff7" + } + }, + "fcceddfbdd3c4938a0ce148d6d539470": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ca7e925e04554fa397ca43eecb0267a5", + "IPY_MODEL_4ff36c15154941b294f1b1026b3dc9dd", + "IPY_MODEL_d220677af57f4b47b16765be332bb9d1" + ], + "layout": "IPY_MODEL_24915cd927be4ad6bc6d5707f21b425f" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From e0b5393d38ec916a53a31ab41f915f5d9c9f08ca Mon Sep 17 00:00:00 2001 From: Anirban Ray <39331844+yarnabrina@users.noreply.github.com> Date: Sun, 7 Apr 2024 20:57:37 +0530 Subject: [PATCH 26/26] bumped version --- pyproject.toml | 2 +- src/generative_ai/metadata.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 968d711..6490387 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ requires = [ [project] name = "query-package-documentation" -version = "0.0.2" +version = "0.0.3" description = "A package to explore documentations" keywords = [ "documentation", diff --git a/src/generative_ai/metadata.json b/src/generative_ai/metadata.json index d7b20d0..db8a303 100644 --- a/src/generative_ai/metadata.json +++ b/src/generative_ai/metadata.json @@ -17,5 +17,5 @@ "Anirban Ray <39331844+yarnabrina@users.noreply.github.com>" ], "Name": "query-package-documentation", - "Version": "0.0.2" + "Version": "0.0.3" }