In [1]:
from synthetic_data_generation.rag_data import DataGenerator
from retrievers.chroma import ExtendedChromaMarkdownRetriever
from chunkers.markdown_chunker import MarkdownChunker

from config.project_paths import project_root, source_document_directory

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
retriever = ExtendedChromaMarkdownRetriever()

In [3]:
selected_document = 'accessibility.md'

selected_document_path = source_document_directory / 'vscode' / selected_document
text = selected_document_path.read_text()

toc = MarkdownChunker.get_table_of_contents(selected_document_path)

In [4]:
len(text)

19303

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=6000,
    chunk_overlap=400,
    length_function=len,
    is_separator_regex=False,
)

In [6]:
docs = text_splitter.create_documents([text])

In [7]:
from utilities import print_long_text


print_long_text(docs[1].page_content)

`#ffef0f` (yellow) and `#37777ff` (blue), are more accessible to individuals with common forms of color vision
deficiencies.  ### Selecting accessible colors  The accessibility of colors is subjective to the type of anomalous
trichromacy (color blindness). The level of severity ranges per person and can be divided into four condition types:  |
Condition      |   Type    | |       ---        |    ---    | |   Deuteranopia   |   Reduced sensitivity to green light.
It is the most common form of color blindness. |   Protanopia     |   Reduced sensitivity to red light. |   Tritanopia
|   Reduced sensitivity to blue light. This condition is considered rare. |   Monochromia    |   The inability to see
all colors, also referred to as achromatopsia. More information about the rarest form of color blindness: [Foundation
for Fighting Blindness](https://www.fightingblindness.org/diseases/achromatopsia).  One of the best approaches to
selecting the best colors for a specific condition is to apply c

In [12]:
from tqdm import tqdm

generator = DataGenerator()

results = []
for i in tqdm(docs):
    q_a = generator.generate_questions_from_document__search_engine_style_query(i, 2)
    print(q_a)
    results.append(q_a)

 25%|██▌       | 1/4 [00:03<00:09,  3.01s/it]

content='Question: VS Code zoom level\nAnswer: You can adjust the zoom level in VS Code with the **View** > **Appearance** > **Zoom** commands. The default value is 0 and each increment/decrement changes the zoom level by 20 percent.\n\nQuestion: VS Code high contrast theme\nAnswer: VS Code supports a High Contrast color theme on all platforms. Use **File** > **Preferences** > **Theme** > **Color Theme** to display the **Select Color Theme** dropdown and select the **High Contrast** theme.' response_metadata={'token_usage': {'prompt_tokens': 2232, 'total_tokens': 2357, 'completion_tokens': 125}, 'model': 'mistral-small', 'finish_reason': 'stop'} id='run-85a4f0ba-c434-4238-b20c-5579358e17da-0'


 50%|█████     | 2/4 [00:05<00:05,  2.56s/it]

content='Question: color accessibility\nAnswer: The accessibility of colors is subjective to the type of anomalous trichromacy (color blindness). The level of severity ranges per person and can be divided into four condition types: Deuteranopia, Protanopia, Tritanopia, and Monochromia. One of the best approaches to selecting the best colors for a specific condition is to apply complementary colors.\n\nQuestion: tab navigation focus\nAnswer: You can use the `kbstyle(Tab)` key to navigate between UI controls in VS Code, with an indicator appearing around each UI element when it has focus. This also works in the integrated terminal and can be toggled with `kb(editor.action.toggleTabFocusMode)`.' response_metadata={'token_usage': {'prompt_tokens': 2140, 'total_tokens': 2305, 'completion_tokens': 165}, 'model': 'mistral-small', 'finish_reason': 'stop'} id='run-6450c150-d4b5-4a34-8e87-232990e8e221-0'


 75%|███████▌  | 3/4 [00:06<00:02,  2.05s/it]

content='Question: NVDA focus mode\nAnswer: It is recommended to stay in focus mode for NVDA and use hotkeys to navigate instead of browse mode.\n\nQuestion: Screen reader optimized mode\nAnswer: VS Code goes into screen reader optimized mode for UI such as the editor and Integrated Terminal when it detects that a screen reader is being used.' response_metadata={'token_usage': {'prompt_tokens': 2095, 'total_tokens': 2174, 'completion_tokens': 79}, 'model': 'mistral-small', 'finish_reason': 'stop'} id='run-f7b0f972-55e2-4cac-9cac-0f0f57272596-0'


100%|██████████| 4/4 [00:08<00:00,  2.14s/it]

content='Question: Accessibility signals sounds?\nAnswer: The command **Help: List Signal Sounds** lists all available accessibility signal sounds, lets you hear each as you move through the list, and allows for configuring their enabled/disabled status.\n\nQuestion: Orca silent Linux?\nAnswer: If Orca in your Linux distribution does not read the editor content, ensure that the setting `"editor.accessibilitySupport": "on"` is enabled in VS Code. Additionally, try setting `ACCESSIBILITY_ENABLED=1` as an environment variable to resolve the issue.' response_metadata={'token_usage': {'prompt_tokens': 1176, 'total_tokens': 1298, 'completion_tokens': 122}, 'model': 'mistral-small', 'finish_reason': 'stop'} id='run-2593c5ba-7754-42fa-99fc-96fd9492ae42-0'





In [13]:
f = project_root / 'synthetic_data_generation' / 'data.txt'  
t = ''
for i in results:
    t += i.content
    t += '\n\n'
    
f.write_text(t)

2044

In [14]:
qa_tuples = generator.extract_qa_tuples(t)

: 

In [11]:
qa_tuples

[('VS Code zoom commands',
  'The zoom commands in VS Code are View > Appearance > Zoom In, View > Appearance > Zoom Out, and View > Appearance > Reset Zoom.'),
 ('VS Code high contrast theme',
  'To enable the High Contrast theme in VS Code, go to File > Preferences > Theme > Color Theme and select High Contrast.'),
 ('color meaning accessibility',
 ('keyboard shortcuts commands',
  'VS Code provides an exhaustive list of commands in the Command Palette, which can be used without a mouse. Press `kb(workbench.action.showCommands)` and type a command name to filter the list of commands.'),
 ('tab navigation controls',
  'In VS Code, you can use the `kbstyle(Tab)` key to navigate between UI controls and an indicator will appear around each UI element when it has focus.'),
 ('NVDA focus mode',
  'It is recommended to stay in focus mode and use hotkeys to navigate with NVDA instead of browse mode.'),
 ('Go to next error',
 ('Accessible diff viewer',
  "The Accessible Diff Viewer presents c