In [45]:
import pandas as pd
import json

leetcode_dataset_path = "data/leetcode_dataset.csv"
df = pd.read_csv(leetcode_dataset_path)
df.head()

Unnamed: 0,id,title,description,is_premium,difficulty,solution_link,acceptance_rate,frequency,url,discuss_count,accepted,submissions,companies,related_topics,likes,dislikes,rating,asked_by_faang,similar_questions
0,1,Two Sum,Given an array of integers `nums` and an integ...,0,Easy,/articles/two-sum,46.7,100.0,https://leetcode.com/problems/two-sum,999,4.1M,8.7M,"Amazon,Google,Apple,Adobe,Microsoft,Bloomberg,...","Array,Hash Table",20217,712,97,1,"[3Sum, /problems/3sum/, Medium], [4Sum, /probl..."
1,2,Add Two Numbers,You are given two non-empty linked lists repre...,0,Medium,/articles/add-two-numbers,35.7,93.1,https://leetcode.com/problems/add-two-numbers,999,1.9M,5.2M,"Bloomberg,Microsoft,Amazon,Google,Facebook,App...","Linked List,Math,Recursion",11350,2704,81,1,"[Multiply Strings, /problems/multiply-strings/..."
2,3,Longest Substring Without Repeating Characters,"Given a string `s`, find the length of the lon...",0,Medium,/articles/longest-substring-without-repeating-...,31.5,90.9,https://leetcode.com/problems/longest-substrin...,999,2.1M,6.7M,"Amazon,Bloomberg,Microsoft,Facebook,Apple,Adob...","Hash Table,Two Pointers,String,Sliding Window",13810,714,95,1,[Longest Substring with At Most Two Distinct C...
3,4,Median of Two Sorted Arrays,Given two sorted arrays `nums1` and `nums2` of...,0,Hard,/articles/median-of-two-sorted-arrays,31.4,86.2,https://leetcode.com/problems/median-of-two-so...,999,904.7K,2.9M,"Amazon,Goldman Sachs,Facebook,Microsoft,Apple,...","Array,Binary Search,Divide and Conquer",9665,1486,87,1,
4,5,Longest Palindromic Substring,"Given a string `s`, return the longest palindr...",0,Medium,/articles/longest-palindromic-substring,30.6,84.7,https://leetcode.com/problems/longest-palindro...,999,1.3M,4.1M,"Amazon,Microsoft,Wayfair,Facebook,Adobe,eBay,G...","String,Dynamic Programming",10271,670,94,1,"[Shortest Palindrome, /problems/shortest-palin..."


In [46]:
"""
1. read the  description
2. send questions to openai api 
3. process the responses - markdown format, display, export as html
"""

'\n1. read the  description\n2. send questions to openai api \n3. process the responses - markdown format, display, export as html\n'

In [47]:
import openai
import os
import pandas as pd
import json
from openai import OpenAI
leetcode_dataset_path = "data/leetcode_dataset.csv"
df = pd.read_csv(leetcode_dataset_path)
df.head()

def solve_problem(problem):
    prompt = f"""
        Write a Python code to solve the problem below, 
        please give a detailed explanation and follow the LeetCode solution format to ensure the answer can be run directly on the LeetCode website.
        
        Problem:
        {problem}
        """
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
    
    response = client.chat.completions.create(
      model="gpt-4o-mini",
      messages=[
        {"role": "system", "content": "You are an algorithm master."},
        {"role": "user", "content": prompt},
      ]
    )

    return response.choices[0].message


In [48]:
import concurrent.futures
import time

# Function to process the entire dataset in parallel
def process_problems_in_parallel(df):
    problems = df['description'].tolist()  # Assuming 'problem' column contains the problem descriptions
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(solve_problem, problems))
    return results

# Execute and store results
results = process_problems_in_parallel(df)
print(results)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [49]:
def format_markdown(message):
    content = message.content
    
    # Replace escaped newlines with actual newlines
    content = content.replace('\\n', '\n')
    
    # Ensure proper Markdown formatting
    formatted_content = content.replace('```python', '\n```python').replace('```', '\n```')
    
    return formatted_content

from IPython.display import display, Markdown

display(Markdown(formatted_text))

To solve the problem of finding the length of the longest substring without repeating characters, we can use a sliding window approach combined with a dictionary to keep track of the characters and their positions. This method is efficient and runs in linear time.

### Explanation of the Approach:

1. **Sliding Window**: We maintain two pointers, `start` and `end`, which will represent the current window of characters being evaluated. `start` will denote the beginning of the window, and `end` will iterate through the string.

2. **Dictionary for Characters**: We use a dictionary to map each character to its latest index (position) in the string. This allows us to quickly identify if a character has been seen before and, if so, its position.

3. **Updating the Window**: As we iterate with the `end` pointer:
   - If the character at `s[end]` is not in the dictionary (i.e., it hasn't been seen in the current window), we add it to the dictionary and expand the window by moving `end` to the right.
   - If the character has been seen (it's already in the dictionary), we move the `start` pointer to the right of the last occurrence of this character to ensure all characters in the current window are unique.

4. **Calculating Maximum Length**: After each character insertion, we calculate the length of the current unique substring and update a variable to keep track of the maximum length found.

5. **Edge Case**: If the input string is empty, we immediately return 0.

### Python Code Implementation:



```python
class Solution:
    def lengthOfLongestSubstring(self, s: str) -> int:
        char_index = {}  # Dictionary to store the last index of each character
        max_length = 0  # To keep track of maximum length of substring found
        start = 0  # Left pointer for the sliding window
        
        for end in range(len(s)):
            if s[end] in char_index:
                # If the character is already in the dictionary and is in the current window
                start = max(start, char_index[s[end]] + 1)  # Move 'start' to the right of the last occurrence
            
            char_index[s[end]] = end  # Update the last seen index of the character
            max_length = max(max_length, end - start + 1)  # Calculate max length
            
        return max_length  # Return the maximum length found

```

### Explanation of the Code:

- We define a class `Solution` and a method `lengthOfLongestSubstring` that takes a string `s` as input.
- We initialize a dictionary `char_index` to keep track of the last seen index of each character, a variable `max_length` to store the length of the longest substring found, and `start` to represent the beginning of the current substring window.
- We loop through each character in the string using the `end` pointer. Inside the loop:
  - If the character at `s[end]` has been seen before and falls within the current window (i.e., its last index is greater than or equal to `start`), we update `start` to the index immediately after the last occurrence of that character.
  - Regardless of whether the character was seen or not, we update its last seen position in `char_index`.
  - We compute the current length of the substring from `start` to `end` and update `max_length` if this new length is greater.
- Finally, we return `max_length` as the output.

This solution efficiently calculates the required length with a time complexity of O(n), where n is the length of the string, making it suitable for the given constraints.

In [56]:
n = 500
start = 500
end = 1000
ids = df['id'].tolist()[start: end]
problems = df['description'].tolist()[start: end]
titles = df['title'].tolist()[start: end]
markdown_results = [format_markdown(result) for result in results]
markdown_text = ""
for question_number, title, problem, result in zip(ids, titles, problems, markdown_results):
    markdown_text += f"# {question_number}. {title}\n\n"
    markdown_text += f"### Problem Description \n{problem}\n\n"
    markdown_text += f"### Solution \n {result}\n\n"

In [57]:
import nbformat as nbf
from nbconvert import HTMLExporter

# Create a new Jupyter Notebook
nb = nbf.v4.new_notebook()

# Add a Markdown cell
nb.cells.append(nbf.v4.new_markdown_cell(markdown_text))

# Save the notebook to a file
notebook_filename = "markdown_display-500-1000-test-problems.ipynb"
with open(notebook_filename, "w") as f:
    nbf.write(nb, f)

# Convert the notebook to HTML
html_exporter = HTMLExporter()
html_data, resources = html_exporter.from_filename(notebook_filename)

# Save the HTML to a file
html_filename = "markdown_display-500-1000-problems.html"
with open(html_filename, "w") as f:
    f.write(html_data)

# Optionally, clean up the notebook file
os.remove(notebook_filename)

print(f"Markdown has been exported to {html_filename}")

Markdown has been exported to markdown_display-500-1000-problems.html
