In [23]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from time import sleep
from google import genai

import undetected_chromedriver as uc
import requests
import csv
import json

from Model.list_question import ListQuestion
from Model.answer import Answer
from Model.question import Question
from Model.option import Option

In [57]:
def is_valid_json(text):
    try:
        json.loads(text)
        return True
    except json.JSONDecodeError:
        return False
    
def clean_json_response(text):
    if text.strip().startswith("```json"):
        text = text.strip().removeprefix("```json").removesuffix("```").strip()
    return text
    
def get_type(outerHTML):
    short_answer = '{"block": "1", "type": "Short answer", "heading": "Where are you from?"}'
    multiple_choice = '{"block": "2", "type": "Multiple choice", "heading": "What is your favorite color?", "options": ["Red", "Blue", "Green"]}'
    paragraph = '{"block": "3", "type": "Paragraph", "heading": "Describe your experience with the product."}'
    multiple_choice_grid = '{"block": "4", "type": "Multiple-choice grid", "heading": "Rate the following features:", "rows": ["Feature A", "Feature B"], "columns": ["1", "2", "3"]}'
    tick_box_grid = '{"block": "5", "type": "Tick box grid", "heading": "Select your preferred options:", "rows": ["Option 1", "Option 2"], "columns": ["Choice A", "Choice B"]}'
    checkboxes = '{"block": "6", "type": "Checkboxes", "heading": "Select all that apply:", "options": ["Option 1", "Option 2", "Option 3"]}'
    date = '{"block": "7", "type": "Date", "heading": "Select a date:"}'
    time = '{"block": "8", "type": "Time", "heading": "Select a time:"}'
    linear_scale = '{"block": "9", "type": "Linear scale", "heading": "Rate your satisfaction from 1 to 5.", "option": "1", "2", "3", "4", "5"}'
    rating = '{"block": "10", "type": "Rating", "heading": "Rate the product quality from 1 to 5 stars.", "option": "1", "2", "3", "4", "5"}'
    title = '{"block": "11", "type": "Title", "heading": "Welcome to the survey!"}'

    example = f"""[{title}, {short_answer}, {multiple_choice}, {paragraph}, {multiple_choice_grid}, {tick_box_grid}, {checkboxes}, {date}, {time}, {linear_scale}, {rating}]"""

    prompt = f"""
    You are given a block of HTML that represents a single question from a Google Form. Your task is to analyze the HTML structure and classify the question into one of the following types:

    {outerHTML}

    ["Title", "Short answer", "Paragraph", "Multiple choice", "Checkboxes", "Drop-down", "Linear scale", "Rating", "Multiple-choice grid", "Tick box grid", "Date", "Time"]

    If the question is ["Multiple-choice grid", "Tick box grid", "Checkboxes", "Drop-down", "Multiple choice"] return all options available in the question.
    If the question is ["Linear scale", "Rating"] return the scale or rating options available in the question.

    Use the following rules to classify:

    - "Title": <div>, <h1>, or <h2> containing descriptive text only (no inputs).
    - "Short answer": <input type="text"> with placeholder or text like "Your answer".
    - "Paragraph": <textarea> with placeholder "Your answer".
    - "Multiple choice": div[role="radio"] inside label.docssharedWizToggleLabeledContainer.
    - "Checkboxes": div[role="checkbox"] inside label.docssharedWizToggleLabeledContainer.
    - "Drop-down": div[role="listbox"] with div[role="option"] inside.
    - "Linear scale": div[role="radio"] with numeric labels (e.g., 1–5), and polar ends (e.g., "bad" to "good").
    - "Rating": label[data-ratingscale] with div[role="radio"] representing stars or numbers.
    - "Multiple-choice grid": div[role="radiogroup"][aria-label] (each row is a set of radio buttons).
    - "Tick box grid": div[role="group"] (each row has multiple div[role="checkbox"]).
    - "Date": <input type="date"> or fields for day/month/year.
    - "Time": input[type="text"][max="23"] or [max="59"] or fields for hour/minute.

    Returns the correct result in JSON format with two properties:
    
    "block": the HTML structure you analyzed,
    "type": the exact question type (must match one of the valid values).
    "heading": the question's title or description if available.
    "options": an array of options if applicable (only for multiple choice, checkboxes, linear scale, rating etc.).
    "rows": an array of row labels if applicable (only for multiple-choice grid, tick box grid).
    "columns": an array of column labels if applicable (only for multiple-choice grid, tick box grid).

    Make sure to:

    Identify the question type accurately based on text or HTML structure.
    Provide all relevant options, scales, or labels in the JSON response.
    Return the JSON response without any additional text or formatting.

    Example output:

    {example}

    """
    client = genai.Client(api_key="AIzaSyAg6QdqkgO_wzJmdfci86QI9Pm0eY65wpc")

    response = client.models.generate_content(
        model="gemini-2.0-flash", contents= prompt
    )

    text = clean_json_response(response.text)

    if is_valid_json(text):
        return json.loads(text)  # Remove the "```json" and "```" markers
    else:
        print("Invalid JSON response:", response.text)
        return None
    


In [25]:
options = uc.ChromeOptions()

# Thiết lập ngôn ngữ và UA
options.add_argument("--lang=en-US")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

# Thêm các tham số ẩn danh
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--disable-extensions")
options.add_argument("--disable-popup-blocking")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-gpu")
options.add_argument("--remote-debugging-port=9222")

# Thêm các tham số ngẫu nhiên giống trình duyệt thật
options.add_argument("--flag-switches-begin")
options.add_argument("--flag-switches-end")

# Thiết lập profile
options.add_argument("--user-data-dir=/tmp/chrome_profile")
options.add_argument("--profile-directory=Default")

uc.TARGET_VERSION = 85
driver = uc.Chrome(options=options)

# Xóa traces của automation
driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
driver.execute_script("window.navigator.chrome = {runtime: {}, etc: {}};")

# driver.get("https://forms.gle/LHPFYvz3M8Wjp9p37") # chuẩn
# driver.get("https://forms.gle/kRrps3n2m5pLaPZP8") # khong có tiêu đề
driver.get("https://forms.gle/qFkBDjWP1HCVDM2V8") # test agf
result = []
index = 1



In [49]:
list_question = ListQuestion(driver.find_element(By.XPATH, "//div[@role='list']"))
list_question.questions = []

In [50]:
questions_element = list_question.element.find_elements(By.XPATH, "./div[@role='listitem']")
print(str(len(questions_element)))

12


In [51]:
html =''
for element in questions_element:
    question = Question(element)
    question.xpath = "./div[@role='listitem']"
    outerHTML = element.get_attribute("outerHTML")
    html += "{'block': '" + str(index) + "', 'html': '" + outerHTML + "'}"
    index += 1
    list_question.questions.append(question)
print(html)

{'block': '24', 'html': '<div class="Qr7Oae" role="listitem"><div class="OxAavc" jsname="ibnC6b" jscontroller="RGrRJf" data-item-id="1389529834"><div class="meSK8 M7eMe" role="heading" aria-level="2">Title</div><div class="spb5Rd OIC90c">Description title</div><div jsname="XbIQze" class="M0Ikp" id="i.err.1389529834" role="alert"></div></div></div>'}{'block': '25', 'html': '<div class="Qr7Oae" role="listitem"><div jsmodel="CP1oW" data-params="%.@.[139727508,&quot;Short answer&quot;,null,0,[[300964300,null,false,null,null,null,null,null,null,null,[]]],null,null,null,null,null,null,[null,&quot;Short answer&quot;]],&quot;i1&quot;,&quot;i2&quot;,&quot;i3&quot;,false,&quot;i4&quot;]"><div jscontroller="sWGJ4b" jsaction="EEvAHc:yfX9oc;" jsname="WsjYwc" class="geS5n"><div class="z12JJ"><div class="M4DNQ"><div id="i1" class="HoXoMd D1wxyf RjsPE" role="heading" aria-level="3"><span class="M7eMe">Short answer</span></div><div class="gubaDc OIC90c RjsPE" id="i2"></div></div></div><div jscontroller

In [58]:
genai_result = get_type(html)
result = genai_result

In [61]:
for i in range(len(result)):
    print(result[i])

{'block': '24', 'type': 'Title', 'heading': 'Title', 'options': None, 'rows': None, 'columns': None}
{'block': '25', 'type': 'Short answer', 'heading': 'Short answer', 'options': None, 'rows': None, 'columns': None}
{'block': '26', 'type': 'Paragraph', 'heading': 'Paragraph', 'options': None, 'rows': None, 'columns': None}
{'block': '27', 'type': 'Multiple choice', 'heading': 'Multiple choice', 'options': ['Option 1', 'Option 2'], 'rows': None, 'columns': None}
{'block': '28', 'type': 'Checkboxes', 'heading': 'Checkboxes', 'options': ['Option 1', 'Option 2'], 'rows': None, 'columns': None}
{'block': '29', 'type': 'Drop-down', 'heading': 'Drop-down', 'options': ['Option 1', 'Option 2', 'Option 3'], 'rows': None, 'columns': None}
{'block': '30', 'type': 'Linear scale', 'heading': 'Linear scale', 'options': ['1', '2', '3', '4', '5'], 'rows': None, 'columns': None}
{'block': '31', 'type': 'Rating', 'heading': 'Rating', 'options': ['1', '2', '3', '4', '5'], 'rows': None, 'columns': None}
{'

In [None]:
for i in range(len(result)):
    print(result[i])
    list_question.questions[i].type = result[i]['type']
    list_question.questions[i].heading = result[i]['heading']
    if result[i]['type'] in ["Multiple choice", "Checkboxes", "Drop-down", "Linear scale", "Rating"]:
        for option in result[i]['options']:
            list_question.questions[i].options.append(Option(option))

{'block': '24', 'type': 'Title', 'heading': 'Title'}
{'block': '25', 'type': 'Short answer', 'heading': 'Short answer'}
{'block': '26', 'type': 'Paragraph', 'heading': 'Paragraph'}
{'block': '27', 'type': 'Multiple choice', 'heading': 'Multiple choice', 'options': ['Option 1', 'Option 2']}
{'block': '28', 'type': 'Checkboxes', 'heading': 'Checkboxes', 'options': ['Option 1', 'Option 2']}
{'block': '29', 'type': 'Drop-down', 'heading': 'Drop-down', 'options': ['Option 1', 'Option 2', 'Option 3']}
{'block': '30', 'type': 'Linear scale', 'heading': 'Linear scale', 'scale': ['1', '2', '3', '4', '5']}
{'block': '31', 'type': 'Rating', 'heading': 'Rating', 'scale': ['1', '2', '3', '4', '5']}
{'block': '32', 'type': 'Multiple-choice grid', 'heading': 'Multiple-choice grid', 'rows': ['Row 1', 'Row 2', 'Row 3'], 'columns': ['Column 1', 'Column 2', 'Column 3', 'Column 4', 'Column 5']}
{'block': '33', 'type': 'Tick box grid', 'heading': 'Tick box grid', 'rows': ['Row 1', 'Row 2', 'Row 3'], 'colum

In [55]:
print(list.child[7].element.get_attribute("outerHTML"))

<div class="Qr7Oae" role="listitem"><div jsmodel="CP1oW" data-params="%.@.[107966770,&quot;What do you think about tableware products made from areca spathe? &quot;,null,7,[[1491468268,[[&quot;Strongly disagree&quot;],[&quot;Disagree&quot;],[&quot;Neutral&quot;],[&quot;Agree&quot;],[&quot;Strongly agree&quot;]],false,[&quot;Do you find the product attractive&quot;],null,null,null,null,null,null,[],[false]],[1644581275,[[&quot;Strongly disagree&quot;],[&quot;Disagree&quot;],[&quot;Neutral&quot;],[&quot;Agree&quot;],[&quot;Strongly agree&quot;]],false,[&quot;The product can replace conventional disks and plastic products.&quot;],null,null,null,null,null,null,[],[false]],[1840782282,[[&quot;Strongly disagree&quot;],[&quot;Disagree&quot;],[&quot;Neutral&quot;],[&quot;Agree&quot;],[&quot;Strongly agree&quot;]],false,[&quot;You feel that areca bowls and plates are suitable for the green living trend and environmental protection.&quot;],null,null,null,null,null,null,[],[false]]],null,null,nul

In [30]:
print(str(len(list.child)))

12


In [112]:
div_list = list.find_elements(By.XPATH, "./div[@role='listitem']")

In [113]:
html = div_list[9].get_attribute("outerHTML")

In [114]:
print(html)

<div class="Qr7Oae" role="listitem"><div jsmodel="CP1oW" data-params="%.@.[978983220,&quot;Tick box grid&quot;,null,7,[[1388253273,[[&quot;Column 1&quot;],[&quot;Column 2&quot;],[&quot;Column 3&quot;],[&quot;Column 4&quot;],[&quot;Column 5&quot;]],false,[&quot;Row 1&quot;],null,null,null,null,null,null,[],[true]],[1389996590,[[&quot;Column 1&quot;],[&quot;Column 2&quot;],[&quot;Column 3&quot;],[&quot;Column 4&quot;],[&quot;Column 5&quot;]],false,[&quot;Row 2&quot;],null,null,null,null,null,null,[],[true]],[2059325292,[[&quot;Column 1&quot;],[&quot;Column 2&quot;],[&quot;Column 3&quot;],[&quot;Column 4&quot;],[&quot;Column 5&quot;]],false,[&quot;Row 3&quot;],null,null,null,null,null,null,[],[true]]],null,null,null,null,null,null,[null,&quot;Tick box grid&quot;]],&quot;i60&quot;,&quot;i61&quot;,&quot;i62&quot;,false,&quot;i63&quot;]"><div jscontroller="sWGJ4b" jsaction="EEvAHc:yfX9oc;" jsname="WsjYwc" class="geS5n"><div class="z12JJ"><div class="M4DNQ"><div id="i60" class="HoXoMd D1wxyf 

In [196]:
button_next = driver.find_element(
    By.XPATH,
    "//div[@role='button'][contains(., 'Next') or contains(., 'Tiếp')]"
)

In [None]:
button_next.click()