In [2]:
def extract_answers_sequence(string_file_path):

    answers = []   # creates an empty list to store the answer values 
    current_answer = 0 
    i = 0 

    with open(string_file_path, 'r', encoding = 'utf-8') as file:
        survey = file.readlines()      # opens the file to read only 


    while i < len(survey):
        text = survey[i].strip()

        if text.startswith("Question"):
            current_answer = 0   # if a line starts with "Question", current answer remains 0 
            question_block = survey[i+1:i+5]

            k = 0

            for answer_line in question_block:
                answer_line = answer_line.strip()
                if '[x]' in answer_line:
                    current_answer = 1 + k 
                    break
                k +=1
            answers.append(current_answer)
            i += 5

        else:
            i += 1

    return answers


#string_file_path = "data/answers_respondent_2.txt"
#list_answers = extract_answers_sequence(string_file_path)
#print(list_answers)

#list_answers = extract_answers_sequence("data/raw_answers/answers_respondent_1.txt")
def write_answers_sequence(list_answers, int_n):
    new_text_file = f"data/answers_list_respondent_{int_n}.txt"
    
    with open(new_text_file, 'w') as file:
        file.writelines(f"{answer}\n" for answer in list_answers)    # sets new name to the text file containing answers list
    
    print(f"Answers saved to text file!")



In [4]:
import os
import subprocess

import sys

# dowmload gdown
subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"])
def download_answer_files(cloud_url, path_to_data_folder, respondent_index):

    os.makedirs(path_to_data_folder, exist_ok=True)

    

    # gdown --folder URL -O path
    command = [
        "gdown",
        "--folder",
        cloud_url,
        "-O",
        path_to_data_folder
    ]

    try:
        subprocess.run(command, check=True)
        print(" Download complete.")
    except Exception as e:
        print(f" Download failed: {e}")
        return

    # a1.txt rename to answers_respondent_1.txt 
    for i in range(1, respondent_index + 1):
        original = os.path.join(path_to_data_folder, f"a{i}.txt")
        renamed = os.path.join(path_to_data_folder, f"answers_respondent_{i}.txt")
        if os.path.exists(original):
            os.rename(original, renamed)
            print(f" Renamed {original} → {renamed}")
        else:
            print(f" File missing: {original}")


def collate_answer_files(data_folder_path):
    
    os.makedirs("output", exist_ok=True)
    output_path = os.path.join("output", "collated_answers.txt")

    with open(output_path, 'w', encoding='utf-8') as outfile:
        for filename in sorted(os.listdir(data_folder_path)):
            if filename.startswith("answers_list_respondent_") and filename.endswith(".txt"):
                file_path = os.path.join(data_folder_path, filename)
                with open(file_path, 'r', encoding='utf-8') as infile:
                    outfile.write(infile.read())
                    outfile.write("*\n")

    print(f"Collation complete: {output_path}")
    print(" Collate function has run.")






[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m


M2's code consists of two functions, which aim to download and rename raw data files as well as collecting all respondents' answers into a single file. 

M2's code is well structured and easy to read. They have added comments throughout the code, explaining what each block does. This makes it easier to follow. 

The code follows the assignment's function guidelines.

Running time is 0.2 seconds. 

In [5]:
import matplotlib.pyplot as plt

def load_sequences(collated_answers_path):
    sequences = []
    
    # open, read and close file. named file as "f". 
    with open(collated_answers_path, 'r',encoding='utf-8') as f:
        # read text, clean text(\n) in opening/ closing and sperate answer from 4 answerers(by using *).
        blocks = f.read().strip().split("*\n")  
        for block in blocks:
            # turn text to string
            lines = block.strip().splitlines()
            # turn string to integers
            sequence = [int(x.strip()) for x in lines if x.strip().isdigit()]
            if len(sequence)==100:
        
                sequences.append(sequence)
            else:
                print("incomplete sequence")
    return sequences
    
def generate_means_sequence(collated_answers_path):
# if seq[i]!= 0, add seq[i] in values.
    sequences = load_sequences(collated_answers_path)
    means=[]
    for i in range(100):
        values = [seq[i] for seq in sequences if seq[i] != 0]
        mean = sum(values) / len(values) if values else 0
        means.append(mean)
    return means

def visualize_data(collated_answers_path, n):
    sequences = load_sequences(collated_answers_path)
    # draw the average value of all the answer (exclude 0)
    if n == 1:
        means = generate_means_sequence(collated_answers_path)
        plt.scatter(range(1, 101), means)
        plt.title("Mean Answer Value per Question")
        plt.xlabel("Question Number")
        plt.ylabel("Mean Answer (1–4)")
    # draw every one(4 mumber) answer line.
    elif n == 2:
        for seq in sequences:
            plt.plot(range(1, 101), seq)
        plt.title("All Respondents’ Answer Sequences")
        plt.xlabel("Question Number")
        plt.ylabel("Answer (1–4 or 0)")
    else:
        print("Error: Invalid plot option. n must be 1 or 2.")
        return
    plt.grid(True)
    plt.show()


M3 was in charge of computing statistics and providing visual insights into potential patterns.

The code is well structured with comments throughout, making it more  readable. 

Code's running time is 0.2 seconds, so runs efficiently and fast. 

Makes use of appropriate libraries and functions to produce visualisations - well presented with appropriate titles and labels. 

The code contains three functions instead of two. 

In [7]:
#import os
#from data_preparation_M2 import download_answer_files, collate_answer_files
#from data_extraction_M1 import extract_answers_sequence, write_answers_sequence
#from data_analysis_M3 import visualize_data

def run_full_analysis():

    cloud_url = "https://drive.google.com/drive/folders/1wq4I1RFFIZ7fz0tQ9ojcBSOHFe1AX95y?usp=sharing"
    data_folder = "data/raw_answers"
    structured_folder = "data"
    collated_file_path = "output/collated_answers.txt"
    respondent_index = 25
    plot_mode = 1,2  

    print("="*50)
    download_answer_files(cloud_url, data_folder, respondent_index)

    for i in range(1, respondent_index + 1):
        input_file = os.path.join(data_folder, f"answers_respondent_{i}.txt")
        answers = extract_answers_sequence(input_file)
        write_answers_sequence(answers, i)

    collate_answer_files(structured_folder)


    visualize_data(collated_file_path, plot_mode)



if __name__ == "__main__":
    run_full_analysis()



Retrieving folder contents


Processing file 183Nm-rrLUoHrkwp25Vp6PtUg1H-Lt4C8 a1.txt
Processing file 1CbYZiNKSpEFNpoyQIDUClwTIXgVWY-yi a2.txt
Processing file 16KiOjB-NuVR-DWVbvWOeFFRL_sP7eYt- a3.txt
Processing file 1bKZZfJE_kP8kui6o5UJAqbHf3dU1M9GV a4.txt
Processing file 1Yiec0DXqKtPN-n92xGD32ZqmkVOxGfy6 a5.txt
Processing file 1V90EEBfdJleGF8ScklHwYg_HrKpFOAwt a6.txt
Processing file 1ALrgcMFa_AzqvpFsHQqH1i3UuEE9sEfP a7.txt
Processing file 1UjSG5_n94xUMEFh3nfRci6JsfAPwKd7a a8.txt
Processing file 1E0YEYO-VfSeFsdU2jhzZpnnNUpawGRGl a9.txt
Processing file 1gbCw18IZfH8i8sAZOhw0S2RcjUDWTbS0 a10.txt
Processing file 139noLFinj_YuAha-Xcikg5AKRV8P0mMs a11.txt
Processing file 1BXlTTjGP8EHXIf0l1zcP_Pk2CGbkQrsR a12.txt
Processing file 17r4oAYoW52HV0IHa2RPF_F5IgBxsvGy_ a13.txt
Processing file 1NjI6bOe16tAuLZinULuVOtRHJqiP_rtT a14.txt
Processing file 1MK5tzyCMFNGcianLpZYYRcvE6s5I1hEU a15.txt
Processing file 1rxTi7vFZLKUHMbgdwtjC1_rsK2ovAcn7 a16.txt
Processing file 1K9kMmAGjbGZTAG1rpwB7V-9Rgou6omo9 a17.txt
Processing file 19gC_aT

Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=183Nm-rrLUoHrkwp25Vp6PtUg1H-Lt4C8
To: /Users/tanup/Documents/Analysis_Of_Python_Quiz_Responses_Project/reviews/data/raw_answers/a1.txt
100%|██████████| 21.3k/21.3k [00:00<00:00, 35.1MB/s]
Downloading...
From: https://drive.google.com/uc?id=1CbYZiNKSpEFNpoyQIDUClwTIXgVWY-yi
To: /Users/tanup/Documents/Analysis_Of_Python_Quiz_Responses_Project/reviews/data/raw_answers/a2.txt
100%|██████████| 21.3k/21.3k [00:00<00:00, 920kB/s]
Downloading...
From: https://drive.google.com/uc?id=16KiOjB-NuVR-DWVbvWOeFFRL_sP7eYt-
To: /Users/tanup/Documents/Analysis_Of_Python_Quiz_Responses_Project/reviews/data/raw_answers/a3.txt
100%|██████████| 21.3k/21.3k [00:00<00:00, 1.14MB/s]
Downloading...
From: https://drive.google.com/uc?id=1bKZZfJE_kP8kui6o5UJAqbHf3dU1M9GV
To: /Users/tanup/Documents/Analysis_Of_Python_Quiz_Responses_Project/reviews/data/raw_answ

 Download complete.
 File missing: data/raw_answers/a1.txt
 Renamed data/raw_answers/a2.txt → data/raw_answers/answers_respondent_2.txt
 Renamed data/raw_answers/a3.txt → data/raw_answers/answers_respondent_3.txt
 File missing: data/raw_answers/a4.txt
 Renamed data/raw_answers/a5.txt → data/raw_answers/answers_respondent_5.txt
 Renamed data/raw_answers/a6.txt → data/raw_answers/answers_respondent_6.txt
 Renamed data/raw_answers/a7.txt → data/raw_answers/answers_respondent_7.txt
 Renamed data/raw_answers/a8.txt → data/raw_answers/answers_respondent_8.txt
 Renamed data/raw_answers/a9.txt → data/raw_answers/answers_respondent_9.txt
 Renamed data/raw_answers/a10.txt → data/raw_answers/answers_respondent_10.txt
 Renamed data/raw_answers/a11.txt → data/raw_answers/answers_respondent_11.txt
 Renamed data/raw_answers/a12.txt → data/raw_answers/answers_respondent_12.txt
 Renamed data/raw_answers/a13.txt → data/raw_answers/answers_respondent_13.txt
 Renamed data/raw_answers/a14.txt → data/raw_an

Downloading...
From: https://drive.google.com/uc?id=1vZ0eqWaoiWRTGNqHWs8d25mRsnVO99AZ
To: /Users/tanup/Documents/Analysis_Of_Python_Quiz_Responses_Project/reviews/data/raw_answers/a25.txt
100%|██████████| 21.3k/21.3k [00:00<00:00, 1.06MB/s]
Download completed


FileNotFoundError: [Errno 2] No such file or directory: 'data/raw_answers/answers_respondent_1.txt'