In [1]:
import pandas as pd
import os
from pathlib import Path

# Get the latest CSV file from output directory
output_dir = Path("output")
csv_files = list(output_dir.glob("literature_coding_results_*.csv"))
latest_csv = max(csv_files, key=os.path.getctime)

print(f"Reading latest output file: {latest_csv.name}")

# Read the CSV as DataFrame
df = pd.read_csv(latest_csv)

print(f"DataFrame shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

# Display basic info about the DataFrame
df.head()

Reading latest output file: literature_coding_results_20251126_110419.csv
DataFrame shape: (107, 27)
Columns: ['Title', 'Include in Review (Y/N)', 'Exclusion Reason', '1.1 Primary Stakeholders', '1.1 Primary Stakeholders - Source', '1.2 Context', '1.2 Context - Source', '1.3 Tech/AI type', '1.3 Tech/AI type - Source', '1.4 Tool/Platform', '1.4 Tool/Platform - Source', '1.5 Education level', '1.5 Education level - Source', '2.1 Feedback term', '2.1 Feedback term - Source', '2.2 Description of context', '2.2 Description of context - Source', '2.3 Our evaluation', '2.3 Our evaluation - Source', '3.1 Agency type', '3.1 Agency type - Source', '3.2 Feedback timing control', '3.2 Feedback timing control - Source', '4.1 Metrics for evaluation', '4.1 Metrics for evaluation - Source', '4.2 Measurement of agency', '4.2 Measurement of agency - Source']


Unnamed: 0,Title,Include in Review (Y/N),Exclusion Reason,1.1 Primary Stakeholders,1.1 Primary Stakeholders - Source,1.2 Context,1.2 Context - Source,1.3 Tech/AI type,1.3 Tech/AI type - Source,1.4 Tool/Platform,...,2.3 Our evaluation,2.3 Our evaluation - Source,3.1 Agency type,3.1 Agency type - Source,3.2 Feedback timing control,3.2 Feedback timing control - Source,4.1 Metrics for evaluation,4.1 Metrics for evaluation - Source,4.2 Measurement of agency,4.2 Measurement of agency - Source
0,Datta Et Al. 2023 Classifying Mathematics Teac...,Y,Not applicable,Teachers and pre-service teachers.,"""A key barrier to providing these opportunitie...",Classroom discourse and teacher preparation co...,"""The questions were drawn from anonymized tran...",Natural Language Processing (NLP) using transf...,"""With the advent of transfer learning, the sta...",Not specified,...,The feedback provided is aimed at improving qu...,"""By describing the accuracy of our classifiers...",Not specified,Not specified,Not specified.,"""Not specified"" (No relevant quote found in th...",Accuracy of classification models.,"""We used accuracy as the evaluation metric as ...",Not specified.,"""Not specified"" (No relevant quote found in th..."
1,Shin Et Al. 2022 Analyzing Students' Performan...,Y,Not applicable,Students and teachers.,"""the proposed system would greatly help teache...",Classroom setting with computerized formative ...,"""computerized formative assessments in the cla...","Deep learning frameworks, specifically Long Sh...","""we introduced a deep learning framework to pr...",Star Math assessment.,...,"The feedback provided is predictive in nature,...","""the main purpose of the study was to introduc...",Teacher agency in decision-making regarding te...,"""teachers often act as a sole authority to dec...",Not specified.,"""the task of identifying an optimum number of ...","Accuracy, precision, recall, and F1-score.","""we used commonly employed classification accu...",Not specified.,"""the current study’s focus on providing method..."
2,Confrey Et Al. 2018 the Concept of an Agile Cu...,Y,Not applicable,The primary stakeholders in this study are mid...,"""We report on a study of the teachers’ practic...",The context of the study is middle school clas...,"""Field-testing M-M has been conducted at three...",The technology used is a digital learning syst...,"""Our DLS, called Math-Mapper 6–8, is introduce...",The specific tool mentioned is Math-Mapper 6–8.,...,The feedback provided is actionable and design...,"""An agile curriculum should provide compelling...",There is evidence of student agency as student...,"""Students are recruited as partners in interpr...",Students have some control over the timing of ...,"""Autonomous access to their own assessment dat...",Metrics for evaluation include student perform...,"""Class reports provide teachers with 'heatmap'...",Agency is measured through students' ability t...,"""Students’ involvement also increased as teach..."
3,Portillo and Alvarado 2025 Plenary the Impact ...,Y,Not applicable (the paper meets the inclusion ...,Students and teachers.,"""This study examines AI usage among engineerin...",Classroom and academic settings.,"""AI integration into academic settings present...","AI tools, specifically ChatGPT.","""While AI tools like ChatGPT can support indep...",ChatGPT.,...,The feedback provided by AI tools is immediate...,"""Perceptions of AI were largely positive, with...",Limited student agency due to reliance on AI.,"""15% admitted to fully solve problems with AI,...",Students have limited control over feedback ti...,"""While most students used AI occasionally (54....",Student perceptions and reliance on AI tools.,"""Perceptions of AI were largely positive, with...",Agency is measured through student reliance on...,"""Among non-users, concerns centered on diminis..."
4,Barno Et Al. 2024 Scaling Generated Feedback f...,Y,"Not applicable, as the paper is relevant to ma...",Novice mathematics teachers and teacher educat...,"""To best support the learning of novice mathem...",Digital clinical simulations for novice teache...,"""The Teacher Moments platform is a platform fo...",Generative AI (specifically large language mod...,"""By integrating LLMs into these platforms, tea...",Teacher Moments platform.,...,The feedback generated is tailored and nuanced...,"""We believe this prompt structure can help sca...",Teacher educators have agency in editing and e...,"""Each teacher educator then endorsed the gener...",Not specified.,"""Not specified.""",The quality of generated feedback is evaluated...,"""To ensure the fine-tuning is producing feedba...",Agency is measured through the ability of teac...,"""By allowing teacher educators to tailor the s..."


In [2]:
# Extract author and year from title and create new columns
import re

def extract_author_year(title):
    """Extract author and year from title format: 'Author (Year) Title'"""
    if pd.isna(title):
        return None, None, title
    
    # Pattern to match "Author (Year) Title" format
    pattern = r'^([^(]+)\s*\((\d{4})\)\s*(.+)$'
    match = re.match(pattern, title.strip())
    
    if match:
        author = match.group(1).strip()
        year = match.group(2).strip()
        clean_title = match.group(3).strip()
        return author, year, clean_title
    else:
        # If pattern doesn't match, return original title
        return None, None, title

# Apply the extraction
df[['Author', 'Year', 'Clean_Title']] = df['Title'].apply(
    lambda x: pd.Series(extract_author_year(x))
)

# Reorder columns to put Author and Year after Title
cols = df.columns.tolist()
title_idx = cols.index('Title')

# Insert Author and Year after Title
new_cols = (cols[:title_idx+1] + 
           ['Author', 'Year', 'Clean_Title'] + 
           cols[title_idx+1:-3])  # -3 to exclude the newly added columns at the end

df = df[new_cols]

# Show sample of the reorganized data
print("Sample of titles with extracted author/year:")
df[['Title', 'Author', 'Year', 'Clean_Title']].head(10)

Sample of titles with extracted author/year:


Unnamed: 0,Title,Author,Year,Clean_Title
0,Datta Et Al. 2023 Classifying Mathematics Teac...,,,Datta Et Al. 2023 Classifying Mathematics Teac...
1,Shin Et Al. 2022 Analyzing Students' Performan...,,,Shin Et Al. 2022 Analyzing Students' Performan...
2,Confrey Et Al. 2018 the Concept of an Agile Cu...,,,Confrey Et Al. 2018 the Concept of an Agile Cu...
3,Portillo and Alvarado 2025 Plenary the Impact ...,,,Portillo and Alvarado 2025 Plenary the Impact ...
4,Barno Et Al. 2024 Scaling Generated Feedback f...,,,Barno Et Al. 2024 Scaling Generated Feedback f...
5,Olsen Et Al. 2019 It Is Not Either or an Initi...,,,Olsen Et Al. 2019 It Is Not Either or an Initi...
6,Colliot Et Al. 2024 What Makes Tablet Based Le...,,,Colliot Et Al. 2024 What Makes Tablet Based Le...
7,Bastian Et Al. 2025 Using Ai Chatbots to Facil...,,,Bastian Et Al. 2025 Using Ai Chatbots to Facil...
8,Bush 2021 Software Based Intervention with Dig...,,,Bush 2021 Software Based Intervention with Dig...
9,Copur Gencturk Et Al. 2024 the Impact of an In...,,,Copur Gencturk Et Al. 2024 the Impact of an In...


In [3]:
# Display the complete DataFrame
print(f"Complete DataFrame ({df.shape[0]} rows, {df.shape[1]} columns):")
df

Complete DataFrame (107 rows, 30 columns):


Unnamed: 0,Title,Author,Year,Clean_Title,Include in Review (Y/N),Exclusion Reason,1.1 Primary Stakeholders,1.1 Primary Stakeholders - Source,1.2 Context,1.2 Context - Source,...,2.3 Our evaluation,2.3 Our evaluation - Source,3.1 Agency type,3.1 Agency type - Source,3.2 Feedback timing control,3.2 Feedback timing control - Source,4.1 Metrics for evaluation,4.1 Metrics for evaluation - Source,4.2 Measurement of agency,4.2 Measurement of agency - Source
0,Datta Et Al. 2023 Classifying Mathematics Teac...,,,Datta Et Al. 2023 Classifying Mathematics Teac...,Y,Not applicable,Teachers and pre-service teachers.,"""A key barrier to providing these opportunitie...",Classroom discourse and teacher preparation co...,"""The questions were drawn from anonymized tran...",...,The feedback provided is aimed at improving qu...,"""By describing the accuracy of our classifiers...",Not specified,Not specified,Not specified.,"""Not specified"" (No relevant quote found in th...",Accuracy of classification models.,"""We used accuracy as the evaluation metric as ...",Not specified.,"""Not specified"" (No relevant quote found in th..."
1,Shin Et Al. 2022 Analyzing Students' Performan...,,,Shin Et Al. 2022 Analyzing Students' Performan...,Y,Not applicable,Students and teachers.,"""the proposed system would greatly help teache...",Classroom setting with computerized formative ...,"""computerized formative assessments in the cla...",...,"The feedback provided is predictive in nature,...","""the main purpose of the study was to introduc...",Teacher agency in decision-making regarding te...,"""teachers often act as a sole authority to dec...",Not specified.,"""the task of identifying an optimum number of ...","Accuracy, precision, recall, and F1-score.","""we used commonly employed classification accu...",Not specified.,"""the current study’s focus on providing method..."
2,Confrey Et Al. 2018 the Concept of an Agile Cu...,,,Confrey Et Al. 2018 the Concept of an Agile Cu...,Y,Not applicable,The primary stakeholders in this study are mid...,"""We report on a study of the teachers’ practic...",The context of the study is middle school clas...,"""Field-testing M-M has been conducted at three...",...,The feedback provided is actionable and design...,"""An agile curriculum should provide compelling...",There is evidence of student agency as student...,"""Students are recruited as partners in interpr...",Students have some control over the timing of ...,"""Autonomous access to their own assessment dat...",Metrics for evaluation include student perform...,"""Class reports provide teachers with 'heatmap'...",Agency is measured through students' ability t...,"""Students’ involvement also increased as teach..."
3,Portillo and Alvarado 2025 Plenary the Impact ...,,,Portillo and Alvarado 2025 Plenary the Impact ...,Y,Not applicable (the paper meets the inclusion ...,Students and teachers.,"""This study examines AI usage among engineerin...",Classroom and academic settings.,"""AI integration into academic settings present...",...,The feedback provided by AI tools is immediate...,"""Perceptions of AI were largely positive, with...",Limited student agency due to reliance on AI.,"""15% admitted to fully solve problems with AI,...",Students have limited control over feedback ti...,"""While most students used AI occasionally (54....",Student perceptions and reliance on AI tools.,"""Perceptions of AI were largely positive, with...",Agency is measured through student reliance on...,"""Among non-users, concerns centered on diminis..."
4,Barno Et Al. 2024 Scaling Generated Feedback f...,,,Barno Et Al. 2024 Scaling Generated Feedback f...,Y,"Not applicable, as the paper is relevant to ma...",Novice mathematics teachers and teacher educat...,"""To best support the learning of novice mathem...",Digital clinical simulations for novice teache...,"""The Teacher Moments platform is a platform fo...",...,The feedback generated is tailored and nuanced...,"""We believe this prompt structure can help sca...",Teacher educators have agency in editing and e...,"""Each teacher educator then endorsed the gener...",Not specified.,"""Not specified.""",The quality of generated feedback is evaluated...,"""To ensure the fine-tuning is producing feedba...",Agency is measured through the ability of teac...,"""By allowing teacher educators to tailor the s..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,Zhang Et Al. 2019 Interaction Patterns in Expl...,,,Zhang Et Al. 2019 Interaction Patterns in Expl...,Y,Not applicable,Students and teachers,"""Students (n = 189) from six different classes...",Classroom setting in Chinese schools,"""This study used Fractions Lab, an exploratory...",...,The feedback provided is designed to guide stu...,"""The findings highlight that the design of thr...",Limited student agency; students can request f...,"""The students appeared to have explored more o...",Students have limited control over feedback ti...,"""After creating a fraction, the students had a...","Pre- and post-test scores, interaction data, a...","""A one-way ANOVA was used to analyse the diffe...",Agency is measured through the frequency of fe...,"""The amount of learning activities that were s..."
103,Bulut Et Al. 2020 an Intelligent Recommender S...,,,Bulut Et Al. 2020 an Intelligent Recommender S...,Y,Not applicable,Students and teachers.,"""Classroom assessments allow K–12 teachers to ...","K-12 education setting, specifically focusing ...","""The introduction of computerized formative as...",...,The feedback provided through the IRS is adapt...,"""the IRS could reduce the number of tests admi...",Teacher agency in decision-making regarding as...,"""teachers should be the ultimate decision-make...",Teachers control the timing of feedback throug...,"""the system would simply use this additional i...","Average number of test administrations, averag...","""The performance of the IRS was evaluated base...",Agency is measured through the ability of the ...,"""the decision to refer a student for additiona..."
104,Lin Et Al. 2024 the Effects of Students' Self ...,,,Lin Et Al. 2024 the Effects of Students' Self ...,Y,Not applicable,Students,"""The research results indicated that informati...",Digital game-based learning system (GBLS),"""The rapid uptake of game-based learning syste...",...,The feedback provided is formative and aims to...,"""Information feedback can assist students in t...",Self-regulated learning agency,"""SRL strategies enable students to take more r...",Not specified,"""Not specified"" (Not found in the paper)",Perceived learning effectiveness and actual le...,"""We use perceived and actual learning effectiv...",Not specified,"""Not specified"" (Not found in the paper)"
105,Fang Et Al. 2024 Evaluating the Design Feature...,,,Fang Et Al. 2024 Evaluating the Design Feature...,Y,Not applicable,College students,"""This study investigates two distinctive featu...",Classroom setting for advanced mathematics lea...,"""Xiaomai is designed to help Chinese college s...",...,The feedback provided is immediate and require...,"""Our findings indicated that reflecting on the...",Limited student agency in feedback timing,"""There is no mechanism in the system to ensure...",Students do not control feedback timing,"""Response time for each question is recorded i...","Number of correct answers, practice performanc...","""The outcome of pretest and posttest were meas...",Not specified,"""Not specified"""


In [4]:
# Let's examine the actual title format first
print("Sample of original titles to understand the format:")
for i in range(5):
    print(f"Title {i+1}: '{df['Title'].iloc[i]}'")
    
print("\nLet's check if any author/year extractions worked:")
print(f"Non-null Authors: {df['Author'].notna().sum()}")
print(f"Non-null Years: {df['Year'].notna().sum()}")

Sample of original titles to understand the format:
Title 1: 'Datta Et Al. 2023 Classifying Mathematics Teacher Questions to Support Mathematical Discourse'
Title 2: 'Shin Et Al. 2022 Analyzing Students' Performance in Computerized Formative Assessments to Optimize Teachers' Test Adm'
Title 3: 'Confrey Et Al. 2018 the Concept of an Agile Curriculum as Applied to a Middle School Mathematics Digital Learning System'
Title 4: 'Portillo and Alvarado 2025 Plenary the Impact of Ai Tools on Student Learning in Integral Calculus a Case Study of Latin Amer'
Title 5: 'Barno Et Al. 2024 Scaling Generated Feedback for Novice Teachers by Sustaining Teacher Educators' Expertise a Design T'

Let's check if any author/year extractions worked:
Non-null Authors: 0
Non-null Years: 0


In [5]:
# Fix the extraction function for the actual format: "Author Et Al. Year Title"
def extract_author_year_fixed(title):
    """Extract author and year from title format: 'Author Et Al. Year Title'"""
    if pd.isna(title):
        return None, None, title
    
    # Pattern to match "Author Et Al. Year Title" format
    # Look for year (4 digits) after author name, then capture the rest as title
    pattern = r'^(.+?(?:\s+Et\s+Al\.)?)\s+(\d{4})\s+(.+)$'
    match = re.match(pattern, title.strip(), re.IGNORECASE)
    
    if match:
        author = match.group(1).strip()
        year = match.group(2).strip()
        clean_title = match.group(3).strip()
        return author, year, clean_title
    else:
        # If pattern doesn't match, return original title
        return None, None, title

# Re-apply the corrected extraction
df[['Author', 'Year', 'Clean_Title']] = df['Title'].apply(
    lambda x: pd.Series(extract_author_year_fixed(x))
)

print("Fixed extraction results:")
print(f"Non-null Authors: {df['Author'].notna().sum()}")
print(f"Non-null Years: {df['Year'].notna().sum()}")

print("\nSample of corrected extraction:")
df[['Title', 'Author', 'Year', 'Clean_Title']].head(10)

Fixed extraction results:
Non-null Authors: 107
Non-null Years: 107

Sample of corrected extraction:


Unnamed: 0,Title,Author,Year,Clean_Title
0,Datta Et Al. 2023 Classifying Mathematics Teac...,Datta Et Al.,2023,Classifying Mathematics Teacher Questions to S...
1,Shin Et Al. 2022 Analyzing Students' Performan...,Shin Et Al.,2022,Analyzing Students' Performance in Computerize...
2,Confrey Et Al. 2018 the Concept of an Agile Cu...,Confrey Et Al.,2018,the Concept of an Agile Curriculum as Applied ...
3,Portillo and Alvarado 2025 Plenary the Impact ...,Portillo and Alvarado,2025,Plenary the Impact of Ai Tools on Student Lear...
4,Barno Et Al. 2024 Scaling Generated Feedback f...,Barno Et Al.,2024,Scaling Generated Feedback for Novice Teachers...
5,Olsen Et Al. 2019 It Is Not Either or an Initi...,Olsen Et Al.,2019,It Is Not Either or an Initial Investigation I...
6,Colliot Et Al. 2024 What Makes Tablet Based Le...,Colliot Et Al.,2024,What Makes Tablet Based Learning Effective a S...
7,Bastian Et Al. 2025 Using Ai Chatbots to Facil...,Bastian Et Al.,2025,Using Ai Chatbots to Facilitate Mathematics Pr...
8,Bush 2021 Software Based Intervention with Dig...,Bush,2021,Software Based Intervention with Digital Manip...
9,Copur Gencturk Et Al. 2024 the Impact of an In...,Copur Gencturk Et Al.,2024,"the Impact of an Interactive, Personalized Com..."


In [6]:
# Display the final DataFrame with extracted author and year
print(f"Final DataFrame ({df.shape[0]} rows, {df.shape[1]} columns):")
df

Final DataFrame (107 rows, 30 columns):


Unnamed: 0,Title,Author,Year,Clean_Title,Include in Review (Y/N),Exclusion Reason,1.1 Primary Stakeholders,1.1 Primary Stakeholders - Source,1.2 Context,1.2 Context - Source,...,2.3 Our evaluation,2.3 Our evaluation - Source,3.1 Agency type,3.1 Agency type - Source,3.2 Feedback timing control,3.2 Feedback timing control - Source,4.1 Metrics for evaluation,4.1 Metrics for evaluation - Source,4.2 Measurement of agency,4.2 Measurement of agency - Source
0,Datta Et Al. 2023 Classifying Mathematics Teac...,Datta Et Al.,2023,Classifying Mathematics Teacher Questions to S...,Y,Not applicable,Teachers and pre-service teachers.,"""A key barrier to providing these opportunitie...",Classroom discourse and teacher preparation co...,"""The questions were drawn from anonymized tran...",...,The feedback provided is aimed at improving qu...,"""By describing the accuracy of our classifiers...",Not specified,Not specified,Not specified.,"""Not specified"" (No relevant quote found in th...",Accuracy of classification models.,"""We used accuracy as the evaluation metric as ...",Not specified.,"""Not specified"" (No relevant quote found in th..."
1,Shin Et Al. 2022 Analyzing Students' Performan...,Shin Et Al.,2022,Analyzing Students' Performance in Computerize...,Y,Not applicable,Students and teachers.,"""the proposed system would greatly help teache...",Classroom setting with computerized formative ...,"""computerized formative assessments in the cla...",...,"The feedback provided is predictive in nature,...","""the main purpose of the study was to introduc...",Teacher agency in decision-making regarding te...,"""teachers often act as a sole authority to dec...",Not specified.,"""the task of identifying an optimum number of ...","Accuracy, precision, recall, and F1-score.","""we used commonly employed classification accu...",Not specified.,"""the current study’s focus on providing method..."
2,Confrey Et Al. 2018 the Concept of an Agile Cu...,Confrey Et Al.,2018,the Concept of an Agile Curriculum as Applied ...,Y,Not applicable,The primary stakeholders in this study are mid...,"""We report on a study of the teachers’ practic...",The context of the study is middle school clas...,"""Field-testing M-M has been conducted at three...",...,The feedback provided is actionable and design...,"""An agile curriculum should provide compelling...",There is evidence of student agency as student...,"""Students are recruited as partners in interpr...",Students have some control over the timing of ...,"""Autonomous access to their own assessment dat...",Metrics for evaluation include student perform...,"""Class reports provide teachers with 'heatmap'...",Agency is measured through students' ability t...,"""Students’ involvement also increased as teach..."
3,Portillo and Alvarado 2025 Plenary the Impact ...,Portillo and Alvarado,2025,Plenary the Impact of Ai Tools on Student Lear...,Y,Not applicable (the paper meets the inclusion ...,Students and teachers.,"""This study examines AI usage among engineerin...",Classroom and academic settings.,"""AI integration into academic settings present...",...,The feedback provided by AI tools is immediate...,"""Perceptions of AI were largely positive, with...",Limited student agency due to reliance on AI.,"""15% admitted to fully solve problems with AI,...",Students have limited control over feedback ti...,"""While most students used AI occasionally (54....",Student perceptions and reliance on AI tools.,"""Perceptions of AI were largely positive, with...",Agency is measured through student reliance on...,"""Among non-users, concerns centered on diminis..."
4,Barno Et Al. 2024 Scaling Generated Feedback f...,Barno Et Al.,2024,Scaling Generated Feedback for Novice Teachers...,Y,"Not applicable, as the paper is relevant to ma...",Novice mathematics teachers and teacher educat...,"""To best support the learning of novice mathem...",Digital clinical simulations for novice teache...,"""The Teacher Moments platform is a platform fo...",...,The feedback generated is tailored and nuanced...,"""We believe this prompt structure can help sca...",Teacher educators have agency in editing and e...,"""Each teacher educator then endorsed the gener...",Not specified.,"""Not specified.""",The quality of generated feedback is evaluated...,"""To ensure the fine-tuning is producing feedba...",Agency is measured through the ability of teac...,"""By allowing teacher educators to tailor the s..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,Zhang Et Al. 2019 Interaction Patterns in Expl...,Zhang Et Al.,2019,Interaction Patterns in Exploratory Learning E...,Y,Not applicable,Students and teachers,"""Students (n = 189) from six different classes...",Classroom setting in Chinese schools,"""This study used Fractions Lab, an exploratory...",...,The feedback provided is designed to guide stu...,"""The findings highlight that the design of thr...",Limited student agency; students can request f...,"""The students appeared to have explored more o...",Students have limited control over feedback ti...,"""After creating a fraction, the students had a...","Pre- and post-test scores, interaction data, a...","""A one-way ANOVA was used to analyse the diffe...",Agency is measured through the frequency of fe...,"""The amount of learning activities that were s..."
103,Bulut Et Al. 2020 an Intelligent Recommender S...,Bulut Et Al.,2020,an Intelligent Recommender System for Personal...,Y,Not applicable,Students and teachers.,"""Classroom assessments allow K–12 teachers to ...","K-12 education setting, specifically focusing ...","""The introduction of computerized formative as...",...,The feedback provided through the IRS is adapt...,"""the IRS could reduce the number of tests admi...",Teacher agency in decision-making regarding as...,"""teachers should be the ultimate decision-make...",Teachers control the timing of feedback throug...,"""the system would simply use this additional i...","Average number of test administrations, averag...","""The performance of the IRS was evaluated base...",Agency is measured through the ability of the ...,"""the decision to refer a student for additiona..."
104,Lin Et Al. 2024 the Effects of Students' Self ...,Lin Et Al.,2024,"the Effects of Students' Self Efficacy, Self R...",Y,Not applicable,Students,"""The research results indicated that informati...",Digital game-based learning system (GBLS),"""The rapid uptake of game-based learning syste...",...,The feedback provided is formative and aims to...,"""Information feedback can assist students in t...",Self-regulated learning agency,"""SRL strategies enable students to take more r...",Not specified,"""Not specified"" (Not found in the paper)",Perceived learning effectiveness and actual le...,"""We use perceived and actual learning effectiv...",Not specified,"""Not specified"" (Not found in the paper)"
105,Fang Et Al. 2024 Evaluating the Design Feature...,Fang Et Al.,2024,Evaluating the Design Features of an Intellige...,Y,Not applicable,College students,"""This study investigates two distinctive featu...",Classroom setting for advanced mathematics lea...,"""Xiaomai is designed to help Chinese college s...",...,The feedback provided is immediate and require...,"""Our findings indicated that reflecting on the...",Limited student agency in feedback timing,"""There is no mechanism in the system to ensure...",Students do not control feedback timing,"""Response time for each question is recorded i...","Number of correct answers, practice performanc...","""The outcome of pretest and posttest were meas...",Not specified,"""Not specified"""


In [7]:
# Let's actually see what's in the Author and Year columns
print("Checking actual Author and Year values:")
print("First 10 Authors:")
print(df['Author'].head(10).tolist())
print("\nFirst 10 Years:")
print(df['Year'].head(10).tolist())
print("\nUnique Authors count:", df['Author'].nunique())
print("Unique Years count:", df['Year'].nunique())

# Let's debug the regex with a specific title
test_title = df['Title'].iloc[0]
print(f"\nTesting regex on: '{test_title}'")

pattern = r'^(.+?(?:\s+Et\s+Al\.)?)\s+(\d{4})\s+(.+)$'
match = re.match(pattern, test_title.strip(), re.IGNORECASE)
if match:
    print(f"Match found!")
    print(f"Group 1 (author): '{match.group(1)}'")
    print(f"Group 2 (year): '{match.group(2)}'")
    print(f"Group 3 (title): '{match.group(3)}'")
else:
    print("No match found")

Checking actual Author and Year values:
First 10 Authors:
['Datta Et Al.', 'Shin Et Al.', 'Confrey Et Al.', 'Portillo and Alvarado', 'Barno Et Al.', 'Olsen Et Al.', 'Colliot Et Al.', 'Bastian Et Al.', 'Bush', 'Copur Gencturk Et Al.']

First 10 Years:
['2023', '2022', '2018', '2025', '2024', '2019', '2024', '2025', '2021', '2024']

Unique Authors count: 98
Unique Years count: 9

Testing regex on: 'Datta Et Al. 2023 Classifying Mathematics Teacher Questions to Support Mathematical Discourse'
Match found!
Group 1 (author): 'Datta Et Al.'
Group 2 (year): '2023'
Group 3 (title): 'Classifying Mathematics Teacher Questions to Support Mathematical Discourse'


In [8]:
# Sort DataFrame by Clean_Title in alphabetical order
df_sorted = df.sort_values('Clean_Title').reset_index(drop=True)

print(f"DataFrame sorted by Clean_Title ({df_sorted.shape[0]} rows, {df_sorted.shape[1]} columns):")
df_sorted

DataFrame sorted by Clean_Title (107 rows, 30 columns):


Unnamed: 0,Title,Author,Year,Clean_Title,Include in Review (Y/N),Exclusion Reason,1.1 Primary Stakeholders,1.1 Primary Stakeholders - Source,1.2 Context,1.2 Context - Source,...,2.3 Our evaluation,2.3 Our evaluation - Source,3.1 Agency type,3.1 Agency type - Source,3.2 Feedback timing control,3.2 Feedback timing control - Source,4.1 Metrics for evaluation,4.1 Metrics for evaluation - Source,4.2 Measurement of agency,4.2 Measurement of agency - Source
0,Villegas Ch Et Al. 2025 Adaptive Intelligent T...,Villegas Ch Et Al.,2025,Adaptive Intelligent Tutoring Systems for Stem...,Y,Not applicable,Students and educators.,"""The methodology included evaluating 450 unive...",Classroom and online learning environment.,"""The developed system was evaluated in a contr...",...,"The feedback is immediate and personalized, ef...","""These findings identify the system’s potentia...",Student agency is present through interaction ...,"""Students in the experimental group interacted...",Students have some control over feedback timin...,"""The system adapts feedback based on historica...","Precision, progress rate, and student satisfac...","""Data on precision, progress rate, and student...",Agency is measured through student interaction...,"""The results show significant improvements in ..."
1,Kuklick and Lindner 2023 Affective Motivationa...,Kuklick and Lindner,2023,Affective Motivational Effects of Performance ...,Y,Not applicable,University students,"""University students (N =439) worked on a low-...",Computer-based assessment,"""In computer-based assessments, immediate perf...",...,Feedback complexity affects emotional responses,"""Our data showed that students reported higher...",Not specified,"""Not specified"" (No direct mention of agency t...",Not specified,"""Not specified"" (No direct mention of control ...","Positive and negative emotions, perceived usef...","""We assessed the effects of performance feedba...",Not specified,"""Not specified"" (No direct mention of how agen..."
2,Sun Et Al. 2024 Ai Driven Feedback for Enhanci...,Sun Et Al.,2024,Ai Driven Feedback for Enhancing Students' Mat...,Y,Not applicable,"Students, specifically primary school students...","""The developing system, ScaffoldiaMyMaths, aim...",The context is primary mathematics education i...,"""ScaffoldiaMyMaths is an innovative web-based ...",...,The feedback provided is immediate and persona...,"""By utilizing AI, ScaffoldiaMyMaths ensures th...",Self-directed learning agency.,"""This innovative approach enables students to ...",Students do not control feedback timing; it is...,"""The system uses AI-powered scaffolding to pro...",Not specified.,"""Not specified"" (No specific metrics mentioned...",Not specified.,"""Not specified"" (No specific measurement of ag..."
3,Kumor Et Al. 2024 Aleks in High School Mathema...,Kumor Et Al.,2024,Aleks in High School Mathematics Classrooms Ex...,Y,Not applicable,Teachers and students,"""five Chicagoland high school mathematics teac...",High school mathematics classrooms,"""This study aimed to collect information about...",...,The feedback provided by ALEKS is immediate an...,"""The ability of the program to provide immedia...",Student agency through personalized learning p...,"""The individualized pathway that ALEKS provide...",Students have some control over feedback timin...,"""Teachers acted as facilitators who could prov...",Student progress and performance on assessments,"""Teachers used ALEKS to assess students on the...",Agency is measured through the ability of stud...,"""Teachers used the ALEKS My Path to allow stud..."
4,Mcnichols Et Al. 2023 Algebra Error Classifica...,Mcnichols Et Al.,2023,Algebra Error Classification with Large Langua...,Y,Not applicable,Students and teachers.,"""This component enables teachers to provide pe...",Not specified,Not specified,...,The feedback provided is immediate and tailore...,"""This direct, timely feedback enables immediat...",Limited student agency; feedback is predefined...,"""the feedback can direct a student to the prec...",Students do not control feedback timing; feedb...,"""the ITS showed an automated feedback message ...",Classification accuracy.,"""For error classification, our primary metric ...",Not specified.,"""Not specified."""
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,Copur Gencturk Et Al. 2024 the Impact of an In...,Copur Gencturk Et Al.,2024,"the Impact of an Interactive, Personalized Com...",Y,Not applicable,Teachers and middle school students.,"""Based on the data collected from 1727 middle ...",Online professional development for mathematic...,"""This study aims to address the aforementioned...",...,The feedback is interactive and tailored to th...,"""the program had a statistically significant i...",Teacher agency in professional development.,"""teachers could learn by interacting with a vi...",Teachers control the timing of their feedback ...,"""Teachers in the treatment group completed the...",Student performance on mathematics assessments.,"""To capture the impact of our program on stude...",Not specified.,"""Not specified"" (Not explicitly mentioned in t..."
103,Alvarez Et Al. 2024 the Influence of Ai Apps o...,Alvarez Et Al.,2024,the Influence of Ai Apps on Enhancing Mathemat...,Y,Not applicable,Primary school students,"""The influence of artificial intelligence (AI)...",Classroom and mobile learning environment,"""These applications, along with the use of mob...",...,The feedback provided by AI applications is im...,"""The implementation of AI tools in mathematics...",Student agency through self-directed learning,"""It is understood that the advancement of new ...",Students have some control over feedback timin...,"""AI can also help teachers identify the indivi...",Academic performance and understanding of math...,"""The findings underscore the importance of usi...",Not specified,"""Not specified"""
104,Liu Et Al. 2023 the Influence of Digital Game ...,Liu Et Al.,2023,the Influence of Digital Game Based Learning w...,Y,Not applicable,Students and teachers.,"""the study used a purposive sampling method to...",Classroom setting.,"""the school had conducted the DGBL using the '...",...,The feedback provided by the game is immediate...,"""the game could immediately know if they got t...",Limited student agency.,"""the game allows our students to practise four...",Students do not control feedback timing.,"""the game could immediately know if they got t...","Calculation accuracy, calculation speed, and n...","""the majority of participants generally agreed...",Not specified.,"""Not specified"" (Section VI, Conclusion)."
105,Ghilay 2018 the Second Generation of Feeedback...,Ghilay,2018,the Second Generation of Feeedback Based Learn...,Y,Not applicable,Students and lecturers.,"""The study examined students’ attitudes regard...",Higher education quantitative courses.,"""The current research presents a second genera...",...,The feedback provided is continuous and aims t...,"""The FBL-2g model offers an effective solution...",Student agency is present through feedback and...,"""Student questions or requests are another imp...",Students can control feedback timing through t...,"""Students are asked to evaluate the extent to ...",Not specified,Not specified,Agency is measured through student feedback an...,"""The open-ended question strengthens the close..."


In [9]:
# Drop all columns that contain "Source" in the name
source_columns = [col for col in df_sorted.columns if "Source" in col]
print(f"Dropping {len(source_columns)} source columns:")
print(source_columns)

df_clean = df_sorted.drop(columns=source_columns)

print(f"\nDataFrame after removing source columns ({df_clean.shape[0]} rows, {df_clean.shape[1]} columns):")
df_clean

Dropping 12 source columns:
['1.1 Primary Stakeholders - Source', '1.2 Context - Source', '1.3 Tech/AI type - Source', '1.4 Tool/Platform - Source', '1.5 Education level - Source', '2.1 Feedback term - Source', '2.2 Description of context - Source', '2.3 Our evaluation - Source', '3.1 Agency type - Source', '3.2 Feedback timing control - Source', '4.1 Metrics for evaluation - Source', '4.2 Measurement of agency - Source']

DataFrame after removing source columns (107 rows, 18 columns):


Unnamed: 0,Title,Author,Year,Clean_Title,Include in Review (Y/N),Exclusion Reason,1.1 Primary Stakeholders,1.2 Context,1.3 Tech/AI type,1.4 Tool/Platform,1.5 Education level,2.1 Feedback term,2.2 Description of context,2.3 Our evaluation,3.1 Agency type,3.2 Feedback timing control,4.1 Metrics for evaluation,4.2 Measurement of agency
0,Villegas Ch Et Al. 2025 Adaptive Intelligent T...,Villegas Ch Et Al.,2025,Adaptive Intelligent Tutoring Systems for Stem...,Y,Not applicable,Students and educators.,Classroom and online learning environment.,Intelligent Tutoring System (ITS) utilizing de...,Not specified.,University level.,Adaptive feedback.,Feedback is provided in real-time based on stu...,"The feedback is immediate and personalized, ef...",Student agency is present through interaction ...,Students have some control over feedback timin...,"Precision, progress rate, and student satisfac...",Agency is measured through student interaction...
1,Kuklick and Lindner 2023 Affective Motivationa...,Kuklick and Lindner,2023,Affective Motivational Effects of Performance ...,Y,Not applicable,University students,Computer-based assessment,Automated performance feedback,SoSci Survey,Undergraduate and graduate,"Knowledge of Results (KR), Knowledge of Correc...",Immediate feedback after incorrect responses,Feedback complexity affects emotional responses,Not specified,Not specified,"Positive and negative emotions, perceived usef...",Not specified
2,Sun Et Al. 2024 Ai Driven Feedback for Enhanci...,Sun Et Al.,2024,Ai Driven Feedback for Enhancing Students' Mat...,Y,Not applicable,"Students, specifically primary school students...",The context is primary mathematics education i...,AI-driven feedback system.,ScaffoldiaMyMaths.,Primary school.,"Immediate, tailored feedback.",Feedback is provided in real-time based on stu...,The feedback provided is immediate and persona...,Self-directed learning agency.,Students do not control feedback timing; it is...,Not specified.,Not specified.
3,Kumor Et Al. 2024 Aleks in High School Mathema...,Kumor Et Al.,2024,Aleks in High School Mathematics Classrooms Ex...,Y,Not applicable,Teachers and students,High school mathematics classrooms,Adaptive learning system,ALEKS,High school,Immediate feedback,Feedback is provided through assessments and d...,The feedback provided by ALEKS is immediate an...,Student agency through personalized learning p...,Students have some control over feedback timin...,Student progress and performance on assessments,Agency is measured through the ability of stud...
4,Mcnichols Et Al. 2023 Algebra Error Classifica...,Mcnichols Et Al.,2023,Algebra Error Classification with Large Langua...,Y,Not applicable,Students and teachers.,Not specified,Pre-trained Large Language Models (LLMs).,Cognitive Tutor.,Middle school (Algebra I).,"Automated feedback, hints, descriptive feedback.",Feedback is generated based on error classific...,The feedback provided is immediate and tailore...,Limited student agency; feedback is predefined...,Students do not control feedback timing; feedb...,Classification accuracy.,Not specified.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,Copur Gencturk Et Al. 2024 the Impact of an In...,Copur Gencturk Et Al.,2024,"the Impact of an Interactive, Personalized Com...",Y,Not applicable,Teachers and middle school students.,Online professional development for mathematic...,Natural language processing (NLP) based intera...,Not specified.,Middle school.,Personalized and real-time feedback.,Feedback is provided through a virtual facilit...,The feedback is interactive and tailored to th...,Teacher agency in professional development.,Teachers control the timing of their feedback ...,Student performance on mathematics assessments.,Not specified.
103,Alvarez Et Al. 2024 the Influence of Ai Apps o...,Alvarez Et Al.,2024,the Influence of Ai Apps on Enhancing Mathemat...,Y,Not applicable,Primary school students,Classroom and mobile learning environment,AI applications for personalized learning,"Smartick, Photomath, Matific, Khan Academy, Au...",Primary education,Instant feedback,Feedback is provided through AI applications t...,The feedback provided by AI applications is im...,Student agency through self-directed learning,Students have some control over feedback timin...,Academic performance and understanding of math...,Not specified
104,Liu Et Al. 2023 the Influence of Digital Game ...,Liu Et Al.,2023,the Influence of Digital Game Based Learning w...,Y,Not applicable,Students and teachers.,Classroom setting.,Digital game-based learning (DGBL).,Oral Math Hero.,Grade 3-4 students.,Task feedback.,Feedback is provided through task forms that a...,The feedback provided by the game is immediate...,Limited student agency.,Students do not control feedback timing.,"Calculation accuracy, calculation speed, and n...",Not specified.
105,Ghilay 2018 the Second Generation of Feeedback...,Ghilay,2018,the Second Generation of Feeedback Based Learn...,Y,Not applicable,Students and lecturers.,Higher education quantitative courses.,Not specified.,Not specified,Higher education (undergraduate).,"Feedback, diagnosis, prognosis.",Feedback is provided through online questionna...,The feedback provided is continuous and aims t...,Student agency is present through feedback and...,Students can control feedback timing through t...,Not specified,Agency is measured through student feedback an...


In [13]:
# Read current_list_titles.txt as a tab-separated table
df_titles = pd.read_csv('current_list_titles.tsv', sep='\t')

print(f"Loaded titles file as DataFrame:")
print(f"Shape: {df_titles.shape}")
print(f"Columns: {df_titles.columns.tolist()}")

# Display the first few rows to understand the structure
print("\nFirst 10 rows:")
print(df_titles.head(10))

# Check if there are multiple columns or just one
print(f"\nColumn count: {len(df_titles.columns)}")
if len(df_titles.columns) == 1:
    print("Single column detected - renaming to 'Title'")
    df_titles.columns = ['Title']
else:
    print("Multiple columns detected")
    
df_titles

Loaded titles file as DataFrame:
Shape: (109, 4)
Columns: ['ID', 'Publication Year', 'Author', 'Title']

First 10 rows:
   ID  Publication Year                                             Author  \
0   1              2025  Villegas-Ch, W; Buenano-Fernandez, D; Navarro,...   
1   2              2023                            Kuklick, L; Lindner, MA   
2   3              2024  Sun, D; Wang, JY; Yang, L; Chou, KL; Song, ZX;...   
3   4              2024  Kumor, T; Uribe-Florez, L; Trespalacios, J; Ya...   
4   5              2023                    McNichols, H; Zhang, MX; Lan, A   
5   6              2020                     Bulut, O; Cormier, DC; Shin, J   
6   7              2024  Li, CL; Zhu, WD; Xing, WL; Guo, R; Assoc Compu...   
7   8              2022                 Shin, JN; Chen, F; Lu, C; Bulut, O   
8   9              2025  Chelleri, E; Doz, D; Juma, ZO; Dogbalou, MWD; ...   
9  10              2025  Demszky, D; Liu, J; Hill, HC; Sanghi, S; Chung, A   

                     

Unnamed: 0,ID,Publication Year,Author,Title
0,1,2025,"Villegas-Ch, W; Buenano-Fernandez, D; Navarro,...",Adaptive intelligent tutoring systems for STEM...
1,2,2023,"Kuklick, L; Lindner, MA",Affective-motivational effects of performance ...
2,3,2024,"Sun, D; Wang, JY; Yang, L; Chou, KL; Song, ZX;...",AI-Driven Feedback for Enhancing Students' Mat...
3,4,2024,"Kumor, T; Uribe-Florez, L; Trespalacios, J; Ya...",ALEKS in High School Mathematics Classrooms: E...
4,5,2023,"McNichols, H; Zhang, MX; Lan, A",Algebra Error Classification with Large Langua...
...,...,...,...,...
104,105,2020,"Oker, A; Pecune, F; Declercq, C",Virtual tutor and pupil interaction: A study o...
105,106,2024,"Colliot, T; Krichen, O; Girard, N; Anquetil, E...",What makes tablet-based learning effective? A ...
106,107,2025,"Hahn, MG",Work in Progress: Investigating ChatGPT for Gr...
107,108,2018,"Williams, JJ; Heffernan, N; Poquet, O",Workshop: Design and Application of Collaborat...


In [15]:
# Install fuzzywuzzy if not already installed
try:
    from fuzzywuzzy import fuzz, process
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "fuzzywuzzy", "python-Levenshtein"])
    from fuzzywuzzy import fuzz, process

# Perform fuzzy matching between df_titles and df_sorted (df_clean)
def find_best_match(title, choices, threshold=80):
    """Find the best matching title using fuzzy string matching"""
    match = process.extractOne(title, choices, scorer=fuzz.ratio)
    if match and match[1] >= threshold:
        return match[0], match[1]
    return None, 0

# Get the clean titles from df_clean for matching
clean_titles_list = df_clean['Clean_Title'].tolist()

# For each title in df_titles, find the best match in df_clean
matches = []
for idx, row in df_titles.iterrows():
    title = row.iloc[3]  # Assuming title is in 4th column (index 3)
    best_match, score = find_best_match(title, clean_titles_list, threshold=70)
    matches.append({
        'original_title': title,
        'matched_title': best_match,
        'match_score': score
    })

# Create a DataFrame with the matches
match_df = pd.DataFrame(matches)

# Add the match results to df_titles
df_titles_with_matches = df_titles.copy()
df_titles_with_matches['matched_title'] = match_df['matched_title']
df_titles_with_matches['match_score'] = match_df['match_score']

# Perform left join - merge df_titles with df_clean based on the fuzzy matches
df_merged = df_titles_with_matches.merge(
    df_clean, 
    left_on='matched_title', 
    right_on='Clean_Title', 
    how='left'
)

print(f"Fuzzy matching results:")
print(f"Total titles in df_titles: {len(df_titles)}")
print(f"Successfully matched: {len(df_merged[df_merged['match_score'] > 0])}")
print(f"No match found: {len(df_merged[df_merged['match_score'] == 0])}")

print(f"\nMerged DataFrame shape: {df_merged.shape}")
print(f"Columns: {df_merged.columns.tolist()}")

# Show some examples of matches
print(f"\nSample matches (score >= 70):")
good_matches = df_merged[df_merged['match_score'] >= 70].head(10)
for _, row in good_matches.iterrows():
    orig_title = row['original_title'] if 'original_title' in df_merged.columns else row.iloc[3]
    print(f"Original: {str(orig_title)[:60]}...")
    print(f"Matched:  {str(row['matched_title'])[:60] if row['matched_title'] else 'None'}... (Score: {row['match_score']})")
    print()

df_merged

Fuzzy matching results:
Total titles in df_titles: 109
Successfully matched: 107
No match found: 2

Merged DataFrame shape: (109, 24)
Columns: ['ID', 'Publication Year', 'Author_x', 'Title_x', 'matched_title', 'match_score', 'Title_y', 'Author_y', 'Year', 'Clean_Title', 'Include in Review (Y/N)', 'Exclusion Reason', '1.1 Primary Stakeholders', '1.2 Context', '1.3 Tech/AI type', '1.4 Tool/Platform', '1.5 Education level', '2.1 Feedback term', '2.2 Description of context', '2.3 Our evaluation', '3.1 Agency type', '3.2 Feedback timing control', '4.1 Metrics for evaluation', '4.2 Measurement of agency']

Sample matches (score >= 70):
Original: Adaptive intelligent tutoring systems for STEM education: an...
Matched:  Adaptive Intelligent Tutoring Systems for Stem Education Ana... (Score: 86)

Original: Affective-motivational effects of performance feedback in co...
Matched:  Affective Motivational Effects of Performance Feedback in Co... (Score: 90)

Original: AI-Driven Feedback for Enhanci

Unnamed: 0,ID,Publication Year,Author_x,Title_x,matched_title,match_score,Title_y,Author_y,Year,Clean_Title,...,1.3 Tech/AI type,1.4 Tool/Platform,1.5 Education level,2.1 Feedback term,2.2 Description of context,2.3 Our evaluation,3.1 Agency type,3.2 Feedback timing control,4.1 Metrics for evaluation,4.2 Measurement of agency
0,1,2025,"Villegas-Ch, W; Buenano-Fernandez, D; Navarro,...",Adaptive intelligent tutoring systems for STEM...,Adaptive Intelligent Tutoring Systems for Stem...,86,Villegas Ch Et Al. 2025 Adaptive Intelligent T...,Villegas Ch Et Al.,2025,Adaptive Intelligent Tutoring Systems for Stem...,...,Intelligent Tutoring System (ITS) utilizing de...,Not specified.,University level.,Adaptive feedback.,Feedback is provided in real-time based on stu...,"The feedback is immediate and personalized, ef...",Student agency is present through interaction ...,Students have some control over feedback timin...,"Precision, progress rate, and student satisfac...",Agency is measured through student interaction...
1,2,2023,"Kuklick, L; Lindner, MA",Affective-motivational effects of performance ...,Affective Motivational Effects of Performance ...,90,Kuklick and Lindner 2023 Affective Motivationa...,Kuklick and Lindner,2023,Affective Motivational Effects of Performance ...,...,Automated performance feedback,SoSci Survey,Undergraduate and graduate,"Knowledge of Results (KR), Knowledge of Correc...",Immediate feedback after incorrect responses,Feedback complexity affects emotional responses,Not specified,Not specified,"Positive and negative emotions, perceived usef...",Not specified
2,3,2024,"Sun, D; Wang, JY; Yang, L; Chou, KL; Song, ZX;...",AI-Driven Feedback for Enhancing Students' Mat...,Ai Driven Feedback for Enhancing Students' Mat...,99,Sun Et Al. 2024 Ai Driven Feedback for Enhanci...,Sun Et Al.,2024,Ai Driven Feedback for Enhancing Students' Mat...,...,AI-driven feedback system.,ScaffoldiaMyMaths.,Primary school.,"Immediate, tailored feedback.",Feedback is provided in real-time based on stu...,The feedback provided is immediate and persona...,Self-directed learning agency.,Students do not control feedback timing; it is...,Not specified.,Not specified.
3,4,2024,"Kumor, T; Uribe-Florez, L; Trespalacios, J; Ya...",ALEKS in High School Mathematics Classrooms: E...,Aleks in High School Mathematics Classrooms Ex...,99,Kumor Et Al. 2024 Aleks in High School Mathema...,Kumor Et Al.,2024,Aleks in High School Mathematics Classrooms Ex...,...,Adaptive learning system,ALEKS,High school,Immediate feedback,Feedback is provided through assessments and d...,The feedback provided by ALEKS is immediate an...,Student agency through personalized learning p...,Students have some control over feedback timin...,Student progress and performance on assessments,Agency is measured through the ability of stud...
4,5,2023,"McNichols, H; Zhang, MX; Lan, A",Algebra Error Classification with Large Langua...,Algebra Error Classification with Large Langua...,100,Mcnichols Et Al. 2023 Algebra Error Classifica...,Mcnichols Et Al.,2023,Algebra Error Classification with Large Langua...,...,Pre-trained Large Language Models (LLMs).,Cognitive Tutor.,Middle school (Algebra I).,"Automated feedback, hints, descriptive feedback.",Feedback is generated based on error classific...,The feedback provided is immediate and tailore...,Limited student agency; feedback is predefined...,Students do not control feedback timing; feedb...,Classification accuracy.,Not specified.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,105,2020,"Oker, A; Pecune, F; Declercq, C",Virtual tutor and pupil interaction: A study o...,Virtual Tutor and Pupil Interaction a Study of...,99,Oker Et Al. 2020 Virtual Tutor and Pupil Inter...,Oker Et Al.,2020,Virtual Tutor and Pupil Interaction a Study of...,...,Embodied Conversational Agent (ECA),Virtual Interactive Behavior (VIB) platform,Primary school (fifth grade),Formative feedback,Feedback provided through verbal and facial ex...,Feedback is perceived as more effective when e...,Student agency in response to feedback,Not specified,"Accuracy rates, reaction times, and questionna...",Not specified
105,106,2024,"Colliot, T; Krichen, O; Girard, N; Anquetil, E...",What makes tablet-based learning effective? A ...,What Makes Tablet Based Learning Effective a S...,99,Colliot Et Al. 2024 What Makes Tablet Based Le...,Colliot Et Al.,2024,What Makes Tablet Based Learning Effective a S...,...,Intelligent Tutoring System (ITS) with real-ti...,IntuiGéo app.,Seventh graders.,Real-time corrective feedback.,Feedback is provided immediately after drawing...,The feedback provided is immediate and correct...,Limited student agency; feedback is controlled...,Students do not control feedback timing; it is...,Success in drawing geometric shapes and angle ...,Not specified.
106,107,2025,"Hahn, MG",Work in Progress: Investigating ChatGPT for Gr...,Work in Progress Investigating Chatgpt for Gra...,99,Hahn 2025 Work in Progress Investigating Chatg...,Hahn,2025,Work in Progress Investigating Chatgpt for Gra...,...,AI language model (ChatGPT).,ChatGPT.,Undergraduate.,AI-generated feedback.,Feedback is provided after students submit the...,The feedback provided by ChatGPT is intended t...,Limited student agency in feedback timing; fee...,Students do not control feedback timing; feedb...,"Accuracy, precision, recall, F1-score, and cla...",Not specified.
107,108,2018,"Williams, JJ; Heffernan, N; Poquet, O",Workshop: Design and Application of Collaborat...,Workshop Design and Application of Collaborati...,99,Williams Et Al. 2018 Workshop Design and Appli...,Williams Et Al.,2018,Workshop Design and Application of Collaborati...,...,"Artificial intelligence, machine learning",www.assistments.org,"Middle school, university","Hints, explanations, motivational messages, fe...",Dynamic experimentation with real-time data an...,The feedback provided is adaptive and based on...,Student agency through feedback adaptation,Not specified,"Engagement measures, learning accuracy, time s...",Not specified


In [18]:
# Keep only ID, Title (from df_titles), and literature review coding columns
# The title from df_titles became 'Title_x' after the merge
columns_to_keep = [
    'ID',  # From df_titles
    'Title_x',  # Original title from df_titles (became Title_x after merge)
    'Include in Review (Y/N)',
    'Exclusion Reason',
    '1.1 Primary Stakeholders',
    '1.2 Context',
    '1.3 Tech/AI type',
    '1.4 Tool/Platform',
    '1.5 Education level',
    '2.1 Feedback term',
    '2.2 Description of context',
    '2.3 Our evaluation',
    '3.1 Agency type',
    '3.2 Feedback timing control',
    '4.1 Metrics for evaluation',
    '4.2 Measurement of agency'
]

# Create the final clean dataset
df_final = df_merged[columns_to_keep].copy()

# Rename Title_x back to Title for clarity
df_final = df_final.rename(columns={'Title_x': 'Title'})

print(f"Final dataset:")
print(f"Shape: {df_final.shape}")
print(f"Columns: {df_final.columns.tolist()}")

# Show summary of data completeness
print(f"\nData completeness:")
for col in df_final.columns:
    non_null_count = df_final[col].notna().sum()
    percentage = (non_null_count / len(df_final)) * 100
    print(f"{col}: {non_null_count}/{len(df_final)} ({percentage:.1f}%)")

df_final

Final dataset:
Shape: (109, 16)
Columns: ['ID', 'Title', 'Include in Review (Y/N)', 'Exclusion Reason', '1.1 Primary Stakeholders', '1.2 Context', '1.3 Tech/AI type', '1.4 Tool/Platform', '1.5 Education level', '2.1 Feedback term', '2.2 Description of context', '2.3 Our evaluation', '3.1 Agency type', '3.2 Feedback timing control', '4.1 Metrics for evaluation', '4.2 Measurement of agency']

Data completeness:
ID: 109/109 (100.0%)
Title: 109/109 (100.0%)
Include in Review (Y/N): 107/109 (98.2%)
Exclusion Reason: 107/109 (98.2%)
1.1 Primary Stakeholders: 107/109 (98.2%)
1.2 Context: 107/109 (98.2%)
1.3 Tech/AI type: 107/109 (98.2%)
1.4 Tool/Platform: 107/109 (98.2%)
1.5 Education level: 107/109 (98.2%)
2.1 Feedback term: 107/109 (98.2%)
2.2 Description of context: 107/109 (98.2%)
2.3 Our evaluation: 107/109 (98.2%)
3.1 Agency type: 107/109 (98.2%)
3.2 Feedback timing control: 107/109 (98.2%)
4.1 Metrics for evaluation: 107/109 (98.2%)
4.2 Measurement of agency: 107/109 (98.2%)


Unnamed: 0,ID,Title,Include in Review (Y/N),Exclusion Reason,1.1 Primary Stakeholders,1.2 Context,1.3 Tech/AI type,1.4 Tool/Platform,1.5 Education level,2.1 Feedback term,2.2 Description of context,2.3 Our evaluation,3.1 Agency type,3.2 Feedback timing control,4.1 Metrics for evaluation,4.2 Measurement of agency
0,1,Adaptive intelligent tutoring systems for STEM...,Y,Not applicable,Students and educators.,Classroom and online learning environment.,Intelligent Tutoring System (ITS) utilizing de...,Not specified.,University level.,Adaptive feedback.,Feedback is provided in real-time based on stu...,"The feedback is immediate and personalized, ef...",Student agency is present through interaction ...,Students have some control over feedback timin...,"Precision, progress rate, and student satisfac...",Agency is measured through student interaction...
1,2,Affective-motivational effects of performance ...,Y,Not applicable,University students,Computer-based assessment,Automated performance feedback,SoSci Survey,Undergraduate and graduate,"Knowledge of Results (KR), Knowledge of Correc...",Immediate feedback after incorrect responses,Feedback complexity affects emotional responses,Not specified,Not specified,"Positive and negative emotions, perceived usef...",Not specified
2,3,AI-Driven Feedback for Enhancing Students' Mat...,Y,Not applicable,"Students, specifically primary school students...",The context is primary mathematics education i...,AI-driven feedback system.,ScaffoldiaMyMaths.,Primary school.,"Immediate, tailored feedback.",Feedback is provided in real-time based on stu...,The feedback provided is immediate and persona...,Self-directed learning agency.,Students do not control feedback timing; it is...,Not specified.,Not specified.
3,4,ALEKS in High School Mathematics Classrooms: E...,Y,Not applicable,Teachers and students,High school mathematics classrooms,Adaptive learning system,ALEKS,High school,Immediate feedback,Feedback is provided through assessments and d...,The feedback provided by ALEKS is immediate an...,Student agency through personalized learning p...,Students have some control over feedback timin...,Student progress and performance on assessments,Agency is measured through the ability of stud...
4,5,Algebra Error Classification with Large Langua...,Y,Not applicable,Students and teachers.,Not specified,Pre-trained Large Language Models (LLMs).,Cognitive Tutor.,Middle school (Algebra I).,"Automated feedback, hints, descriptive feedback.",Feedback is generated based on error classific...,The feedback provided is immediate and tailore...,Limited student agency; feedback is predefined...,Students do not control feedback timing; feedb...,Classification accuracy.,Not specified.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,105,Virtual tutor and pupil interaction: A study o...,Y,Not applicable,Students (children aged 9-11 years),Classroom setting,Embodied Conversational Agent (ECA),Virtual Interactive Behavior (VIB) platform,Primary school (fifth grade),Formative feedback,Feedback provided through verbal and facial ex...,Feedback is perceived as more effective when e...,Student agency in response to feedback,Not specified,"Accuracy rates, reaction times, and questionna...",Not specified
105,106,What makes tablet-based learning effective? A ...,Y,Not applicable,Students and teachers.,Classroom setting.,Intelligent Tutoring System (ITS) with real-ti...,IntuiGéo app.,Seventh graders.,Real-time corrective feedback.,Feedback is provided immediately after drawing...,The feedback provided is immediate and correct...,Limited student agency; feedback is controlled...,Students do not control feedback timing; it is...,Success in drawing geometric shapes and angle ...,Not specified.
106,107,Work in Progress: Investigating ChatGPT for Gr...,Y,"Not applicable, as the paper meets the inclusi...",Students and instructors.,Classroom setting within an algebra course.,AI language model (ChatGPT).,ChatGPT.,Undergraduate.,AI-generated feedback.,Feedback is provided after students submit the...,The feedback provided by ChatGPT is intended t...,Limited student agency in feedback timing; fee...,Students do not control feedback timing; feedb...,"Accuracy, precision, recall, F1-score, and cla...",Not specified.
107,108,Workshop: Design and Application of Collaborat...,Y,Not applicable,"Students, teachers, researchers","Digital educational environments, classroom, o...","Artificial intelligence, machine learning",www.assistments.org,"Middle school, university","Hints, explanations, motivational messages, fe...",Dynamic experimentation with real-time data an...,The feedback provided is adaptive and based on...,Student agency through feedback adaptation,Not specified,"Engagement measures, learning accuracy, time s...",Not specified


In [19]:
df_final.to_csv('literature_coding_final_dataset.csv', index=False)