In [1]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import Column, Integer, String, DateTime, func
from sqlalchemy.orm import declarative_base
from sqlalchemy import inspect

In [2]:
from database import Base, SessionLocal, engine, ensure_views_from_files, init_db

In [6]:
from main.gsheets import load_dict
import main.Constants as Constants

dict_sheet_name = Constants.RESPONSE_LOG_SHEET_NAME
gsheet_name = Constants.SHEET_NAME

response_log = load_dict(gsheet_mode=True, gsheet_name=gsheet_name, worksheet_name=dict_sheet_name)
response_log.columns = [col.lower().replace(' ', '_') for col in response_log.columns]

In [8]:
response_log.to_sql('ResponseLog', con=engine, if_exists='append', index=False)

28

In [None]:
responselog_sql = pd.read_sql("""
                 SELECT *
                 FROM ResponseLog
                 """, engine)

responselog_sql

Unnamed: 0,quiz_id,prompt,prompt_pinyin,prompt_meaning,response,response_pinyin,response_meaning,correctness,naturalness,contextual_appropriateness,comment,complexity,tone


In [27]:
df = pd.read_sql("""
                 SELECT 
                    WordDict.*, 
                    IIF(num_quiz_attempt IS NULL, 0, num_quiz_attempt) AS num_quiz_attempt,
                    IIF(pinyin_correct_cnt IS NULL, 0, pinyin_correct_cnt) AS pinyin_correct_cnt,
                    IIF(meaning_correct_cnt IS NULL, 0, meaning_correct_cnt) AS meaning_correct_cnt, 
                    last_quiz
                 FROM WordDict
                 LEFT JOIN QuizScore ON (WordDict.word_id = QuizScore.word_id) AND (WordDict.word = QuizScore.word)
                 """, engine)

In [None]:
rename_dict = {'word_id': 'Word Id',
 'word': 'Word',
 'pinyin': 'Pinyin',
 'pinyin_simplified': 'Pinyin Simplified',
 'type': 'Type',
 'word_category': 'Word Category',
 'word_rarity': 'Word Rarity',
 'meaning': 'Meaning',
 'sentence': 'Sentence',
 'sentence_pinyin': 'Sentence Pinyin',
 'sentence_meaning': 'Sentence Meaning',
 'added_date': 'Added Date',
 'num_quiz_attempt': 'Quiz Attempts',
 'pinyin_correct_cnt': 'Num Pinyin Correct',
 'meaning_correct_cnt': 'Num Meaning Correct',
 'last_quiz': 'Last Quiz'}

df = df.rename(columns=rename_dict)

In [29]:
df

Unnamed: 0,Word Id,Word,Pinyin,Pinyin Simplified,Type,Word Category,Word Rarity,Meaning,Sentence,Sentence Pinyin,Sentence Meaning,Added Date,Quiz Attempts,Num Pinyin Correct,Num Meaning Correct,Last Quiz
0,D000001,帮助,bang1 zhu4,bang1 zhu4,Noun/Verb,Support,Common,Help/Assistance,我可以帮助你学习中文,Wǒ kěyǐ bāngzhù nǐ xuéxí Zhōngwén.,I can help you study Chinese.,2024-09-21 00:00:00.000000,0,0,0,
1,D000002,包裹,bao1 guo3,bao1 guo3,Noun,Object,Common,Package,他昨天收到了一个包裹。,Wǒ zuótiān shōudào le bāoguǒ.,I received the package yesterday.,2024-09-21 00:00:00.000000,0,0,0,
2,D000004,不错,bu2 cuo4,bu2 cuo4,Adjective,Opinion,Common,Good (More positive than 还好),这个电影不错，我们可以一起去看。,Zhè gè diànyǐng bùcuò wǒmen kěyǐ yìqǐ qù kàn,This movie is pretty good; we can go watch it ...,2024-09-21 00:00:00.000000,0,0,0,
3,D000005,不过,bu2 guo4,bu2 guo4,Grammar,Grammar,Common,But (Similar to dan4 shi4 but less formal),我喜欢这家餐厅，不过价格有点贵。,Wǒ xǐhuān zhè jiā cāntīng búguò jiàgé yǒudiǎn ...,I like this restaurant but the price is a bit ...,2024-09-21 00:00:00.000000,0,0,0,
4,D000006,不太,bu2 tai4,bu2 tai4,Adjuster,Degree,Common,not quite,不太好,bu2 tai4 hao3,not quite good,2024-09-21 00:00:00.000000,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1902,D002655,阳光,yáng guāng,yang2 guang1,Noun,Nature,Common,Sunlight; the light or warmth that comes from ...,阳光透过窗户洒在地板上。,Yángguāng tòuguò chuānghù sǎ zài dìbǎn shàng.,Sunlight shines through the window onto the fl...,2025-08-17 00:00:00.000000,0,0,0,
1903,D002656,阳光,yáng guāng,yang2 guang1,Adjective,Character,Common,Optimistic or cheerful; having a bright and po...,她总是那么阳光，感染身边的人。,"Tā zǒng shì nàme yángguāng, gǎnrǎn shēnbiān de...","She is always so cheerful, spreading positivit...",2025-08-17 00:00:00.000000,0,0,0,
1904,D002657,混蛋,hún dàn,hun2 dan4,Noun,Emotion,Common,A contemptible or despicable person; often use...,他真是个混蛋，居然骗了我。,"Tā zhēn shì gè húndàn, jūrán piàn le wǒ.",He's such a jerk for deceiving me.,2025-08-17 00:00:00.000000,0,0,0,
1905,D002658,动手,dòng shǒu,dong4 shou3,Verb,Action,Common,To start doing something; to begin a task or a...,开始动手，我们还有很多工作要做。,"Kāishǐ dòngshǒu, wǒmen hái yǒu hěn duō gōngzuò...","Let's get started, we have a lot of work to do.",2025-08-17 00:00:00.000000,0,0,0,


In [30]:
cols = ['Word Id', 'Word', 'Pinyin', 'Meaning', 'Added Date', 'Word Category', 'Word Rarity', 'Type', 'Sentence', 'Sentence Pinyin', 'Sentence Meaning', 'Quiz Attempts', 'Num Pinyin Correct', 'Num Meaning Correct', 'Last Quiz']
df[cols]

Unnamed: 0,Word Id,Word,Pinyin,Meaning,Added Date,Word Category,Word Rarity,Type,Sentence,Sentence Pinyin,Sentence Meaning,Quiz Attempts,Num Pinyin Correct,Num Meaning Correct,Last Quiz
0,D000001,帮助,bang1 zhu4,Help/Assistance,2024-09-21 00:00:00.000000,Support,Common,Noun/Verb,我可以帮助你学习中文,Wǒ kěyǐ bāngzhù nǐ xuéxí Zhōngwén.,I can help you study Chinese.,0,0,0,
1,D000002,包裹,bao1 guo3,Package,2024-09-21 00:00:00.000000,Object,Common,Noun,他昨天收到了一个包裹。,Wǒ zuótiān shōudào le bāoguǒ.,I received the package yesterday.,0,0,0,
2,D000004,不错,bu2 cuo4,Good (More positive than 还好),2024-09-21 00:00:00.000000,Opinion,Common,Adjective,这个电影不错，我们可以一起去看。,Zhè gè diànyǐng bùcuò wǒmen kěyǐ yìqǐ qù kàn,This movie is pretty good; we can go watch it ...,0,0,0,
3,D000005,不过,bu2 guo4,But (Similar to dan4 shi4 but less formal),2024-09-21 00:00:00.000000,Grammar,Common,Grammar,我喜欢这家餐厅，不过价格有点贵。,Wǒ xǐhuān zhè jiā cāntīng búguò jiàgé yǒudiǎn ...,I like this restaurant but the price is a bit ...,0,0,0,
4,D000006,不太,bu2 tai4,not quite,2024-09-21 00:00:00.000000,Degree,Common,Adjuster,不太好,bu2 tai4 hao3,not quite good,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1902,D002655,阳光,yáng guāng,Sunlight; the light or warmth that comes from ...,2025-08-17 00:00:00.000000,Nature,Common,Noun,阳光透过窗户洒在地板上。,Yángguāng tòuguò chuānghù sǎ zài dìbǎn shàng.,Sunlight shines through the window onto the fl...,0,0,0,
1903,D002656,阳光,yáng guāng,Optimistic or cheerful; having a bright and po...,2025-08-17 00:00:00.000000,Character,Common,Adjective,她总是那么阳光，感染身边的人。,"Tā zǒng shì nàme yángguāng, gǎnrǎn shēnbiān de...","She is always so cheerful, spreading positivit...",0,0,0,
1904,D002657,混蛋,hún dàn,A contemptible or despicable person; often use...,2025-08-17 00:00:00.000000,Emotion,Common,Noun,他真是个混蛋，居然骗了我。,"Tā zhēn shì gè húndàn, jūrán piàn le wǒ.",He's such a jerk for deceiving me.,0,0,0,
1905,D002658,动手,dòng shǒu,To start doing something; to begin a task or a...,2025-08-17 00:00:00.000000,Action,Common,Verb,开始动手，我们还有很多工作要做。,"Kāishǐ dòngshǒu, wǒmen hái yǒu hěn duō gōngzuò...","Let's get started, we have a lot of work to do.",0,0,0,


In [37]:
phrase_df = pd.read_sql("""
                 SELECT *
                 FROM PhraseDict
                 """, engine)

rename_dict = {col:col.replace('_', ' ').title() for col in phrase_df}
phrase_df = phrase_df.rename(columns=rename_dict)
phrase_df = phrase_df[['Phrase Id', 'Added Date', 'Complexity', 'Category', 'Tone', 'Line', 'Pinyin', 'Meaning', 'Response', 'Response Pinyin','Response Meaning']]

phrase_df

Unnamed: 0,Phrase Id,Added Date,Complexity,Category,Tone,Line,Pinyin,Meaning,Response,Response Pinyin,Response Meaning
0,P000001,2024-12-26,Medium,Daily Life,Casual,我们今天晚上做什么？,Wǒmen jīntiān wǎnshàng zuò shénme?,What should we do tonight?,你想做什么？我可以做饭或者看电影。,Nǐ xiǎng zuò shénme? Wǒ kěyǐ zuò fàn huòzhě kà...,What do you want to do? I can cook or we can w...
1,P000002,2024-12-26,Medium,Daily Life,Casual,你觉得这个房间要不要再收拾一下？,Nǐ juéde zhège fángjiān yào bù yào zài shōushi...,Do you think we should tidy up this room again?,我觉得可以，整理一下会更舒服。,"Wǒ juéde kěyǐ, zhěnglǐ yīxià huì gèng shūfu.","I think we can, tidying it up will make it mor..."
2,P000003,2024-12-26,Medium,Daily Life,Casual,你能帮我拿一下那本书吗？,Nǐ néng bāng wǒ ná yīxià nà běn shū ma?,Can you help me grab that book?,当然，在哪儿？,"Dāngrán, zài nǎr?","Of course, where is it?"
3,P000004,2024-12-26,Medium,Daily Life,Casual,我们还需要买点什么东西吗？,Wǒmen hái xūyào mǎi diǎn shénme dōngxī ma?,Do we need to buy anything else?,可能需要买一些蔬菜和水果。,Kěnéng xūyào mǎi yīxiē shūcài hé shuǐguǒ.,We might need to buy some vegetables and fruits.
4,P000005,2024-12-26,Medium,Daily Life,Casual,你今天下午有空吗？我们可以一起出去走走。,Nǐ jīntiān xiàwǔ yǒu kòng ma? Wǒmen kěyǐ yīqǐ ...,Are you free this afternoon? We can go out for...,下午我有点事，但可以晚点。,"Xiàwǔ wǒ yǒudiǎn shì, dàn kěyǐ wǎndiǎn.","I have some things in the afternoon, but I can..."
...,...,...,...,...,...,...,...,...,...,...,...
107,P000113,2025-08-31,Medium,Cooking,Casual,你来帮我尝尝这个汤怎么样。,Nǐ lái bāng wǒ chángchang zhège tāng zěnmeyàng.,Come help me taste this soup.,好的，我来试试。,"Hǎo de, wǒ lái shìshi.","Sure, let me try it."
108,P000114,2025-08-31 00:00:00.000000,Medium,Cooking,Casual,我们先切菜还是洗菜？,Wǒmen xiān qiē cài háishì xǐ cài?,Should we chop or wash the vegetables first?,我来洗菜，你来切菜。,"Wǒ lái xǐ cài, nǐ lái qiē cài.","I'll wash the vegetables, you chop them."
109,P000115,2025-08-31 00:00:00.000000,Medium,Cooking,Casual,你觉得这个菜需要加盐吗？,Nǐ juéde zhège cài xūyào jiā yán ma?,Do you think this dish needs more salt?,嗯，加一点会更好吃。,"Ń, jiā yīdiǎn huì gèng hǎochī.","Hmm, adding a little will make it tastier."
110,P000116,2025-08-31 00:00:00.000000,Medium,Commute,Polite,今天路况怎么样？,Jīntiān lùkuàng zěnme yàng?,How is the traffic today?,交通还算顺畅。,Jiāotōng hái suàn shùnchàng.,The traffic is fairly smooth.


Unnamed: 0,Phrase Id,Line,Pinyin,Meaning,Response,Response Pinyin,Response Meaning,Complexity,Category,Tone,Added Date
0,P000001,我们今天晚上做什么？,Wǒmen jīntiān wǎnshàng zuò shénme?,What should we do tonight?,你想做什么？我可以做饭或者看电影。,Nǐ xiǎng zuò shénme? Wǒ kěyǐ zuò fàn huòzhě kà...,What do you want to do? I can cook or we can w...,Medium,Daily Life,Casual,2024-12-26
1,P000002,你觉得这个房间要不要再收拾一下？,Nǐ juéde zhège fángjiān yào bù yào zài shōushi...,Do you think we should tidy up this room again?,我觉得可以，整理一下会更舒服。,"Wǒ juéde kěyǐ, zhěnglǐ yīxià huì gèng shūfu.","I think we can, tidying it up will make it mor...",Medium,Daily Life,Casual,2024-12-26
2,P000003,你能帮我拿一下那本书吗？,Nǐ néng bāng wǒ ná yīxià nà běn shū ma?,Can you help me grab that book?,当然，在哪儿？,"Dāngrán, zài nǎr?","Of course, where is it?",Medium,Daily Life,Casual,2024-12-26
3,P000004,我们还需要买点什么东西吗？,Wǒmen hái xūyào mǎi diǎn shénme dōngxī ma?,Do we need to buy anything else?,可能需要买一些蔬菜和水果。,Kěnéng xūyào mǎi yīxiē shūcài hé shuǐguǒ.,We might need to buy some vegetables and fruits.,Medium,Daily Life,Casual,2024-12-26
4,P000005,你今天下午有空吗？我们可以一起出去走走。,Nǐ jīntiān xiàwǔ yǒu kòng ma? Wǒmen kěyǐ yīqǐ ...,Are you free this afternoon? We can go out for...,下午我有点事，但可以晚点。,"Xiàwǔ wǒ yǒudiǎn shì, dàn kěyǐ wǎndiǎn.","I have some things in the afternoon, but I can...",Medium,Daily Life,Casual,2024-12-26
...,...,...,...,...,...,...,...,...,...,...,...
107,P000113,你来帮我尝尝这个汤怎么样。,Nǐ lái bāng wǒ chángchang zhège tāng zěnmeyàng.,Come help me taste this soup.,好的，我来试试。,"Hǎo de, wǒ lái shìshi.","Sure, let me try it.",Medium,Cooking,Casual,2025-08-31
108,P000114,我们先切菜还是洗菜？,Wǒmen xiān qiē cài háishì xǐ cài?,Should we chop or wash the vegetables first?,我来洗菜，你来切菜。,"Wǒ lái xǐ cài, nǐ lái qiē cài.","I'll wash the vegetables, you chop them.",Medium,Cooking,Casual,2025-08-31 00:00:00.000000
109,P000115,你觉得这个菜需要加盐吗？,Nǐ juéde zhège cài xūyào jiā yán ma?,Do you think this dish needs more salt?,嗯，加一点会更好吃。,"Ń, jiā yīdiǎn huì gèng hǎochī.","Hmm, adding a little will make it tastier.",Medium,Cooking,Casual,2025-08-31 00:00:00.000000
110,P000116,今天路况怎么样？,Jīntiān lùkuàng zěnme yàng?,How is the traffic today?,交通还算顺畅。,Jiāotōng hái suàn shùnchàng.,The traffic is fairly smooth.,Medium,Commute,Polite,2025-08-31 00:00:00.000000


### Old Manual Code

sdfsdfsdfs

In [2]:
df = pd.read_csv("data/MandarinWordList.csv")
df.columns = [col.lower().replace(" ", "_") for col in df.columns]

In [3]:
df.dtypes

word_id              object
word                 object
pinyin               object
pinyin_simplified    object
type                 object
word_category        object
word_rarity          object
meaning              object
sentence             object
sentence_pinyin      object
sentence_meaning     object
added_date           object
num_quiz_attempt      int64
num_correct           int64
num_wrong             int64
last_quiz            object
dtype: object

In [4]:
df['sentence_pinyin'] = df['sentence_pinyin'].fillna('')
df['sentence'] = df['sentence'].fillna('')
df['sentence_meaning'] = df['sentence_meaning'].fillna('')

df['added_date'] = pd.to_datetime(df['added_date'], errors='coerce')
df['last_quiz'] = pd.to_datetime(df['last_quiz'], errors='coerce')

In [5]:
from sqlalchemy import Column, Integer, String, DateTime, func
from sqlalchemy import ForeignKey
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class WordDict(Base):
    __tablename__ = "WordDict"  
    word_id = Column(String, nullable=False, unique=True, primary_key=True, index=True)
    word = Column(String, nullable=False)
    pinyin = Column(String, nullable=False)
    pinyin_simplified = Column(String, nullable=False)
    type = Column(String, nullable=False)
    word_category = Column(String, nullable=False)
    word_rarity = Column(String, nullable=False)
    meaning = Column(String, nullable=False)
    sentence = Column(String, nullable=False)
    sentence_pinyin = Column(String, nullable=False)
    sentence_meaning = Column(String, nullable=False)
    added_date = Column(DateTime(timezone=True), server_default=func.now())
    #It's better to track quiz stats in a separate table to allow for multiple quiz attempts over time
    #We can display quiz stats in the app by querying the quiz attempts table and doing groupby 
    #num_quiz_attempt = Column(Integer, default=0)
    #num_correct = Column(Integer, default=0)
    #num_wrong = Column(Integer, default=0)
    #last_quiz = Column(DateTime(timezone=True), nullable=True)

class QuizAgg(Base):
    __tablename__ = "QuizAgg"  
    word_id = Column(String, ForeignKey("WordDict.word_id", ondelete="CASCADE"), primary_key=True)
    num_quiz_attempt = Column(Integer, default=0)
    num_correct = Column(Integer, default=0)
    num_wrong = Column(Integer, default=0)
    last_quiz = Column(DateTime(timezone=True), nullable=True)


In [6]:
df.word_id.value_counts().sort_values(ascending=False).head(10)

word_id
D000001    1
D000016    1
D000005    1
D000006    1
D000007    1
D000008    1
D000009    1
D000010    1
D000011    1
D000012    1
Name: count, dtype: int64

In [7]:
engine = create_engine("sqlite:///mydata.db", future=True)
Base.metadata.create_all(bind=engine)

df.drop(['num_quiz_attempt', 'num_correct', 'num_wrong', 'last_quiz'], axis=1).to_sql(WordDict.__tablename__, engine, if_exists="append", index=False)
df[['word_id', 'num_quiz_attempt', 'num_correct', 'num_wrong', 'last_quiz']].to_sql(QuizAgg.__tablename__, engine, if_exists="append", index=False)

1903

In [12]:
inspector = inspect(engine)

# List tables
print(inspector.get_table_names())

# Get columns for a table
for col in inspector.get_columns("WordDict"):
    print(col['name'], col['type'])


['QuizAgg', 'WordDict']
word_id VARCHAR
word VARCHAR
pinyin VARCHAR
pinyin_simplified VARCHAR
type VARCHAR
word_category VARCHAR
word_rarity VARCHAR
meaning VARCHAR
sentence VARCHAR
sentence_pinyin VARCHAR
sentence_meaning VARCHAR
added_date DATETIME


In [9]:
inspector.get_columns('WordDict')

[{'name': 'word_id',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 1},
 {'name': 'word',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 0},
 {'name': 'pinyin',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 0},
 {'name': 'pinyin_simplified',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 0},
 {'name': 'type',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 0},
 {'name': 'word_category',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 0},
 {'name': 'word_rarity',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 0},
 {'name': 'meaning',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 0},
 {'name': 'sentence',
  'type': VARCHAR(),
  'nullable': False,
  'default': None,
  'primary_key': 0},
 {'name': 'sentence_pinyin',
  'type': VARCHAR(),
  'nullab

In [10]:
pd.read_sql("SELECT COUNT(*) FROM WordDict", engine)


Unnamed: 0,COUNT(*)
0,1903


In [11]:
base = int(val.replace("D","")) if val else 0


NameError: name 'val' is not defined

In [None]:
val = pd.read_sql("SELECT MAX(word_id) AS max_id FROM WordDict", engine)["max_id"].iat[0]
base = int(val.replace("D","")) if val else 0
new_ids = [f"D{base+i:06d}" for i in range(1, len(df)+1)]
new_ids

['D002646']