# MGSE ICCR: Data Wrangling (Australian Lexicon)


Intro:
Just grab the terms, wrangle and export as csv for visualisation


Date: 30/1/2020

Version: 0.01

Environment: Python 3.7.6 and Jupyter notebook

Libraries used: 
    pandas, re, string, csv

In [63]:
# Import the libraries needed to read and report on data files
import pandas as pd
import re
import numpy as np
import string

In [64]:
# Load the lexicon terms. The first line of the file is skipped and the three
# columns are given short names: term, description and examples.
colNames = ('term', 'descr', 'egs')
df = pd.read_csv("AusTerms.csv", skiprows=1, names=colNames)

# Rows that are entirely blank are not dropped at this stage.
df.head(62)

Unnamed: 0,term,descr,egs
0,answering questions,The activity of responding to a question or a ...,Example:
1,,,The teacher asks: “What’s the area of a triang...
2,,,A student responds: “Eighty square centimetres.”
3,,,Another student responds: “Forty square centim...
4,,,
...,...,...,...
57,,,Copying solutions from the board into a workbook.
58,,,
59,defining,"The teacher or student gives a clear meaning, ...",Example:
60,(giving a definition),,The teacher identifies the characteristics of ...


In [65]:
# A term row immediately followed by another non-empty term row means the
# following row carries an alternative name for the same term.
dfConcats = df
termPresent = dfConcats['term'].notna()
nextTermPresent = dfConcats['term'].shift(-1).notna()
dfConcats['SameTerm'] = termPresent & nextTermPresent
# Show the terms that have an alternative name on the next row
dfConcats[dfConcats['SameTerm']]

                                 

Unnamed: 0,term,descr,egs,SameTerm
59,defining,"The teacher or student gives a clear meaning, ...",Example:,True
72,differentiating,"Any action in which instruction is modified, a...",Examples:,True
106,engaging,A student is actively involved with an educati...,Examples:,True
113,explaining,Make an idea or situation clear to someone by ...,Example:,True


In [66]:
# Record the alternative name on the term's own row. The value is the 'term'
# of the following row; rows without an alternative stay NaN.
# Series.where builds the column in a single step, so no string is ever
# written into an all-NaN float column (newer pandas versions warn about or
# reject that kind of upcasting assignment).
dfConcats['alt'] = dfConcats['term'].shift(-1).where(dfConcats['SameTerm'])

dfConcats.head(62)


Unnamed: 0,term,descr,egs,SameTerm,alt
0,answering questions,The activity of responding to a question or a ...,Example:,False,
1,,,The teacher asks: “What’s the area of a triang...,False,
2,,,A student responds: “Eighty square centimetres.”,False,
3,,,Another student responds: “Forty square centim...,False,
4,,,,False,
...,...,...,...,...,...
57,,,Copying solutions from the board into a workbook.,False,
58,,,,False,
59,defining,"The teacher or student gives a clear meaning, ...",Example:,True,(giving a definition)
60,(giving a definition),,The teacher identifies the characteristics of ...,False,


In [67]:
# Rows directly after a flagged row hold the alternative names themselves
followsFlagged = dfConcats['SameTerm'].shift().eq(True)
dfConcats.loc[followsFlagged, 'term']

60     (giving a definition)
73         (differentiation)
107             (engagement)
114            (explanation)
Name: term, dtype: object

In [68]:
# Blank out 'term' on the rows that only hold an alternative name, so they are
# not treated as separate terms later on
altNameRows = dfConcats['SameTerm'].shift().eq(True)
dfConcats.loc[altNameRows, 'term'] = np.nan
dfConcats.iloc[58:70]

Unnamed: 0,term,descr,egs,SameTerm,alt
58,,,,False,
59,defining,"The teacher or student gives a clear meaning, ...",Example:,True,(giving a definition)
60,,,The teacher identifies the characteristics of ...,False,
61,,,,False,
62,,,Non-example:,False,
63,,,A general discussion about the characteristics...,False,
64,,,,False,
65,demonstrating,An activity undertaken by the teacher or stude...,Examples:,False,
66,,,The teacher draws a diagram of a circle on the...,False,
67,,,The teacher uses a computer program to display...,False,


In [69]:
df = dfConcats
# Carry each term down over its continuation rows so the rows can be grouped.
# The result is assigned back to the column: an in-place fillna on a selected
# column does not update the frame under pandas copy-on-write, and
# Series.ffill replaces the deprecated fillna(method='ffill').
df['term'] = df['term'].ffill()

# Trim stray whitespace; the .str accessor passes NaN through instead of
# raising, which matters if the very first row has no term to fill from
df['term'] = df['term'].str.strip()
df.head()

Unnamed: 0,term,descr,egs,SameTerm,alt
0,answering questions,The activity of responding to a question or a ...,Example:,False,
1,answering questions,,The teacher asks: “What’s the area of a triang...,False,
2,answering questions,,A student responds: “Eighty square centimetres.”,False,
3,answering questions,,Another student responds: “Forty square centim...,False,
4,answering questions,,,False,


In [70]:
# Spot-check: all rows now labelled with the term "defining"
df.loc[df['term'].eq("defining")]

Unnamed: 0,term,descr,egs,SameTerm,alt
59,defining,"The teacher or student gives a clear meaning, ...",Example:,True,(giving a definition)
60,defining,,The teacher identifies the characteristics of ...,False,
61,defining,,,False,
62,defining,,Non-example:,False,
63,defining,,A general discussion about the characteristics...,False,
64,defining,,,False,


In [71]:
# Replace missing text with a single space so the per-term string joins that
# follow never meet NaN. fillna is the direct call for this; replace() with
# regex=True is documented as expecting a string pattern, not NaN.
textCols = ['descr', 'egs', 'alt']
df[textCols] = df[textCols].fillna(' ')
df[df['term']=="defining"]

Unnamed: 0,term,descr,egs,SameTerm,alt
59,defining,"The teacher or student gives a clear meaning, ...",Example:,True,(giving a definition)
60,defining,,The teacher identifies the characteristics of ...,False,
61,defining,,,False,
62,defining,,Non-example:,False,
63,defining,,A general discussion about the characteristics...,False,
64,defining,,,False,


In [72]:
# Spot-check: a term whose description is split over several rows
df.loc[df['term'].eq("checking")]

Unnamed: 0,term,descr,egs,SameTerm,alt
34,checking,The process by which a teacher or student,Examples:,False,
35,checking,,The teacher makes notes in her chronicle to in...,False,
36,checking,"• checks answers, by determining the exactness...",The teacher makes a mental note or observation.,False,
37,checking,"• checks progress, by determining whether the ...",,False,
38,checking,,Non-example:,False,
39,checking,,Students annotate their workbook solutions.,False,


In [73]:
# Merge the description fragments of each term into one string.
# Fragments are joined with a space (the same separator used for the examples)
# so that text from consecutive non-empty rows does not run together; the
# whitespace is then collapsed so the blank filler rows leave no padding.
groupDescr = df.groupby('term')['descr'].apply(
    lambda parts: ' '.join(' '.join(parts).split())
)
groupDescr

term
(use of a) hook           The engaging introduction of a topic or sub-to...
answering questions       The activity of responding to a question or a ...
applying                  An activity in which a taught procedure or con...
assessment                An activity undertaken by teacher or students ...
assigning homework        The teacher assigns tasks to be completed outs...
                                                ...                        
summative assessment      Information is collected for the purpose of su...
test/testing              A situation in which individuals are required ...
wait time                 A deliberate pause before or after a question ...
whole class discussion    An activity in which the teacher and students ...
worked example            The teacher (or student) writes out the steps ...
Name: descr, Length: 61, dtype: object

In [74]:
# Merge the example fragments of each term into one space-separated string
egsByTerm = df.groupby('term')['egs']
groupEgs = egsByTerm.apply(' '.join)
groupEgs

term
(use of a) hook           Example: The teacher introduces polyhedra with...
answering questions       Example: The teacher asks: “What’s the area of...
applying                  Examples: Having been taught Pythagoras theore...
assessment                Examples: The teacher administers a test. The ...
assigning homework        Example: The teacher writes the homework on th...
                                                ...                        
summative assessment      Examples: Students complete a test that measur...
test/testing              Examples: Students complete a complex problem ...
wait time                 Example: The teacher says: “What is the area o...
whole class discussion    Example: The teacher invites students to share...
worked example            Example: The teacher writes out the solution t...
Name: egs, Length: 61, dtype: object

In [75]:
# One row per term: keep only the first row of each term, with its 'alt' value
firstOfTerm = ~df['term'].duplicated(keep='first')
dfSumm = df.loc[firstOfTerm, ['term', 'alt']]
dfSumm.head(20)

Unnamed: 0,term,alt
0,answering questions,
5,applying,
12,assessment,
20,assigning homework,
26,board work,
34,checking,
40,clarifying,
46,collecting work,
52,correcting,
59,defining,(giving a definition)


In [76]:
# Attach the combined description and examples to the one-row-per-term table
df = dfSumm.merge(groupDescr, on='term').merge(groupEgs, on='term')
df

Unnamed: 0,term,alt,descr,egs
0,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...
1,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...
2,assessment,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...
3,assigning homework,,The teacher assigns tasks to be completed outs...,Example: The teacher writes the homework on th...
4,board work,,"The teacher or students record workings, diagr...",Examples: A student solves a problem on the bo...
...,...,...,...,...
56,summative assessment,,Information is collected for the purpose of su...,Examples: Students complete a test that measur...
57,test/testing,,A situation in which individuals are required ...,Examples: Students complete a complex problem ...
58,wait time,,A deliberate pause before or after a question ...,Example: The teacher says: “What is the area o...
59,whole class discussion,,An activity in which the teacher and students ...,Example: The teacher invites students to share...


In [77]:
# Spot-check: the multi-row term is now a single row with joined text
df.loc[df['term'].eq('checking')]

Unnamed: 0,term,alt,descr,egs
5,checking,,The process by which a teacher or student • ch...,Examples: The teacher makes notes in her chron...


In [78]:
# ==================================================
# Grab the author categories
# Each row pairs a category description with its list of terms. Rows with any
# missing value are discarded and the index is renumbered from zero.
colNames = ('catdescr', 'term')
dfCat = (
    pd.read_csv("AusCategories.csv", names=colNames)
    .dropna()
    .reset_index(drop=True)
)
dfCat

Unnamed: 0,catdescr,term
0,Administration,"assessment, assigning homework, checking, coll..."
1,Assessment,"assessment, correcting, elicit understanding, ..."
2,Classroom Management,"disciplining, encouraging, giving praise, moni..."
3,Learning Strategies,"answering questions, applying, board work, che..."
4,Teaching Strategies,"answering questions, applying, assigning homew..."


In [79]:
# Build a category id ('cat0', 'cat1', ...) from each row's index label
dfCat['cat'] = [f'cat{int(rowId)}' for rowId in dfCat.index]
dfCat

Unnamed: 0,catdescr,term,cat
0,Administration,"assessment, assigning homework, checking, coll...",cat0
1,Assessment,"assessment, correcting, elicit understanding, ...",cat1
2,Classroom Management,"disciplining, encouraging, giving praise, moni...",cat2
3,Learning Strategies,"answering questions, applying, board work, che...",cat3
4,Teaching Strategies,"answering questions, applying, assigning homew...",cat4


In [80]:
# Turn the comma-separated term list of each category into one row per
# (category, term) pair, with surrounding whitespace removed from every term
dfCat['term'] = [
    [name.strip() for name in termList.split(',')]
    for termList in dfCat['term']
]
dfCat = dfCat.explode('term').reset_index(drop=True)
dfCat

Unnamed: 0,catdescr,term,cat
0,Administration,assessment,cat0
1,Administration,assigning homework,cat0
2,Administration,checking,cat0
3,Administration,collecting work,cat0
4,Administration,handing back work,cat0
...,...,...,...
96,Teaching Strategies,student (individual) work,cat4
97,Teaching Strategies,summarising,cat4
98,Teaching Strategies,wait time,cat4
99,Teaching Strategies,whole class discussion,cat4


In [81]:
# Attach the categories to the terms: a term listed under several categories
# appears once per category after this merge
colNames = ['term', 'descr', 'cat']  # only used by the disabled column subset below
df = df.merge(dfCat, on='term')
#df = df[colNames]
df.head(20)

Unnamed: 0,term,alt,descr,egs,catdescr,cat
0,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,Learning Strategies,cat3
1,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,Teaching Strategies,cat4
2,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,Learning Strategies,cat3
3,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,Teaching Strategies,cat4
4,assessment,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...,Administration,cat0
5,assessment,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...,Assessment,cat1
6,assigning homework,,The teacher assigns tasks to be completed outs...,Example: The teacher writes the homework on th...,Administration,cat0
7,assigning homework,,The teacher assigns tasks to be completed outs...,Example: The teacher writes the homework on th...,Teaching Strategies,cat4
8,board work,,"The teacher or students record workings, diagr...",Examples: A student solves a problem on the bo...,Learning Strategies,cat3
9,board work,,"The teacher or students record workings, diagr...",Examples: A student solves a problem on the bo...,Teaching Strategies,cat4


In [82]:
# Look at one full examples string (row label 10) before editing it
df.loc[10, 'egs']

'Examples: The teacher makes notes in her chronicle to indicate the timely completion of homework. The teacher makes a mental note or observation.   Non-example: Students annotate their workbook solutions.'

In [83]:
# Put an HTML line break in front of each "Non-ex..." heading so it starts on
# its own line when rendered as HTML. The negative lookbehind skips headings
# that already have the break, so re-running this cell does not stack tags.
df['egs'] = df['egs'].str.replace(r'(?<!<br/>)Non-ex', '<br/>Non-ex', regex=True)
df['egs'][10]

'Examples: The teacher makes notes in her chronicle to indicate the timely completion of homework. The teacher makes a mental note or observation.   <br/>Non-example: Students annotate their workbook solutions.'

In [84]:
# Capture the current column labels and display them
colnames = df.columns
colnames

Index(['term', 'alt', 'descr', 'egs', 'catdescr', 'cat'], dtype='object')

In [85]:

# Add the helper columns for the visualisation code and put the columns in the
# order it uses: 'amount' starts at 0 and 'id' mirrors the row index
colnames = ['id', 'term', 'catdescr', 'alt', 'descr', 'egs', 'cat', 'amount']
df = df.assign(amount=0, id=df.index)[colnames]
df.head()

Unnamed: 0,id,term,catdescr,alt,descr,egs,cat,amount
0,0,answering questions,Learning Strategies,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat3,0
1,1,answering questions,Teaching Strategies,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat4,0
2,2,applying,Learning Strategies,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,cat3,0
3,3,applying,Teaching Strategies,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,cat4,0
4,4,assessment,Administration,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...,cat0,0


In [86]:
# Collapse the per-category rows into one comma-separated string of category
# descriptions per term
catsByTerm = df.groupby('term')['catdescr']
dfCats = catsByTerm.apply(', '.join).reset_index()
dfCats

Unnamed: 0,term,catdescr
0,(use of a) hook,Teaching Strategies
1,answering questions,"Learning Strategies, Teaching Strategies"
2,applying,"Learning Strategies, Teaching Strategies"
3,assessment,"Administration, Assessment"
4,assigning homework,"Administration, Teaching Strategies"
...,...,...
56,summative assessment,Assessment
57,test/testing,Assessment
58,wait time,Teaching Strategies
59,whole class discussion,Teaching Strategies


In [87]:
# Check the current column order before swapping out 'catdescr'
df.columns

Index(['id', 'term', 'catdescr', 'alt', 'descr', 'egs', 'cat', 'amount'], dtype='object')

In [88]:
# Swap the single-category 'catdescr' for the combined per-term category string
cols = ['id', 'term', 'alt', 'descr', 'egs', 'cat', 'amount']
df = df[cols].merge(dfCats, on='term')
df.head()

Unnamed: 0,id,term,alt,descr,egs,cat,amount,catdescr
0,0,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat3,0,"Learning Strategies, Teaching Strategies"
1,1,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat4,0,"Learning Strategies, Teaching Strategies"
2,2,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,cat3,0,"Learning Strategies, Teaching Strategies"
3,3,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,cat4,0,"Learning Strategies, Teaching Strategies"
4,4,assessment,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...,cat0,0,"Administration, Assessment"


In [89]:
# Flag every repeat of a term after its first row (True = repeated occurrence)
df['dup'] = df['term'].duplicated()

In [90]:
# Inspect rows 20-29 (by position) to see the 'dup' flag on terms that have
# more than one category
df.iloc[20:30,:]

Unnamed: 0,id,term,alt,descr,egs,cat,amount,catdescr,dup
20,20,demonstrating,,An activity undertaken by the teacher or stude...,Examples: The teacher draws a diagram of a cir...,cat4,0,Teaching Strategies,False
21,21,differentiating,(differentiation),"Any action in which instruction is modified, a...",Examples: The teacher groups students for inst...,cat4,0,Teaching Strategies,False
22,22,disciplining,,The teacher identifies undesirable behaviour t...,Example: A teacher stops her activity and asks...,cat2,0,Classroom Management,False
23,23,elaborating,,A teacher or student provides additional infor...,Example: The teacher shades the fraction one-h...,cat4,0,Teaching Strategies,False
24,24,elicit understanding,,An activity undertaken by the teacher or stude...,Examples: The teacher asks a student to demons...,cat1,0,"Assessment, Teaching Strategies",False
25,25,elicit understanding,,An activity undertaken by the teacher or stude...,Examples: The teacher asks a student to demons...,cat4,0,"Assessment, Teaching Strategies",True
26,26,encouraging,,An action undertaken by the teacher for the pu...,"Examples: A teacher comments: ""Keep trying."" ""...",cat2,0,"Classroom Management, Teaching Strategies",False
27,27,encouraging,,An action undertaken by the teacher for the pu...,"Examples: A teacher comments: ""Keep trying."" ""...",cat4,0,"Classroom Management, Teaching Strategies",True
28,28,engaging,(engagement),A student is actively involved with an educati...,Examples: A student keeps working on solving a...,cat3,0,"Learning Strategies, Teaching Strategies",False
29,29,engaging,(engagement),A student is actively involved with an educati...,Examples: A student keeps working on solving a...,cat4,0,"Learning Strategies, Teaching Strategies",True


In [91]:
# Preview the first two rows before 'amount' is recalculated
df.head(2)

Unnamed: 0,id,term,alt,descr,egs,cat,amount,catdescr,dup
0,0,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat3,0,"Learning Strategies, Teaching Strategies",False
1,1,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat4,0,"Learning Strategies, Teaching Strategies",True


In [92]:
# 'amount' = number of comma-separated entries in the term's combined
# category string, i.e. how many categories the term is listed under
df['amount'] = [len(catList.split(',')) for catList in df['catdescr']]
df.head(2)

Unnamed: 0,id,term,alt,descr,egs,cat,amount,catdescr,dup
0,0,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat3,2,"Learning Strategies, Teaching Strategies",False
1,1,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat4,2,"Learning Strategies, Teaching Strategies",True


In [93]:
# Write the full table (every column currently in df) to CSV, without the
# index column
df.to_csv('ausdata2.csv',index=False)

In [94]:
# Unique (category description, category id) pairs.
# NOTE(review): at this point 'catdescr' holds the combined per-term category
# string, so a single 'cat' id pairs with several different descriptions --
# confirm this is the intended shape for the category lookup.
df[['catdescr','cat']].drop_duplicates()

Unnamed: 0,catdescr,cat
0,"Learning Strategies, Teaching Strategies",cat3
1,"Learning Strategies, Teaching Strategies",cat4
4,"Administration, Assessment",cat0
5,"Administration, Assessment",cat1
6,"Administration, Teaching Strategies",cat0
7,"Administration, Teaching Strategies",cat4
10,"Administration, Learning Strategies, Teaching ...",cat0
11,"Administration, Learning Strategies, Teaching ...",cat3
12,"Administration, Learning Strategies, Teaching ...",cat4
15,Administration,cat0


In [95]:
# Export the unique (category description, category id) pairs, without the
# index column.
# NOTE(review): 'catdescr' is the combined per-term category string here, not
# one description per 'cat' id -- confirm the consumer of this file expects that.
df[['catdescr','cat']].drop_duplicates().to_csv('AusCatData.csv',index=False)

In [96]:
# Final look at the first rows of the table
df.head()

Unnamed: 0,id,term,alt,descr,egs,cat,amount,catdescr,dup
0,0,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat3,2,"Learning Strategies, Teaching Strategies",False
1,1,answering questions,,The activity of responding to a question or a ...,Example: The teacher asks: “What’s the area of...,cat4,2,"Learning Strategies, Teaching Strategies",True
2,2,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,cat3,2,"Learning Strategies, Teaching Strategies",False
3,3,applying,,An activity in which a taught procedure or con...,Examples: Having been taught Pythagoras theore...,cat4,2,"Learning Strategies, Teaching Strategies",True
4,4,assessment,,An activity undertaken by teacher or students ...,Examples: The teacher administers a test. The ...,cat0,2,"Administration, Assessment",False
