In [1]:
# Loading Packages
library(tidyverse)
library(easystats)

library(arrow) # For reading .parquet files, which are much smaller
library(data.table)
library(dtplyr)

library(ggplot2)
library(ggthemes)
library(patchwork)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 4.0.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.4     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
[34m# Attaching packages: easystats 0.7.5[39m
[32m✔ [39m[37mbayestestR [39m [32m0.17.0[39m   [32m✔ [39m[37mcorrelation[39m [32m0.8.8 [39m
[32m✔ [39m[37m

In [107]:
#' Show a target sentence together with its preceding sentences.
#'
#' Sentences are ordered by the first appearance of their `Sentence_ID` in
#' `dt`. Each of the (up to) `num` sentences before the target contributes a
#' single row (its first row in `dt`) with `Phrase_Token` blanked and
#' `Phrase_Surprisal` set to `NA`; the target sentence contributes all of its
#' rows unchanged.
#'
#' @param dt A data.table with at least the columns `Sentence_ID`,
#'   `Phrase_Token` and `Phrase_Surprisal`. It is not modified.
#' @param target_id `Sentence_ID` of the sentence of interest.
#' @param num Maximum number of preceding sentences to include.
#' @return A tibble (context rows first, then the target rows), or `NULL`
#'   when `target_id` does not occur in `dt`.
context <- function(dt, target_id, num = 3) {
    u_ids <- unique(dt$Sentence_ID)

    target_pos <- match(target_id, u_ids)
    if (is.na(target_pos)) {
        return(NULL)
    }

    # head()/tail() instead of start:end: a `:` range counts downwards when
    # num == 0, and the result keeps the type of Sentence_ID even when empty.
    prev_ids <- tail(head(u_ids, target_pos - 1L), max(num, 0))

    # One row per context sentence: the first row of each id, in dt order.
    dt_context <- unique(dt[Sentence_ID %in% prev_ids], by = "Sentence_ID")
    dt_context[, Phrase_Token := ""]
    dt_context[, Phrase_Surprisal := NA]

    dt_target <- dt[Sentence_ID == target_id]

    result <- rbind(dt_context, dt_target)

    # grp_idx is only present if another cell added it to dt by reference;
    # drop it when it exists instead of failing when it does not.
    result <- result %>% select(-any_of("grp_idx"))

    as_tibble(result)
}

    #' Return every row of a target sentence and of the sentences before it.
    #'
    #' Consecutive rows sharing a `Sentence_ID` form one run; the result holds
    #' all rows of the target's run and of the `num` runs preceding it.
    #'
    #' @param dt A data.table with a `Sentence_ID` column. It is not modified:
    #'   run ids are computed in a local vector rather than added to `dt` with
    #'   `:=`, which would change the caller's table by reference.
    #' @param target_id `Sentence_ID` of the sentence of interest.
    #' @param num Number of preceding sentences to include.
    #' @return A tibble without a `grp_idx` column; it has zero rows when
    #'   `target_id` does not occur in `dt`.
    context_full <- function(dt, target_id, num = 3) {
        run_idx <- rleid(dt$Sentence_ID)

        # Run id of the first row carrying target_id (NA when absent).
        target_run <- run_idx[match(target_id, dt$Sentence_ID)]

        # which() drops NA comparisons, so a missing target selects no rows.
        keep_rows <- which(run_idx >= (target_run - num) & run_idx <= target_run)
        result <- dt[keep_rows]

        # dt may already carry a grp_idx column from elsewhere; never return it.
        result <- result %>% select(-any_of("grp_idx"))
        as_tibble(result)
    }

In [2]:
# Read both Parquet files fully into memory.
df_sent <- collect(open_dataset("Data/Filtered Sentences.parquet"))
df_nps <- collect(open_dataset("Data/Head Nouns.parquet"))


In [4]:
 # Pull only the four needed columns from the full dataset, materialise them,
 # and convert the result to a data.table in place.
 df_full <- setDT(
     collect(
         select(
             open_dataset("Data/Full Data.parquet"),
             Sentence_Text, Phrase_Token, Phrase_Surprisal, Sentence_ID
         )
     )
 )

In [106]:
# The 20 rows with argPos == "obj" that have the highest surprisal.
df_nps %>%
    filter(argPos == "obj") %>%
    arrange(desc(surprisal)) %>%
    select(Sentence_Text, np_start_idx, Phrase_Token, surprisal, Sentence_ID) %>%
    slice_head(n = 20)

Sentence_Text,np_start_idx,Phrase_Token,surprisal,Sentence_ID
<chr>,<dbl>,<chr>,<dbl>,<chr>
Jessamy suddenly ran patience .,3,patience,18.73438,H8F_1563
The rehearsals promise texture .,3,texture,18.42188,FBL_1819
She tried attack .,2,attack,17.54688,C85_1336
ACTRESS Jane Fonda last night bowed movies .,6,movies,17.53125,CH6_4680
I asked Punjab .,2,Punjab,17.39062,H89_0272
Chrissie sidled Iris .,2,Iris,17.1875,GVP_1798
We had acknowledgement . ’,2,acknowledgement,16.57812,HWA_3574
He had Claire .,2,Claire,16.46875,JXW_4142
She set Grimm down .,2,Grimm,16.20312,CM4_0694
I want Philippe Beirut .,2,Philippe Beirut,16.01953,CEC_2248


In [116]:
# Target sentence JYF_2365 preceded by up to 15 sentences of context.
context(df_full, target_id = "JYF_2365", num = 15)

Sentence_Text,Phrase_Token,Phrase_Surprisal,Sentence_ID
<chr>,<chr>,<dbl>,<chr>
"‘ You allowed me to deflect your questions far too easily , ’ he answered , ‘ Is it any wonder that from almost the first moment I saw you … ’ he paused ‘ …",,,JYF_2350
I should start to be intrigued by you ? ’,,,JYF_2351
"‘ Oh , ’ Fabia murmured again .",,,JYF_2352
"But she instructed her fast-beating heart not to be so idiotic , that Ven meant nothing by that that he was intrigued that her journalistic methods were unlike those of any other journalist he had met .",,,JYF_2353
"‘ Urm — so — hmm — how did you find out , about Cara being married ? ’",,,JYF_2354
He shrugged .,,,JYF_2355
‘ It was quite simple .,,,JYF_2356
I rang magazine . ’,,,JYF_2357
Fabia 's mouth fell open — she had n't thought of that — though belatedly realised then that it was a fairly obvious thing for him to do .,,,JYF_2358
‘ You wanted to authenticate that I was who I said I was ? ’ she questioned .,,,JYF_2359


Sentence_Text,Phrase_Token,Phrase_Surprisal,Sentence_ID
<chr>,<chr>,<dbl>,<chr>
‘ And so you try to make it shorter . ’,,,FNT_4186
She looked at him for a moment and burst out laughing .,,,FNT_4187
‘ Are you offering that as an interpretation ? ’,,,FNT_4188
Jacob shook his head sadly .,Jacob,9.21875,FNT_4189
Jacob shook his head sadly .,shook,3.44335938,FNT_4189
Jacob shook his head sadly .,his head,0.01050377,FNT_4189
Jacob shook his head sadly .,his head,0.01050377,FNT_4189
Jacob shook his head sadly .,sadly,4.9609375,FNT_4189
Jacob shook his head sadly .,.,0.51953125,FNT_4189


In [68]:
# View a context window from the full dataset: all rows of sentence EF4_0684
# and of the three sentences that precede it.

# Number each run of consecutive rows sharing a Sentence_ID. This adds the
# grp_idx column to df_full by reference.
df_full[, grp_idx := rleid(Sentence_ID)]

# Run number of the first row belonging to the target sentence.
target_grp <- df_full[Sentence_ID == "EF4_0684", grp_idx][1]

# Keep the target run and the three runs before it.
result <- df_full[grp_idx <= target_grp & grp_idx >= (target_grp - 3)]

as_tibble(result)

Sentence_Text,Phrase_Token,Phrase_Surprisal,Sentence_ID,grp_idx
<chr>,<chr>,<dbl>,<chr>,<int>
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",And,3.6738281,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",since,5.0000000,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",that assumption,2.2856445,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",that assumption,2.2856445,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",is,0.4821777,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",implicit,6.7304688,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",in,0.2346191,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",the Webbs ' criticism,3.7994385,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",the Webbs ' criticism,3.7994385,EF4_0681,2178987
"And since that assumption is implicit in the Webbs ' criticism of Owen , it can not be understood too clearly that on their argument there can be no place for industrial co-operatives , properly so-called : no place , because the ownership , whether individual or collective , of an industrial co-operative rests in the people working in it .",the Webbs ' criticism,3.7994385,EF4_0681,2178987
