In [None]:
# try to replicate these using spacy:

# agent: agent
# An agent is the complement of a passive verb which is introduced by the preposition “by” and does the
# action. This relation only appears in the collapsed dependencies, where it can replace prep by, where
# appropriate. It does not appear in basic dependencies output.
# “The man has been killed by the police” agent(killed, police)
# “Effects caused by the protein are important” agent(caused, protein)

In [3]:
import spacy

nlp = spacy.load("en_core_web_sm")

In [4]:
sentence = "The man has been killed by the police"

doc = nlp(sentence)

for token in doc:
    if token.dep_ == "agent":
        print(token.head.text, "--", "agent", "-->", token.text)
        print("\n")

killed -- agent --> by




In [7]:
dep_label = "ccomp"

sent1 = "He says that you like to swim"
sent2 = "I am certain that he did it"
sent3 = "I admire the fact that you are honest"

for sent in [sent1, sent2, sent3]:
    doc = nlp(sent)
    for token in doc:
        if token.dep_ == dep_label:
            print(token.head.text, "--", dep_label, "-->", token.text)
            print("\n")
    print("-------------------")

says -- ccomp --> like


-------------------
certain -- ccomp --> did


-------------------
-------------------


In [9]:
dep_label = "csubj"

sent1 = "What she said is not true"
sent2 = "What she said makes sense"

for sent in [sent1, sent2]:
    doc = nlp(sent)
    for token in doc:
        if token.dep_ == dep_label:
            print(token.head.text, "--", dep_label, "-->", token.text)
            print("\n")
    print("-------------------")

is -- csubj --> said


-------------------
makes -- csubj --> said


-------------------


In [10]:
# write down all the dependecies between entities in the sentence Bill is big and honest

sentence = "Bill is big and honest"

doc = nlp(sentence)

for token in doc:
    print(token.text, "-->", token.dep_, "-->", token.head.text)
    print("\n")

Bill --> nsubj --> is


is --> ROOT --> is


big --> acomp --> is


and --> cc --> big


honest --> conj --> big




In [11]:
# same for the sentence They heard about you missing classes

sentence = "They heard about you missing classes"

doc = nlp(sentence)

for token in doc:
    print(token.text, "-->", token.dep_, "-->", token.head.text)
    print("\n")

They --> nsubj --> heard


heard --> ROOT --> heard


about --> prep --> heard


you --> nsubj --> missing


missing --> pcomp --> about


classes --> dobj --> heard




In [12]:
sentence = "We have no information on whether users are at risk"

doc = nlp(sentence)

for token in doc:
    print(token.text, "-->", token.dep_, "-->", token.head.text)
    print("\n")

We --> nsubj --> have


have --> ROOT --> have


no --> det --> information


information --> dobj --> have


on --> prep --> information


whether --> mark --> are


users --> nsubj --> are


are --> pcomp --> on


at --> prep --> are


risk --> pobj --> at




In [13]:
# same for The guy, John said, left early in the morning

sentence = "The guy, John said, left early in the morning"

doc = nlp(sentence)

for token in doc:
    print(token.text, "-->", token.dep_, "-->", token.head.text)
    print("\n")

The --> det --> guy


guy --> nsubj --> left


, --> punct --> said


John --> nsubj --> said


said --> parataxis --> left


, --> punct --> said


left --> ROOT --> left


early --> advmod --> in


in --> prep --> left


the --> det --> morning


morning --> pobj --> in




In [19]:
sentences = [
    "Truffles picked during the spring are tasty. Points to establish are the following. I don't have anything to say to you. Bill tried to shoot demonstrating his incompetence. I saw the man you love.",
]

for sentence in sentences:
    print("Sentence:", sentence)
    doc = nlp(sentence)

    for token in doc:
        print(token.text, "-->", token.dep_, "-->", token.head.text)
        print("\n")
    print("-------------------")

Sentence: Truffles picked during the spring are tasty. Points to establish are the following. I don't have anything to say to you. Bill tried to shoot demonstrating his incompetence. I saw the man you love.
Truffles --> nsubj --> are


picked --> acl --> Truffles


during --> prep --> picked


the --> det --> spring


spring --> pobj --> during


are --> ROOT --> are


tasty --> acomp --> are


. --> punct --> are


Points --> nsubj --> are


to --> aux --> establish


establish --> relcl --> Points


are --> ROOT --> are


the --> det --> following


following --> attr --> are


. --> punct --> are


I --> nsubj --> have


do --> aux --> have


n't --> neg --> have


have --> ROOT --> have


anything --> dobj --> have


to --> aux --> say


say --> relcl --> anything


to --> prep --> say


you --> pobj --> to


. --> punct --> have


Bill --> nsubj --> tried


tried --> ROOT --> tried


to --> aux --> shoot


shoot --> xcomp --> tried


demonstrating --> xcomp --> shoot


his --> pos

In [5]:
import re


def extract_plain_text(markdown_text):
    # Remove markdown links (e.g., [text](url))
    plain_text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", markdown_text)
    # Remove bold and italic markdown formatting (*text* or **text**)
    plain_text = re.sub(r"\*{1,2}([^\*]+)\*{1,2}", r"\1", plain_text)
    # Remove markdown headings or dividers (e.g., "---", "### Text")
    plain_text = re.sub(r"^\s*[-#]+.*$", "", plain_text, flags=re.MULTILINE)
    # Replace escaped newlines with actual newlines
    plain_text = re.sub(r"\\n", "\n", plain_text)
    # Strip leading/trailing whitespace
    # remove all the extra white spaces
    plain_text = re.sub(r"\s+", " ", plain_text)
    return plain_text.strip()


# Example text
# markdown_text = """Hi u/QiDogQi, this submission has been removed.\n\nNSFW responses to non-NSFW prompts are not allowed and this prompt is likely to generate them \n\n* *This was removed [based on the comments it's likely to attract](https://www.reddit.com/r/WritingPrompts/wiki/rules#wiki_rule_7.3A_prompts_will_be_removed_if_there.27s_a_high_possibility_for_rule_breaking_responses), specifically via [Rule 2](https://www.reddit.com/r/WritingPrompts/wiki/rules#wiki_rule_2.3A_no_explicitly_sexual_responses.2C_hate_speech.2C_or_other_harmful_content)*\n\n---\n\nNo prompts prone to comment infractions. See [here](https://www.reddit.com/r/WritingPrompts/wiki/rules#wiki_rule_7.3A_prompts_will_be_removed_if_there.27s_a_high_possibility_for_rule_breaking_responses) for more info. \n\n* *From Rule 7: [Prompts will be removed if there's a high possibility for rule breaking responses](https://www.reddit.com/r/WritingPrompts/wiki/rules#wiki_rule_7.3A_prompts_will_be_removed_if_there.27s_a_high_possibility_for_rule_breaking_responses)*\n\n---\n\n\n\n---\n\n[Modmail](https://www.reddit.com/message/compose?to=%2Fr%2FWritingPrompts&subject=Removed%20post&message=https://www.reddit.com/r/WritingPrompts/comments/jy8l3n/-/%0A%0A) us if you have any questions or concerns.  In the future, please refer to the [sidebar](https://www.reddit.com/r/WritingPrompts/wiki/config/sidebar) before posting. \n\n*This action was not automated and this moderator is human. Time to go do human things.*"""

markdown_text = """Penny reached for her holographic tablet. Again. He’d done it again. At her tender age, Penny should be thinking about school, debate society, boys... Anything, but Uncle Gadget.\n\nHer grades were suffering: she’d never had a ‘B+’ before, particularly not in Comp-Sci. She could program fluently in six languages and hack the Pentagon. So HOW did SHE get a ‘B+’? Gadget. She fell asleep 14 minutes into the hour, all of her answers right...and then blank. Her professor didn’t know about her secret life, of course. Just assumed she’d been out too late with friends the night before or online gaming. \n\nBut, no. It was Thursday and already she’d been up late three nights in a row fixing Gadget’s mistakes and saving the world YET AGAIN from Dr Claw and his evil, but decidedly cute, cat. \n\nUncle Gadget was loyal. He took care of her. Let her see a world other kids her age and even most adults would never know existed: Dr Claw’s shadow cabal.\n\nPenny knew she should fear Claw, but he played by the rules of logic. Claw was smart. Each villainous plan was carefully engineered. She hated to admit it, but she admired the thought he put into his work. Defeating him was challenging to be sure, but also kind of fun. Each new case was a problem to crack. Despite her youth, Penny had years of experience under her belt and a mind to match. She could defeat Claw, and for the most part, did. But Uncle Gadget was another matter entirely.\n\nBefore she died, Mom used to say her brother, Gadget, was ‘a bit special’. Kind heart, but not the brightest bulb. Which must have been tough growing up in a family with several Nobel Prize winners, 314 patents, and a global computing empire. And that was just among Mom, Uncle Bob, Grandpa, and Grandma. To be sure, Gadget benefited from ‘off books’ funding from the family empire. There was no other way he could finance his crazy detective business before Penny started as an intern once he took over her care. Who would pay for Gadget’s ‘uneven’ results? \n\nAnd it was these ‘uneven results’ that were now killing her. Penny was nothing if not rational. Evil plots that could be foiled in a couple hours after school were drawn out because Gadget insisted on testing his latest stupid invention on each caper. The moronic umbrella parachute hat. His idiotic extendable arms. Whatever he could hide under his beige trench coat. \n\nThe coat was really the source of many of Penny’s problems. If she could just see in advance what latest ‘gadget’ her Uncle was bringing to the battle, she could have planned around it. But no, Gadget liked to surprise her. His grand unveilings would be the death of her, if she wasn’t careful. The proverbial wrench in each of Penny’s carefully-crafted counter plans. \n\nDecrypting Claw’s latest plan from his AES-256 bit system was a cakewalk. It helped, of course, that Penny’s Uncle Bob had invented it. Then gather the requisite mission tools from Grandma’s top-secret advanced prototyping center. Pick up Uncle Gadget, as according to Gram, even the world’s greatest detective ‘needed a chaperone at age ten.’ Hop onto one of the family’s private supersonic jets to defeat Claw and get back in time for homework. \n\nEasy as pie, except the Gadget part. Much as she loved him, Uncle G always added hours Penny did not have to her missions. \n\nIf she was going to complete her specially created combo Harvard undergrad / JD / MBA / Comp Sci and Criminology PHDs by 13, she really needed a bit more spare time...\n\nMaybe she could at least get Uncle G to ditch the coat and its myriad surprises. That should save an hour or two."""

# Extract plain text
plain_text = extract_plain_text(markdown_text)
print(len(plain_text.split()))
print(plain_text)

633
Penny reached for her holographic tablet. Again. He’d done it again. At her tender age, Penny should be thinking about school, debate society, boys... Anything, but Uncle Gadget. Her grades were suffering: she’d never had a ‘B+’ before, particularly not in Comp-Sci. She could program fluently in six languages and hack the Pentagon. So HOW did SHE get a ‘B+’? Gadget. She fell asleep 14 minutes into the hour, all of her answers right...and then blank. Her professor didn’t know about her secret life, of course. Just assumed she’d been out too late with friends the night before or online gaming. But, no. It was Thursday and already she’d been up late three nights in a row fixing Gadget’s mistakes and saving the world YET AGAIN from Dr Claw and his evil, but decidedly cute, cat. Uncle Gadget was loyal. He took care of her. Let her see a world other kids her age and even most adults would never know existed: Dr Claw’s shadow cabal. Penny knew she should fear Claw, but he played by the ru