In [1]:
import json
import spacy
from spacy.matcher import Matcher

# Load the spaCy English model
nlp = spacy.load("en_core_web_sm")

# Initialize the Matcher with the shared vocabulary
matcher = Matcher(nlp.vocab)

# Define patterns to match
patterns = [
    # Pattern for winners: [Person] wins [Award]
    {
        "label": "WINNER",
        "pattern": [
            {"ENT_TYPE": "PERSON", "OP": "+"},
            {"LEMMA": "win"},
            {"POS": "DET", "OP": "?"},
            {"POS": "PROPN", "OP": "+"}
        ]
    },
    # Pattern for nominees: [Person] is nominated for [Award]
    {
        "label": "NOMINEE",
        "pattern": [
            {"ENT_TYPE": "PERSON", "OP": "+"},
            {"LOWER": "is"},
            {"LEMMA": "nominate"},
            {"LOWER": "for"},
            {"POS": "DET", "OP": "?"},
            {"POS": "PROPN", "OP": "+"}
        ]
    },
    # Pattern for speakers: [Person] will be speaking at [Event]
    {
        "label": "SPEAKER",
        "pattern": [
            {"ENT_TYPE": "PERSON", "OP": "+"},
            {"LOWER": "will"},
            {"LOWER": "be"},
            {"LEMMA": "speak"},
            {"LOWER": "at"},
            {"POS": "DET", "OP": "?"},
            {"POS": "PROPN", "OP": "+"}
        ]
    },
    # Pattern for award names in context: Congratulations to [Person] on winning [Award]
    {
        "label": "AWARD_WIN",
        "pattern": [
            {"LOWER": "congratulations"},
            {"LOWER": "to"},
            {"ENT_TYPE": "PERSON", "OP": "+"},
            {"LOWER": "on"},
            {"LEMMA": "win"},
            {"POS": "DET", "OP": "?"},
            {"POS": "PROPN", "OP": "+"}
        ]
    }
]

# Add patterns to the matcher
for pattern in patterns:
    matcher.add(pattern["label"], [pattern["pattern"]])

# Load tweets from JSON file
with open('gg2013.json', 'r') as f:
    tweets = json.load(f)

# Process each tweet
for tweet in tweets:
    text = tweet.get('text', '')
    doc = nlp(text)
    matches = matcher(doc)

    for match_id, start, end in matches:
        span = doc[start:end]
        label = nlp.vocab.strings[match_id]

        # Initialize variables
        person = ''
        award = ''
        event = ''

        # Extract information based on the label
        if label == "WINNER" or label == "AWARD_WIN":
            # Extract person and award
            person_tokens = []
            award_tokens = []
            # Iterate over the tokens to separate person and award
            for token in span:
                if token.ent_type_ == "PERSON":
                    person_tokens.append(token.text)
                elif token.pos_ == "PROPN" or token.ent_type_ in ["WORK_OF_ART", "EVENT"]:
                    award_tokens.append(token.text)
            person = ' '.join(person_tokens)
            award = ' '.join(award_tokens)
            print(f"Winner: {person}, Award: {award}")

        elif label == "NOMINEE":
            # Extract nominee and award
            person_tokens = []
            award_tokens = []
            for token in span:
                if token.ent_type_ == "PERSON":
                    person_tokens.append(token.text)
                elif token.pos_ == "PROPN" or token.ent_type_ in ["WORK_OF_ART", "EVENT"]:
                    award_tokens.append(token.text)
            person = ' '.join(person_tokens)
            award = ' '.join(award_tokens)
            print(f"Nominee: {person}, Award: {award}")

        elif label == "PRESENTER":
            # Extract speaker and event
            person_tokens = []
            event_tokens = []
            for token in span:
                if token.ent_type_ == "PERSON":
                    person_tokens.append(token.text)
                elif token.pos_ == "PROPN" or token.ent_type_ == "EVENT":
                    event_tokens.append(token.text)
            person = ' '.join(person_tokens)
            event = ' '.join(event_tokens)
            print(f"Speaker: {person}, Event: {event}")


Winner: Adele, Award: Globe
Winner: Adele, Award: Globe
Winner: Murray, Award: beard
Winner: Bill Murray, Award: beard
Winner: Waltz, Award: Golden
Winner: Christoph Waltz, Award: Golden
Winner: Waltz, Award: Golden Globe
Winner: Christoph Waltz, Award: Golden Globe
Winner: Waltz, Award: Golden
Winner: Christoph Waltz, Award: Golden
Winner: Waltz, Award: Golden Globe
Winner: Christoph Waltz, Award: Golden Globe
Winner: Waltz, Award: Golden
Winner: Christoph Waltz, Award: Golden
Winner: Waltz, Award: Golden Globe
Winner: Christoph Waltz, Award: Golden Globe
Winner: Waltz Golden, Award: 
Winner: Christoph Waltz Golden, Award: 
Winner: Waltz Golden Globe, Award: 
Winner: Christoph Waltz Golden Globe, Award: 
Winner: Waltz, Award: Golden
Winner: Christoph Waltz, Award: Golden
Winner: Waltz, Award: Golden Globe
Winner: Christoph Waltz, Award: Golden Globe
Winner: Waltz, Award: Golden
Winner: Christoph Waltz, Award: Golden
Winner: Waltz, Award: Golden Globe
Winner: Christoph Waltz, Award: Go

KeyboardInterrupt: 