In [1]:
import spacy
import classy_classification

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Install the medium English pipeline into the environment of the running kernel.
# `!python` is resolved through the shell PATH and may be a different interpreter
# than the one backing this kernel; spacy.cli.download installs from the current
# process instead.
# The is_package guard skips the download when the model is already installed,
# so re-running the notebook from the top does not fetch it again.
if not spacy.util.is_package("en_core_web_md"):
    spacy.cli.download("en_core_web_md")

Collecting en-core-web-md==3.2.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.2.0/en_core_web_md-3.2.0-py3-none-any.whl (45.7 MB)
[+] Download and installation successful
You can now load the package via spacy.load('en_core_web_md')


In [3]:
# Few-shot training set: each key is a category label and each value is the
# list of example sentences for that label.
data = {
    "furniture": [
        "This text is about chairs.",
        "Couches, benches and televisions.",
        "I really need to get a new sofa.",
    ],
    "kitchen": [
        "There also exist things like fridges.",
        "I hope to be getting a new stove today.",
        "Do you also have some ovens.",
    ],
}

# Attach the "text_categorizer" component to the medium English pipeline,
# handing it the examples above and the "spacy" model setting.
nlp = spacy.load("en_core_web_md")
categorizer_config = {"data": data, "model": "spacy"}
nlp.add_pipe("text_categorizer", config=categorizer_config)

# Classify one unseen sentence and print its per-label scores.
kitchen_doc = nlp("I am looking for kitchen appliances.")
print(kitchen_doc._.cats)

Fitting 2 folds for each of 6 candidates, totalling 12 fits
{'furniture': 0.36584954541133713, 'kitchen': 0.6341504545886628}


In [4]:
# Same classification demo, this time on a blank English pipeline with the
# sentence-transformers model named in the config, running on CPU.
# Uses the `data` dictionary defined in an earlier cell.
nlp = spacy.blank("en")
transformer_config = {
    "data": data,
    "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "device": "cpu",
}
nlp.add_pipe("text_categorizer", config=transformer_config)

# Score the same probe sentence as before so the two setups can be compared.
appliance_doc = nlp("I am looking for kitchen appliances.")
print(appliance_doc._.cats)

Fitting 2 folds for each of 6 candidates, totalling 12 fits
{'furniture': 0.14032856821897882, 'kitchen': 0.8596714317810211}


In [5]:
# Start a fresh label set that contains only the "furniture" examples.
# This rebinds `data`, replacing the two-label dictionary from the earlier cell.
furniture_examples = [
    "This text is about chairs.",
    "Couches, benches and televisions.",
    "I really need to get a new sofa.",
]
data = {"furniture": furniture_examples}


In [6]:
# Read the example sentences for the "fear" label: one list entry per line of the file.
# NOTE(review): no encoding is passed, so the platform default applies — confirm the file's encoding.
with open("data/fear.txt", "r") as fear_file:
    raw_fear = fear_file.read()
fear = raw_fear.splitlines()

In [7]:
# Display the list of "fear" example sentences that was just loaded.
fear

['He strode over to the sofa where Dudley sat frozen with fear.',
 "Reckon Dumbledore's the only one You-Know-Who was afraid of.",
 'Scared of the students, scared of his own subject now, where\'s me umbrella?"']

In [8]:
# Register the sentences loaded from data/fear.txt as the examples for a second label, "fear".
data.update({"fear": fear})

In [9]:
# Inspect the label -> example-sentences mapping that the next pipeline is configured with.
data

{'furniture': ['This text is about chairs.',
  'Couches, benches and televisions.',
  'I really need to get a new sofa.'],
 'fear': ['He strode over to the sofa where Dudley sat frozen with fear.',
  "Reckon Dumbledore's the only one You-Know-Who was afraid of.",
  'Scared of the students, scared of his own subject now, where\'s me umbrella?"']}

In [10]:
# Rebuild the pipeline on a blank English model so the "text_categorizer"
# component is configured with the current contents of `data`.
nlp = spacy.blank("en")
fear_classifier_config = dict(
    data=data,
    model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    device="cpu",
)
# Kept as the cell's final expression, so the returned component is displayed.
nlp.add_pipe("text_categorizer", config=fear_classifier_config)

Fitting 2 folds for each of 6 candidates, totalling 12 fits


<classy_classification.classifiers.spacy_few_shot_external.classySpacyFewShotExternal at 0x1e5c8f6ea88>

In [11]:
# A second, separate blank English pipeline; it is kept apart from `nlp` and is
# the one that gets the sentence-splitting component.
sentence_model = spacy.blank("en")

In [12]:
# Add the "sentencizer" component; the documents this pipeline produces are later
# iterated through `.sents`. As the cell's last expression, the returned component is displayed.
sentence_model.add_pipe("sentencizer")

<spacy.pipeline.sentencizer.Sentencizer at 0x1e5be652908>

In [13]:
# Load the whole cleaned book text into a single string.
# NOTE(review): no encoding is passed, so the platform default applies — confirm the file's encoding.
with open("data/harry_potter_cleaned.txt", "r") as book_file:
    text = book_file.read()

In [22]:
# Run the sentence-splitting pipeline over the entire text in one call.
# Despite its name, `sentences` holds the object returned by that call; the
# individual sentences are read from its `.sents` attribute further down.
sentences = sentence_model(text)

In [23]:
# Classify every sentence of the text and keep its text together with the predicted scores.
# NOTE(review): nlp.pipe(...) could batch this loop — confirm the results match before switching.
final_data = []
for sentence in sentences.sents:
    doc = nlp(sentence.text)
    final_data.append(dict(sentence=doc.text, cats=doc._.cats))

In [33]:
# Spot-check the first record: a dict with the sentence text and its per-label scores.
final_data[0]

{'sentence': 'Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much.',
 'cats': {'fear': 0.491562778224367, 'furniture': 0.508437221775633}}

In [38]:
# Print every sentence whose "fear" score is above 0.8, followed by its full
# score dictionary and an empty separator line.
for item in final_data:
    scores = item["cats"]
    # Written as a negated ">" so the set of printed items is exactly the original's.
    if not (scores["fear"] > 0.8):
        continue
    print(item["sentence"].strip(), scores, "", sep="\n")

The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere.
{'fear': 0.8069044861590964, 'furniture': 0.19309551384090365}

The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it.
{'fear': 0.8315565498986689, 'furniture': 0.1684434501013312}

At half past eight, Mr. Dursley picked up his briefcase, pecked Mrs. Dursley on the cheek, and tried to kiss Dudley good-bye but missed, because Dudley was now having a tantrum and throwing his cereal at the walls. "
{'fear': 0.8293892762654316, 'furniture': 0.17061072373456837}

For a second, Mr. Dursley didn't realize what he had seen -- then he jerked his head around to look again.
{'fear': 0.8415876523365974, 'furniture': 0.1584123476634027}

What could he have been thinking of?
{'fear': 0.8365429154382247, 'furniture': 0.16345708456177555}

Mr. Dursley blinked and stared at the cat.
{'fear': 0.826508680436725, 'furniture': 0.17349131