In [4]:
import spacy
import spacy_transformers
from nltk.tokenize import sent_tokenize
import os
import re
import xlsxwriter
import pandas

In [5]:
nlp = spacy.load("en_core_web_trf")

In [6]:
def open_file(filename):
    with open(filename, encoding='utf-8') as f:
        text = f.read()
    return text

In [7]:
def writeln(errors, filename):
    workbook = xlsxwriter.Workbook(filename)
    worksheet = workbook.add_worksheet()
    #Fill in the column names
    for x in range(len(errors[0])):
        worksheet.write(0, x, errors[0][x])

    col = 0
    for row_num in range(1, len(errors)):
        error = errors[row_num]
        for x in range(len(error)):
            worksheet.write(row_num, col + x, error[x])
    workbook.close()

In [1]:
def find_subj(pred):
    # print(pred, [child.text for child in pred.lefts]) use pred.lefts to find quotes
    # simple cases
    subjects = []
    subjects = [child for child in list(pred.children) if child.dep_.startswith(('nsubj', 'csubj'))]

    # handling 'there is' and 'there are' cases
    if 'there' in list(i.text.lower() for i in pred.children):
        subjects += [child for child in list(pred.children) if child.dep_ == 'attr']

    # if predicate is an auxiliary, we want to take subjects of its head
    if pred.dep_.startswith('aux'):
        subjects += [child for child in list(pred.head.children) if child.dep_.startswith(('nsubj', 'csubj'))]
    
    # handling conjuncts: multiple subjects as in 'Mother and father are key figures in a child's life'.
    add_subj = []
    for subject in subjects:
        add_subj += list(subject.conjuncts)

    cur_pred = pred
    while len(subjects) == 0 and cur_pred.dep_ == "conj":
        cur_pred = cur_pred.head
        subjects = find_subj(cur_pred)

    subjects += add_subj

    # the subjects' order may be different from sentence order, so we arrange it right
    subjects.sort(key=lambda subj: subj.i)

    if len(subjects) == 1 and subjects[0].text.lower() in ['who', 'that', 'which'] and pred.dep_ == 'relcl':
        subjects = [pred.head]

    return subjects

In [9]:
def find_pred_subj(doc):
    pred_sub = list()
    for token in doc:
        if token.pos_ in ['AUX', 'VERB']:
            # negation: cases like "He doesn't scare me"   
            aux = None
            if [ch for ch in list(token.lefts) if ch.dep_ == 'neg']:
                children = list(token.children)
                for ch in children:
                    if ch.dep_ == 'aux' and not aux:
                        aux = ch
                if aux:
                    pred_sub += [(aux, find_subj(aux))]


            # if the predicate is analytical like 'I have done',
            # spacy rightfully considers the participle to be the root,
            # but we need grammatical info, so we will consider aux the root
            if not aux: # for when negation is expressed with 'never' etc and does not need aux support
                if token.tag_ in ['VBN', 'VBG']:
                    aux = None
                    children = list(token.children)
                    for ch in children:
                        if ch.dep_ == 'aux' and ch.pos_ in ['VERB', 'AUX'] and ch.tag_ != 'VBN':
                            aux = ch
                        elif not aux and ch.dep_ == 'auxpass' and ch.pos_ in ['VERB', 'AUX']:
                            aux = ch
                    if aux:
                        pred_sub += [(aux, find_subj(aux))]

                # all other cases
                elif token.dep_ in ['ROOT', 'ccomp', 'xcomp', 'acl', 'relcl']:
                    pred_sub += [(token, find_subj(token))]

                # conjuncts: when there are multiple predicates connected by conjunction
                elif token.dep_ == 'conj' and token.head.dep_ in ['ROOT', 'ccomp', 'xcomp', 'acl', 'relcl']:
                    pred_sub += [(token, find_subj(token))]
                
    return pred_sub

In [56]:
def errors(ps):
    res = []
    for pair in ps:
        subj_agr, pred_agr = None, None #what each must agree with, variables must coincide in the end
        pred = pair[0]
        subj = pair[1]
        if len(subj) == 1:
            subject = subj[0]
            s = subject.text.lower()
            subject_left_children = subject.lefts
            subject_is_numeral = False
            for child in subject_left_children:
                if child.pos_ == 'NUM':
                    subject_is_numeral = True
                    break
            if subject_is_numeral:
                continue
            if s not in ambiguous:
                children = list(ch for ch in subject.children)
                children_text = list(ch.text.lower() for ch in children)
                
                #singular only pronouns
                if s in sing_only or subject.ent_type_ == 'ORG':
                    subj_agr = 'sg'
                elif subject.tag_ == 'VB':
                    subj_agr = 'sg'
                    
                # either singular or plural pronouns
                # if they have an 'of N, N, N...', after them we will require check if verb agrees with the last noun
                elif (s in {'some', 'any', 'none', 'all', 'most'} \
                      and 'of' in children_text):
                    of = [ch for ch in children if ch.text.lower() == 'of'][0]
                    noun = [ch for ch in of.children if ch.pos_ == 'NOUN']
                    if noun:
                        noun = noun[0]
                        while [ch for ch in noun.children if (ch.dep_ == 'conj' and ch.pos_ == 'NOUN')]:
                            noun = [ch for ch in noun.children if (ch.dep_ == 'conj' and ch.pos_ == 'NOUN')][-1]
                        if noun.tag_ in ['NNS', 'NNPS'] or noun.text.lower() in plur_only:
                            subj_agr = 'pl'
                        elif noun.tag_ in ['NN', 'NNP'] or noun.text.lower() in sing_only:
                            subj_agr = 'sg'
                    
                # plural only pronouns
                elif s in plur_only and not children:
                    subj_agr = 'pl'
                    
                elif s in {'i', 'we', 'you', 'they'}:
                    subj_agr = 'pl'
                elif s in {'he', 'she', 'it'}:
                    subj_agr = 'sg'
                    
                elif s == 'number':
                    if 'a' in children_text and 'of' in children_text:
                        subj_agr = 'pl'
                    else: subj_agr = 'sg'
                    
                # predicates in non-head clauses with 'who', 'that' agree with noun in head clause
                elif s in ['who', 'that', 'which']:
                    if pred.dep_ == 'relcl': 
                        print("This is probably a bug!")
                        #why relcl? we can only be sure about this tag that it is the case we're looking for.
                        #other possible predicate tags include '...comp', but these also apply
                        #in cases like "I asked the boys who was the winner",
                        #which, although with incorrect word order,
                        #are still clearly present in Russian essays
                        #and will be parsed by spacy as 'ccomp'
                        head = pred.head
                        if not head.conjuncts:
                            if head.tag_ in ['NNS', 'NNPS'] or head.text.lower() in plur_only:
                                subj_agr = 'pl'
                            elif head.tag_ in ['NN', 'NNP'] or head.text.lower() in sing_only:
                                subj_agr = 'sg'
                        else:
                            conjuncts = list(head.conjuncts)+[head]
                            for conjunct in conjuncts:
                                if 'and' in list(child.text.lower() for child in conjunct.children):
                                    subj_agr = 'pl' 
                        print(subj_agr)
                elif subject.tag_ in ['NNS', 'NNPS']:
                    subj_agr = 'pl'
                elif subject.tag_ in ['NN', 'NNP', 'VBG']:
                    subj_agr = 'sg'
                if subject.ent_type_ == 'LOC' or subject.ent_type_ == 'GPE':
                    if subject.tag_ in ['NNS', 'NNPS']:
                        continue
                    else:
                        subj_arg = 'sg'
                    
        elif len(subj) > 1:
            # 'Mother, father and brother were present.'
            # If conjuncts are connected by 'and', he predicate is plural
            # Exception: 'Every man, woman and child aprticipates in the tournament.'
            if 'and' in list(child.text.lower() for child in list(list(subj[0].children)+list(subj[-2].children))):
                subj_agr = 'pl'
                left_subj_children = list(child.text.lower() for child in list(subj[0].lefts))
                left_pred_children = list(child.text.lower() for child in list(pred.lefts))
                is_uppercase = True
                all_gerund = True
                for sub in subj:
                    if sub.tag_ != 'NNP':
                        is_uppercase = False
                    if sub.tag_ != 'VBG':
                        all_gerund = False
                #Don't check: 'Playing football and enjoying it + is a good thing | are different things)'
                if all_gerund:
                    continue
                #Don't check: 'Jones and Sons is a respectable company.'
                if is_uppercase:
                    continue
                # Don't check: 'There is Tom and Mary as a perfect example.'
                if pred.text.lower() in ['is', 'are'] and 'there' in left_pred_children:
                    continue
                if 'every' in left_subj_children or 'each' in left_subj_children:
                    subj_agr = 'sg'
                else:
                    subj_agr = 'pl'
            # 'Mother, father or brother comes to pick up the kid.'
            # If conjuncts are connected by 'or', verb agrees with the last one
            elif any(a in list(child.text.lower() for child in list(list(subj[0].children)+list(subj[-2].children))) for a in ['or', 'nor']):
                if subj[-1].tag_ in ['NNS', 'NNPS'] or subj[-1].text.lower() in plur_only:
                    subj_agr = 'pl'
                elif subj[-1].tag_ in ['NN', 'NNP', 'VBG'] or subj[-1].text.lower() in sing_only:
                    subj_agr = 'sg'

        if pred.tag_ == 'VBZ':
            pred_agr = 'sg'
        elif pred.tag_ == 'VBP':
            pred_agr = 'pl'
        elif pred.lemma_ == 'be':
            if pred.text == 'was':
                pred_agr = 'sg'
            elif pred.text == 'were':
                pred_agr = 'pl'

        if subj_agr != pred_agr and subj_agr and pred_agr:
            res += [pair]
            
    return res

In [59]:
def search(directory = 'test', only_errors=True): #If used with excel files, should be only used with similar format files, otherwise additional info can be messy
    final = []
    text_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk(directory) for f in filenames if os.path.splitext(f)[1] == '.txt']
    excel_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk(directory) for f in filenames if os.path.splitext(f)[1] == '.xlsx']

    if text_files: #If there is at least one txt file, we assume that the format is default
        if only_errors:
            final.append(("Sentence", "Errors", "Filename"))
        else:
            final.append(("Sentence", "Errors", "Filename", "Has Error"))

    for file in text_files:
        text = open_file(file)
        sentences = sent_tokenize(text)
        for sent in sentences:
            sent = re.sub(r'<.*?>', '', sent)
            sent = re.sub(r' +', ' ', sent)
            doc = nlp(sent)
            ps = find_pred_subj(doc)
            # ps looks like [(predicate1, [subject1, subject2]), (predicate2, [subject1])]
            er = errors(ps)
#             er = [error for error in er if not subjunctive(sent, error[0])]
            
            if only_errors:
                if er:
                    final.append((sent,
                                  ",\n".join(["{} {}".format(*er[x]) for x in range(len(er))]), # Wrong subjs and preds,
                                 file))
            else:
                final.append((sent,
                              ", ".join(["({} {})".format(*er[x]) for x in range(len(er))]),
                              file,
                              int(len(er) != 0)))  #Whether or not errors were found in this sentence

    for file in excel_files:
        df = pandas.read_excel(file, keep_default_na=False)
        if not final:
            additional_columns = tuple([x for x in df.columns if x != "Sentence"])
            if only_errors:
                final.append(("Sentence", "Errors", "Filename") + additional_columns)
            else:
                final.append(("Sentence", "Errors", "Filename", "Has Error") + additional_columns)

        for row_num in range(df.shape[0]):
            sent = df["Sentence"].iat[row_num]
            sent = re.sub(r'<.*?>', '', sent)
            sent = re.sub(r' +', ' ', sent)
            doc = nlp(sent)
            ps = find_pred_subj(doc)
            # ps looks like [(predicate1, [subject1, subject2]), (predicate2, [subject1])]
            er = errors(ps)
#             er = [error for error in er if not subjunctive(sent, error[0])]
            
            if only_errors:
                if er:
                    final.append((sent,
                                  ",\n".join(["{} {}".format(*er[x]) for x in range(len(er))]), # Wrong subjs and preds,
                                 file,
                                 *[df[column].iat[row_num] for column in additional_columns]))
            else:
                final.append((sent,
                              ", ".join(["({} {})".format(*er[x]) for x in range(len(er))]),
                              file,
                              int(len(er) != 0), #Whether or not errors were found
                              *[df[column].iat[row_num] for column in additional_columns]))
        
    return final

In [12]:
def subjunctive(sent, pred):
    adj_prt = {'advisable', 'best', 'crucial', 'desirable', 'vital',
               'essential', 'imperative', 'important', 'moved',
               'necessary', 'prohibited', 'unthinkable', 'urgent',
               'required', 'stipulated', 'requested', 'recommended',
               'advised', 'proposed', 'adamant', 'anxious', 
               'determined', 'eager', 'insistent', 'keen'}
    
    clause_is_subjunctive = False
    that_list = []

    if 'that' in sent.lower():
        doc = nlp(sent)
        that_list = [token for token in doc if token.text.lower() == 'that']
        for that in that_list: 
            # VERB/NOUN + that + CONJUNCTIVE
            if (that.head.i == pred.i \
                and pred.dep_ == 'ccomp' \
                and pred.head.pos_ in ['VERB', 'NOUN'] \
                and pred.head.lemma_ in {'advise', 'ask', 'command', 'demand', 'desire',
                                         'insist', 'move', 'order', 'prefer', 'propose',
                                         'recommend', 'request', 'stipulate', 'suggest', 'urge',
                                         'motion', 'order', 'preference', 'proposal', 'recommendation',
                                         'request', 'stipulation', 'suggestion'}):
                clause_is_subjunctive = True

            # SUBJECT + [be] ADJ/PRTC + that + CONJUNCTIVE
            elif (that.head.i == pred.i and pred.dep_ == 'ccomp'):
                if (pred.head.lemma_ == 'be' and \
                    any(a in adj_prt for a in list(b.text.lower() for b in pred.head.children))) \
                    or (pred.head.tag_ in ['VBN', 'JJ'] and \
                        pred.head.text in adj_prt):
                        clause_is_subjunctive = True

    return clause_is_subjunctive

In [13]:
ambiguous = {'bison', 'cod', 'deer', 'fish', 'moose', 'boar', 'salmon', 'sheep',
            'shrimp', 'swine', 'trout', 'buffalo', 'grouse', 'elk', 'fruit', 'reindeer',
            'offspring', 'pike',
            'statistics', 'politics', 'mechanics', 'economics',            
            'government', 'data', 'police', 'team', 'jury', 'family',
            'half', 'class', 'majority', 'part', 'percent', '%', 'cent', 'lot', 'group'}

sing_only = {'each', 'either', 'neither', 'one', 'nobody',
            'nothing', 'anyone', 'anybody', 'anything', 'someone', 
            'somebody', 'something', 'everyone', 'everybody', 'everything', 
             'this', 'one', 'other', 'measles'}

plur_only = {'several', 'few', 'many', 'both', 'these', 'those'}

In [60]:
%%time
f = search('test/evaluate/medium', False)

CPU times: user 3min 55s, sys: 635 ms, total: 3min 56s
Wall time: 59.7 s


In [61]:
writeln(f, "itog.xlsx")

IndexError: list index out of range

## Test

In [None]:
def run_model(*args):
    return search(os.path.join(*args), False)

In [None]:
def test_model(*args, write_errs=None, print_wrong=False): # Function to test the model on a directory with a correct.txt file and a wrong.txt file
    output = run_model(*args)
    
    if write_errs is not None:
        writeln(output, write_errs)
    
    false_pos = 0
    true_pos = 0
    false_neg = 0
    true_neg = 0
    for x in output[1:]:
        if x[2] == os.path.join(*args, "correct.txt"):
            if x[3] == 0:
                true_neg += 1
            else:
                if print_wrong:
                    print("False positive:", x)
                false_pos += 1
        else:
            if x[3] == 0:
                if print_wrong:
                    print("False negative:", x)
                false_neg += 1
            else:
                true_pos += 1
    precision = (true_pos + true_neg) / (true_pos + true_neg + false_pos + false_neg)
    recall = true_pos / max(1, true_pos + false_neg)
    print("Precision = {}\nRecall = {}\n".format(round(precision, 2), round(recall, 2)))

def evaluate(level, **kwargs): # level should be equal to "easy", "medium" or "insane"
    print(level.capitalize(), "tests:")
    test_model("test", "evaluate", level, **kwargs)

In [46]:
test_model("test/test_wa", print_wrong=True)

Precision = 1.0
Recall = 0.0



In [58]:
%%time
evaluate("medium")

Medium tests:
This chart shows us the data about the unemployment in a few world areas.
The data is divided into the information about 2014 and 2015 unemployment levels.
It is clearly seen that there are two main trends.
Firstly, the level of unemployment did not change at some areas such as North Africa and South Asia where that level remained stable.
Secondly, it is shown that that level decreased at some regions.
Talking about details, we can see that the level of unemployment decreased rapidly in the two regions.
It went from 11 to 9,6 points in the Middle East, and it also went down from 6,8 to 5,8 points in the Latin America.
However, there is an unusual trend.
In contrast to the previous examples, the EU's level of unemployment rose by the 2015 from 7,5 to 7,9 points.
In general, it is shown that there were different trends at the different areas.
However, the worldwide indicator did not change at all.
Nowadays there are a lot of disputes over pros and cons of illegal music and 

For instance, there is slight growth of unemployment rate in EU from 7,5% to 7,9%.
The worldwide level is the same, as it was mentioned above.
This could happen because of overpopulation of the region of Southern Asia, therefore it can affect the rate in a significant way.
The problem of illegal distribution of intellectual property is nowadays very sharp.
Some people even think, that not only distributors, but also consumers of illegal or "pirate" content should be punished.
Of course, it's very difficult problem and we can hardly solve it finally.
However, I'm agree with only one part of this statement.
It is true, that we should punish people, who illegally copy and share films or music, because it is property and property rights should be protected strictly.
However, sometimes a producer of some content leaves it in open access and anyone can download or share it.
Consequently, sharing and copying it will not be illegal.
Moreover, another question arises - should we blame or punish

The first one is that popular film makers and musicians do not get all the income they can.
Therefore, the cannot make new films or music because they are lack of money.
The second one is that sometimes they do not understand how popular they really are as there is no evidence or statistics about people downloading it.
These problems can be actually solved.
As a solution producers and musicians can allow to download some of their products for free and ask money for some other.
What is about the second problem, to my mind, it can be solved in connection with the first one.
People will listen or watch films for free and want to buy some else.
To sum up, illegal copying and sharing music and films is not such a big crime for that people should be punished in some way.
The chart illustrates the level of unemployment in different regions in 2014 and 2015.
Northern Africa shows greatest unemployment rate - it was 12,5% in 2014 and stood the same in 2015.
In South Asia the rate also hasn't ch

The picture demonstrates the percentage of unemployed people in the different world areas and in whole world in 2014 and 2015.
It is clear, that the highest level of unemployment is in North Africa, it is about 12,5 per cent in 2014 and in 2015.
To the opposite, the smallest level is in South Asia and it remains stable too, but there is about 3,9 per cent.
Between Africa and Asia Middle East, the EU and Latin America are located.
Difference between two close positions in 2015 as rule is 2 per cent points.
The biggest difference is fixated in case of North Africa and Middle East, in 2015 it was 2,9 per cent points.
There are some changes in number of unemployed persons between 2014 and 2014.
The leader and the outsider have the same growth results.
It is true also for worldwide unemployment rate.
But in Middle East and Latin America the rate decreased from 11 to 9,6 per cent and from 6,8 to 5,8 per cent.
At the same time, the rate slowly increased in the EU from 7,5 per cent to 7,9 per 

In conclusion, taking everything noted up into account it can be seen that in the different world regions can be different tendency of unemployment rate.
Also, it can be noted that in the Worldwide unemployment rate was unchangeable between 2014 and 2015.
In these days Internet and spending time on the different websites have become an essential part in our life.
There are two different points of illegal downloading films or music and importance of punishment for them.
In this essay will be explain some reasons of agreement or disagreement with this problem and will be given some examples for better evidence.
Firstly, it can be noted that musicians and film producers have author's patent on their product because it was their innovation.
In consequences they lose a lot of money from illegal pirate copies.
It is wrong because people must have a salary from their job.
In this point of view it can be understood why people who do illegal pirate copies should be punished.
On the other hand, 

A good example is a "Metro Golden Mayer" which loss more than 3,5 billions dollars from 2008 year.
The biggest project of company films about agent 007 James Bond hasn't enough money for making them, and new films of this franchise made only once in 6-7 years versus once in 2-3 years in 1990-08.
On the other hand a lot of people think that opportunity of free watching gaming or listening it a good chance not only for consumers but and for actors, artist and companies.
Yes, of course they are missing their money, but they are get fame and respect from million people which can't spend 20-30 $ on film or 50-60$ for game.
What is more, many people which download torrent files and get pleasure from them will buy not only licensed driver this product but and next product of this firm.
For example "Witcher 3" from "CD-Project" had the biggest sellers after 2 month of release, maybe it will effect of "white pirates" which buy this game after one-two month of gaming on pirate copies.
In conclus

Middle East is the second region after the highest one.
Also Middle East shows another tendency in the unemployment rate it is rising from 11 percent in 2014 to 9,6 in 2015.
Different trend is showing by Latin America region.
The rate of unemployment decreased by 1 percent from 6,8 to 5,8 in 2014 to 2015.
The lowest rate of unemployment is represented by South Asia region.
There is not any downward or rising.
South Africa region has stable rate in 2014 and 2015 in 3,9 percent.
To sum up, unemployment is a serious problem of our world.
However, Worldwide rate doesn't change in these years and still 5,9 percent.
That's why the solution to this problem should be found.
Many people saying that there is a big problem with illegal pirate copies with music and films.
Musicians and film makers are losing a lot of money from this type of coping and sharing without official buying the original discs.
So the idea is that people who downloading illegal copies from the internet should be punished.


In Western democratic societies people can think that this practice is illegal and abnormal, however people in not such democratic societies as European or American can think that practice of pirate copying is normal and So there are two point of view on this situation with pirate copying.
Firstly, almost all people in really democratic societies think that copying and sharing music or films on the internet and downloading these files is not acceptable for fair people and good citizens because if you do that you steal money from producers and musicians, which spend a lot of time, money and other resources for making this product.
So in these societies pirate copying is bad and illegal.
"Intellectual pirates" like a stealer, so they are punished by government and societies.
Exactly by this reason there are a lot of scandalous deals in courts of this government.
Intellectual rights as one of the main principles of modern democracy protected by Western governments.
The other situation wit

Artists want more money to earn, people want less money to spent.
It is clearly seen that free copies on the Internet make less harm than they are thought to.
So, probably this situation should not be radically solved.
This chart describes changes in the unemployment rate in selected world regions in 2014 and 2015.
First of all, it is hard not to mention that the unemployment rate in 2014 and 2015 changed only in 3 selected world regions.
In the North Africa, South Asia and Worldwide unemployment rate is still the same.
On 2014 and 2015 we have 12,5% of unemployment for North Africa, 3,9% for South Asia and 5,9 Worldwide.
Only the EU had a slight increase in the unemployment rate from 7,5 to 7,9%.
Other 2 regions Middle East and Latin America had a little decrease in unemployment rate.
Latin America from 6,8 to 5,8% and Middle East from 11 to 9,6%.
To sum up, it is clear that there was no huge difference between unemployment rate in 2014 and in 2015 in selected world regions.
This rate

In conclusion I would like to summarize all argument and give my point of view.
I definitely agree with the second group of people as I think pirating has small influence on incomes of musicians and film makers.
Moreover, the amount of unemployment in this region remain stable during the year.
There is different situations in world regions.
On the contrary, in the Middle East and Latin America there are serious decline in this rate - from 11% to 9,6% and 6,8% to 5,8%, accordingly.
Despite the fact, that in 2 regions there are decrease of unemployment levels and increase in another one.
No doubt, people, who violates laws and distribute illegal content should be punished.
Also Latin America demonstrate positive changing in the rate of unemployment during that period with 1 percent down.
Except all changes in the rates of unemployment in the different regions the last graph shows information about worldwide, that rate do not changes and still is 5,9%.
But, there is opposite opinion, that

## Example

In [62]:
s = ['"The cardsharps" is a 16th century masterpiece by Caravaggio']
for ss in s:
    doc = nlp(ss)
    print(doc)
    print(find_pred_subj(doc))
    for token in doc:
        print("%s\t%s\t\t\t%s\t%s\t\t%s\t\t%s\t\t%s" % (token.i, token.text, token.pos_, 
                                                        token.lemma_, token.tag_, token.dep_, token.head.i))
    
#print(dir(doc[0]))
#print(' '.join(list(ch.text for ch in doc[3].children)))

"The cardsharps" is a 16th century masterpiece by Caravaggio
[(is, [cardsharps])]
0	"			PUNCT	"		``		punct		4
1	The			DET	the		DT		det		2
2	cardsharps			NOUN	cardsharp		NNS		nsubj		4
3	"			PUNCT	"		''		punct		4
4	is			AUX	be		VBZ		ROOT		4
5	a			DET	a		DT		det		8
6	16th			ADJ	16th		JJ		amod		7
7	century			NOUN	century		NN		compound		8
8	masterpiece			NOUN	masterpiece		NN		attr		4
9	by			ADP	by		IN		prep		8
10	Caravaggio			PROPN	Caravaggio		NNP		pobj		9


In [14]:
def check(sent):
    sent = re.sub(r'<.*?>', '', sent)
    sent = re.sub(r' +', ' ', sent)
    doc = nlp(sent)
    ps = find_pred_subj(doc)
    er = errors(ps)
    print(ps)
    for token in doc:
        print("%s\t%s\t\t\t%s\t%s\t\t%s\t\t%s\t\t%s" % (token.i, token.text, token.pos_, 
                                                        token.lemma_, token.tag_, token.dep_, token.head.i))
    print(er)

In [15]:
check("Special abilities is also what we get from our ancestors.")

is [abilities]
get [we]
[(is, [abilities]), (get, [we])]
0	Special			ADJ	special		JJ		amod		1
1	abilities			NOUN	ability		NNS		nsubj		2
2	is			AUX	be		VBZ		ROOT		2
3	also			ADV	also		RB		advmod		2
4	what			PRON	what		WP		dobj		6
5	we			PRON	we		PRP		nsubj		6
6	get			VERB	get		VBP		ccomp		2
7	from			ADP	from		IN		prep		6
8	our			PRON	our		PRP$		poss		9
9	ancestors			NOUN	ancestor		NNS		pobj		7
10	.			PUNCT	.		.		punct		2
[(is, [abilities])]


In [36]:
spacy.explain("nsubj")

'nominal subject'

In [130]:
st = sent_tokenize("Apples is good")