# Lab 3 : TextBlob

TextBlob documentation: https://textblob.readthedocs.io/en/dev/


## Import

In [1]:
from os.path import expanduser, join

import nltk
from textblob import TextBlob

# Register the per-user NLTK data directory (~/data/nltk_data) on NLTK's
# search path. The membership check keeps the cell idempotent: re-running it
# does not pile up duplicate entries in nltk.data.path.
nltk_data_dir = join(expanduser("~"), "data", "nltk_data")
if nltk_data_dir not in nltk.data.path:
    nltk.data.path.append(nltk_data_dir)

# Sample paragraph used by the tokenization cell below.
text = """TextBlob aims to provide access to common text-processing operations 
through a familiar interface. You can treat TextBlob objects as if they were Python 
strings that learned how to do Natural Language Processing.
"""

# Wrap the raw string in a TextBlob; printing it shows the original text.
tb = TextBlob(text)
print(tb)

TextBlob aims to provide access to common text-processing operations 
through a familiar interface. You can treat TextBlob objects as if they were Python 
strings that learned how to do Natural Language Processing.



## Tokenization

In [2]:
# Word-level tokens first, then sentence-level tokens; each listing is
# followed by one blank line.
print(tb.words, end="\n\n")

print(tb.sentences, end="\n\n")

['TextBlob', 'aims', 'to', 'provide', 'access', 'to', 'common', 'text-processing', 'operations', 'through', 'a', 'familiar', 'interface', 'You', 'can', 'treat', 'TextBlob', 'objects', 'as', 'if', 'they', 'were', 'Python', 'strings', 'that', 'learned', 'how', 'to', 'do', 'Natural', 'Language', 'Processing']

[Sentence("TextBlob aims to provide access to common text-processing operations 
through a familiar interface."), Sentence("You can treat TextBlob objects as if they were Python 
strings that learned how to do Natural Language Processing.")]



## Sentiments

In [13]:
# Short sample texts to score.
tweets = [
    "I love bigmacs",
    "I hate this traffic!",
    "American Idol is awesome!",
    "this song is lame",
    "Macy's is crap",
    "Macy's is the crap",
    "I love you a little",
]

# Print each text next to its Sentiment(polarity, subjectivity) result.
# A loop-local name is used instead of `tb` so the document blob bound to
# `tb` by earlier cells is not silently overwritten when this cell runs.
for tweet in tweets:
    tweet_blob = TextBlob(tweet)
    print("{} ==> {}".format(tweet, tweet_blob.sentiment))

I love bigmacs ==> Sentiment(polarity=0.5, subjectivity=0.6)
I hate this traffic! ==> Sentiment(polarity=-1.0, subjectivity=0.9)
American Idol is awesome! ==> Sentiment(polarity=0.5, subjectivity=0.5)
this song is lame ==> Sentiment(polarity=-0.5, subjectivity=0.75)
Macy's is crap ==> Sentiment(polarity=-0.8, subjectivity=0.8)
Macy's is the crap ==> Sentiment(polarity=-0.8, subjectivity=0.8)
I don't don't love you ==> Sentiment(polarity=0.5, subjectivity=0.6)


## Inflection and Lemmatization

In [4]:
from textblob import Word

# Singular nouns to inflect; the last two have irregular plurals.
words = ["cat", "dog", "man", "person"]
for singular in words:
    plural = Word(singular).pluralize()
    print(plural)

cats
dogs
men
people


In [5]:
# Lemmatize: reduce an inflected word to its base form.
# The 'v' argument asks for the word to be treated as a verb.
from textblob import Word
went = Word("went")
print(went.lemmatize('v'))

go


## WordNet integration / Definitions

In [6]:
from textblob import Word

# define() returns the list of definitions for the word; print the whole
# list, followed by a blank line.
linux = Word("linux")
print(linux.define(), end="\n\n")

# The .definitions property exposes definitions as an iterable; show each
# one as its own bullet.
basketball = Word("basketball")
for sense in basketball.definitions:
    print(f"- {sense}")

['an open-source version of the UNIX operating system']

- a game played on a court by two opposing teams of 5 players; points are scored by throwing the ball through an elevated horizontal hoop
- an inflated ball used in playing basketball


## Word Counts

In [7]:
# Sample passage with several repeated words.
text = """It was a sunny day! We went to the dog park.  Lots of dogs were running around.  
My dog likes to run too; so he had a great time.  
I bought ice cream from the ice cream truck. Yummy!
It was a perfect sunny day!"""

tb = TextBlob(text)

# word_counts maps each lower-cased word to its number of occurrences.
counts = tb.word_counts
print(counts, end="\n\n")

# Look up the frequency of a single word.
print(counts['sunny'])

defaultdict(<class 'int'>, {'it': 2, 'was': 2, 'a': 3, 'sunny': 2, 'day': 2, 'we': 1, 'went': 1, 'to': 2, 'the': 2, 'dog': 2, 'park': 1, 'lots': 1, 'of': 1, 'dogs': 1, 'were': 1, 'running': 1, 'around': 1, 'my': 1, 'likes': 1, 'run': 1, 'too': 1, 'so': 1, 'he': 1, 'had': 1, 'great': 1, 'time': 1, 'i': 1, 'bought': 1, 'ice': 2, 'cream': 2, 'from': 1, 'truck': 1, 'yummy': 1, 'perfect': 1})

2


## Ngrams

In [8]:
# Same sample passage as the word-count demo, repeated here so this cell is
# self-contained.
text = """It was a sunny day! We went to the dog park.  Lots of dogs were running around.  
My dog likes to run too; so he had a great time.  
I bought ice cream from the ice cream truck. Yummy!
It was a perfect sunny day!"""

tb = TextBlob(text)

# Build every run of two consecutive words (bigrams) and print the list.
print("n=2 grams")
bigrams = tb.ngrams(n=2)
print(bigrams)


n=2 grams
[WordList(['It', 'was']), WordList(['was', 'a']), WordList(['a', 'sunny']), WordList(['sunny', 'day']), WordList(['day', 'We']), WordList(['We', 'went']), WordList(['went', 'to']), WordList(['to', 'the']), WordList(['the', 'dog']), WordList(['dog', 'park']), WordList(['park', 'Lots']), WordList(['Lots', 'of']), WordList(['of', 'dogs']), WordList(['dogs', 'were']), WordList(['were', 'running']), WordList(['running', 'around']), WordList(['around', 'My']), WordList(['My', 'dog']), WordList(['dog', 'likes']), WordList(['likes', 'to']), WordList(['to', 'run']), WordList(['run', 'too']), WordList(['too', 'so']), WordList(['so', 'he']), WordList(['he', 'had']), WordList(['had', 'a']), WordList(['a', 'great']), WordList(['great', 'time']), WordList(['time', 'I']), WordList(['I', 'bought']), WordList(['bought', 'ice']), WordList(['ice', 'cream']), WordList(['cream', 'from']), WordList(['from', 'the']), WordList(['the', 'ice']), WordList(['ice', 'cream']), WordList(['cream', 'truck'])

## Language Detection & Translation

In [15]:
# NOTE(review): translate() and detect_language() call an external web
# service, so this cell needs network access. They are believed to be
# deprecated in newer TextBlob releases -- confirm the installed version
# still provides them.
text_en = "I just had dinner"
print(text_en)

# Translate the same English sentence into two target languages.
blob_en = TextBlob(text_en)
print("to Spanish : {} ".format(blob_en.translate(to='es')))
print("to Japanese : {} ".format(blob_en.translate(to='ja')))

# Despite the `_jp` suffix, this sample is English text, so the detector is
# expected to report "en".
text_jp = "hello"
detected = TextBlob(text_jp).detect_language()
print("Language detection : {} ".format(detected))

I just had dinner
to Spanish : Acabo de cenar 
to Japanese : 私はちょうど夕食 
Language detection : en 
