# **VERSION 5**

---



##**A) pre-work segment**

###**1. download and install modules**

In [1]:
!pip install ConfigParser



In [2]:
pip install googlemaps



###**2. mount drive**

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


###**3. import needed classes and methods**

In [4]:
import configparser
from textblob import TextBlob
import tweepy
import matplotlib.pyplot as plt
import regex as re
import spacy
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import googlemaps
import sys
import os
import pandas as pd



###**4. download from imported modules**

In [5]:
nltk.download("vader_lexicon")

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

###**5. connect & authenticate**

In [6]:
!ls '/content/drive/My Drive/Colab Notebooks/uemprojectgroup covid_1.properties'

'/content/drive/My Drive/Colab Notebooks/uemprojectgroup covid_1.properties'


In [7]:
config=configparser.RawConfigParser()
config.read('/content/drive/My Drive/Colab Notebooks/uemprojectgroup covid_1.properties')
print(config.sections())

['twitter', 'gmaps']


In [8]:
consumerKey=config.get('twitter','consumerKey')
consumerSecret=config.get('twitter','consumerSecret')
accessToken=config.get('twitter','accessToken')
accessTokenSecret=config.get('twitter','accessTokenSecret')

In [9]:
#print(consumerKey,"\n",consumerSecret,"\n",accessToken,"\n",accessTokenSecret)

In [10]:
auth=tweepy.OAuthHandler(consumer_key=consumerKey,consumer_secret=consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)
api=tweepy.API(auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True)

In [11]:
gmaps=googlemaps.Client(key=config.get('gmaps','gApiKey'))

##**B) methods for work segment**

###**1. send tweet**

In [12]:
def sendTweet():
  tweet=input("enter your tweet: ")
  api.update_status(tweet)

###**2. search tweet with keyword**

In [13]:
def searchTweetWithKeyword():

  text_user_loc_geo=[]

  while True:
    searchTerm,noOfRecords=input("enter keyword and max no. of records needed:").split()
    noOfRecords=int(noOfRecords)

    a_tweets=tweepy.Cursor(api.search, q=searchTerm+"-filter:retweets",lang="English").items(noOfRecords)
    for i in a_tweets:
      text_user_loc_geo.append([i.text,i.user.screen_name,i.user.location,i.geo])

    ch=input("do you want to search and append another keyword in your dataset?\n y or n:")
    if ch=='n' or ch=='N':
      break

 
  totalNoOfRecords=len(text_user_loc_geo)
  print("total no. of records: ",totalNoOfRecords)

  for i in text_user_loc_geo:
      print(i)

  covid_df=pd.DataFrame(text_user_loc_geo,columns=["tweets","user_name","location","geo"])
  return covid_df,totalNoOfRecords

###**3. check tweet sentiments using TextBlob**

In [14]:
def percentage(part,whole):
  return part*100/whole


In [15]:
def checkSentimentTextBlob(covid_df,noOfRecords):
  positive=0
  negative=0
  neutral=0
  polarity=0

  for i in range(noOfRecords):
    textblob_text=TextBlob(covid_df.iloc[i,0])
    value=textblob_text.sentiment.polarity
    polarity+=value
    
    if value<0:
        negative+=1
    elif value==0:
        neutral+=1
    elif value>0:
        positive+=1

  print("raw numbers:\npositive=",positive,"\nnegative=",negative,"\nneutral=",neutral,"\npolarity=",polarity)
  
  positive=format(percentage(positive,noOfRecords),'.2f')
  negative=format(percentage(negative,noOfRecords),'.2f')
  neutral=format(percentage(neutral,noOfRecords),'.2f')
  polarity=format(percentage(polarity,noOfRecords),'.2f')

  print("percentage:\npositive=",positive,"\nnegative=",negative,"\nneutral=",neutral,"\npolarity=",polarity)

###**4. count tweet from each location (city)**

In [16]:
def tweetLocations(covid_df,noOfRecords):
  print(covid_df['location'].value_counts())

###**5. clean tweets**

In [17]:
def cleanTweets(covid_df,noOfRecords):
  for i in range(noOfRecords):
    text=covid_df.iloc[i,0]
    
    text=re.sub("@[\w]*","",text)
    text=re.sub("RT @[\w]*:","",text)
    text=re.sub("https?://[a-zA-Z0-9./]*","",text)
    text=re.sub("\n","",text)

    covid_df.iloc[i,0]=text
  
  print(covid_df)
  return covid_df,noOfRecords

###**6. convert dataframe to csv**

In [18]:
def toCSV(covid_df):
  covid_df.to_csv('covid_csv.csv')

###**7. show entites in text**

In [19]:
def showEntities(covid_df,noOfRecords):
  nlp=spacy.load('en_core_web_sm')

  covid_df['entities']=covid_df['tweets'].apply(lambda tweet: [(ent.text,ent.label_) if (not ent.text.startswith('#')) else "" for ent in nlp(tweet).ents])
  
  covid_df.head(20)
  return covid_df,noOfRecords

###**8. check tweet sentiments using Sentiment Intensity Analyzer**

In [20]:
def tweetSentimentIntensityAnalyzer(covid_df,noOfRecords):
  sid=SentimentIntensityAnalyzer()
  covid_df['sentiment']=covid_df['tweets'].apply(lambda tweet: sid.polarity_scores(tweet))
  return covid_df,noOfRecords 

###**9. categorizing by location using googlemaps api**

In [21]:
def getCountry(location):
  result=gmaps.geocode(location)
  result=(result[0]['formatted_address'].split(','))[-1].strip()
  return result

In [22]:
def tweetLocationsUsingGmaps(covid_df,noOfRecords):
  covid_df['country']=covid_df['location'].apply(lambda location: "" if(not location.strip() ) else getCountry(location))

  print(covid_df['country'].value_counts())

  return covid_df,noOfRecords


##**C) main method segment**

In [23]:
def main():
  covid_df=None
  noOfRecords=None

  print(
  "press 0 to display dataframe\n"
  "press 1 to send tweet\n"
  "press 2 to find tweets with keyword\n"
  "press 3 to check sentiments using TextBlob\n"
  "press 4 to categorize and count by location (city)\n"
  "press 5 to clean tweets\n"
  "press 6 to covert dataframe into csv\n"
  "press 7 to show entities in text\n"
  "press 8 to check tweet sentiments using Sentiment Intensity Analyzer\n"
  "press 9 to categorize and count by location (country) using googlemaps api\n"
  "press 10 to exit\n")

  while True:
    ch=int(input("choose: "))
    if ch==0:
        print(covid_df)
    elif ch==1:
      sendTweet()
    elif ch==2:
      covid_df,noOfRecords=searchTweetWithKeyword()
    elif ch==3:
      checkSentimentTextBlob(covid_df,noOfRecords)
    elif ch==4:
      tweetLocations(covid_df,noOfRecords)
    elif ch==5:
      covid_df,noOfRecords=cleanTweets(covid_df,noOfRecords)
    elif ch==6:
      toCSV(covid_df)
    elif ch==7:
      covid_df,noOfRecords=showEntities(covid_df,noOfRecords)
    elif ch==8:
      covid_df,noOfRecords=tweetSentimentIntensityAnalyzer(covid_df,noOfRecords)
    elif ch==9:
      covid_df,noOfRecords=tweetLocationsUsingGmaps(covid_df,noOfRecords)
    elif ch==10:
      break
    else:
      print("INVALID CHOICE !")

In [24]:
if __name__=="__main__":
 main()

press 0 to display dataframe
press 1 to send tweet
press 2 to find tweets with keyword
press 3 to check sentiments using TextBlob
press 4 to categorize and count by location (city)
press 5 to clean tweets
press 6 to covert dataframe into csv
press 7 to show entities in text
press 8 to check tweet sentiments using Sentiment Intensity Analyzer
press 9 to categorize and count by location (country) using googlemaps api
press 10 to exit

choose: 1
enter your tweet: version 5 complete, google maps api part cannnot be implemented
choose: 2
enter keyword and max no. of records needed:covid 100
do you want to search and append another keyword in your dataset?
 y or n:n
total no. of records:  7
['Access to human rights spaces like the @UN_HRC and @IACourtHR has taken a hit with #COVID__19 , but this is also an… https://t.co/08GZKV5O1f', 'OGR_EN', '', None]
['@ElizabethPeloza Ah - the downfall of covering via livestream during COVID! Thanks for update. Must be on hard copi… https://t.co/heInbeLKY



---


# **END OF CODE, ROUGH WORK BEGINS:**


---



In [25]:
print(__name__)

__main__


In [26]:
df=pd.DataFrame([['qwerty','be','mumbai, india','d','e'],['uiop','be','canada',None,'e'],['wasd','bi','dallas tx','d','e'],['fghjklzxc','b','european union','d','e']],columns=['A','B','C','D','E'])
df

Unnamed: 0,A,B,C,D,E
0,qwerty,be,"mumbai, india",d,e
1,uiop,be,canada,,e
2,wasd,bi,dallas tx,d,e
3,fghjklzxc,b,european union,d,e


In [27]:
df.iloc[3,2]

'european union'

In [28]:
df['A']

0       qwerty
1         uiop
2         wasd
3    fghjklzxc
Name: A, dtype: object

In [29]:
listo=[]
for i in range(len(df)):
  listo.append(df.iloc[i,0])

print(listo)

['qwerty', 'uiop', 'wasd', 'fghjklzxc']


In [30]:
df['B'].value_counts()

be    2
b     1
bi    1
Name: B, dtype: int64

In [31]:
nlp=spacy.load('en_core_web_sm')
text = ("When Sebastian Thrun started working on self-driving cars at "
        "Google in 2007, few people outside of the company took him "
        "seriously. “I can tell you very senior CEOs of major American "
        "car companies would shake my hand and turn away because I wasn’t "
        "worth talking to,” said Thrun, in an interview with Recode earlier "
        "this week.")
print(type(text))


for ent in nlp(text).ents:
  print(ent.text,ent.label_)

<class 'str'>
Sebastian NORP
Google ORG
2007 DATE
American NORP
Recode ORG
earlier this week DATE


In [32]:
df['A'].apply(lambda x: x+': test')

0       qwerty: test
1         uiop: test
2         wasd: test
3    fghjklzxc: test
Name: A, dtype: object

In [33]:
df['A'].apply(lambda x: [len(x) for i in range(len(x))])

0             [6, 6, 6, 6, 6, 6]
1                   [4, 4, 4, 4]
2                   [4, 4, 4, 4]
3    [9, 9, 9, 9, 9, 9, 9, 9, 9]
Name: A, dtype: object

In [34]:
df['A'].apply(lambda x: ["e" if i%2==0 else "o"  for i in range(len(x))])

0             [e, o, e, o, e, o]
1                   [e, o, e, o]
2                   [e, o, e, o]
3    [e, o, e, o, e, o, e, o, e]
Name: A, dtype: object

In [35]:
df['e/o 1']=df['A'].apply(lambda x: ["e" if i%2==0 else "o"  for i in range(len(x))])
df

Unnamed: 0,A,B,C,D,E,e/o 1
0,qwerty,be,"mumbai, india",d,e,"[e, o, e, o, e, o]"
1,uiop,be,canada,,e,"[e, o, e, o]"
2,wasd,bi,dallas tx,d,e,"[e, o, e, o]"
3,fghjklzxc,b,european union,d,e,"[e, o, e, o, e, o, e, o, e]"


In [36]:
df['e/o 2']=df['A'].apply(lambda x: [(i,"e") if i%2==0 else (i,"o")  for i in range(len(x))])
df

Unnamed: 0,A,B,C,D,E,e/o 1,e/o 2
0,qwerty,be,"mumbai, india",d,e,"[e, o, e, o, e, o]","[(0, e), (1, o), (2, e), (3, o), (4, e), (5, o)]"
1,uiop,be,canada,,e,"[e, o, e, o]","[(0, e), (1, o), (2, e), (3, o)]"
2,wasd,bi,dallas tx,d,e,"[e, o, e, o]","[(0, e), (1, o), (2, e), (3, o)]"
3,fghjklzxc,b,european union,d,e,"[e, o, e, o, e, o, e, o, e]","[(0, e), (1, o), (2, e), (3, o), (4, e), (5, o..."


In [37]:
df['e/o 3']=df['A'].apply(lambda x: [(i,"e") if i%2==0 else ""  for i in range(len(x))])
df

Unnamed: 0,A,B,C,D,E,e/o 1,e/o 2,e/o 3
0,qwerty,be,"mumbai, india",d,e,"[e, o, e, o, e, o]","[(0, e), (1, o), (2, e), (3, o), (4, e), (5, o)]","[(0, e), , (2, e), , (4, e), ]"
1,uiop,be,canada,,e,"[e, o, e, o]","[(0, e), (1, o), (2, e), (3, o)]","[(0, e), , (2, e), ]"
2,wasd,bi,dallas tx,d,e,"[e, o, e, o]","[(0, e), (1, o), (2, e), (3, o)]","[(0, e), , (2, e), ]"
3,fghjklzxc,b,european union,d,e,"[e, o, e, o, e, o, e, o, e]","[(0, e), (1, o), (2, e), (3, o), (4, e), (5, o...","[(0, e), , (2, e), , (4, e), , (6, e), , (8, e)]"


In [38]:
df.to_csv('df.csv')

!ls

covid_csv.csv  df.csv  drive  sample_data


In [39]:
gmaps=googlemaps.Client(key=config.get('gmaps','gApiKey'))
geocode_result=gmaps.geocode(df.iloc[2,2])

print(geocode_result)
print(geocode_result[0])
print(geocode_result[0]['formatted_address'])
print(geocode_result[0]['formatted_address'].split(',')[-1])
print(geocode_result[0]['formatted_address'].split(',')[-1].strip())


ApiError: ignored

In [40]:
trial="  asd  "
print(not trial.strip())
print(not( not trial.strip()))

trial="    "
print(not trial.strip())
print(not( not trial.strip()))

False
True
True
False
