# LACLICHEV


Import LACLICHEV Dependencies

In [40]:
from IPython.display import HTML

def hideCode():
    """Build the HTML snippet that hides the notebook's input cells.

    The markup defines a JavaScript ``code_toggle`` function that hides or
    shows every ``div.input`` element and flips the ``code_show`` flag each
    time it runs. The function is registered to run once when the document is
    ready and is also bound to the "here" link in the accompanying text.

    NOTE(review): the script relies on ``$`` being defined on the page
    (presumably jQuery from the notebook frontend) - confirm for the target UI.

    Returns:
        The markup wrapped in an ``HTML`` display object.
    """
    toggleMarkup = '''<script>
    code_show=true; 
    function code_toggle() {
     if (code_show){
     $("div.input").hide();
     } else {
     $("div.input").show();
     }
     code_show = !code_show
    } 
    $( document ).ready(code_toggle);
    </script>
    The raw code for this IPython notebook is by default hidden for easier reading.
    To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.'''
    return HTML(toggleMarkup)

# Last expression of the cell, so the returned HTML object is what the cell displays
hideCode()

In [25]:
#Python
import os
import datetime

#Extractor
from dataExtractors.theGuardianExtractor import TheGuardianExtractor

#Indexer
from dataIndexer.indexer import Indexer

#DB
from dataDB.dbHandler    import DBHandler

#Plotly
import plotly.offline        as     pOff
import plotly.figure_factory as     ff
import plotly.graph_objs     as     go
from   plotly.graph_objs     import Scatter, Layout

#GMaps
import gmaps

In [26]:
# Create a database for the obtained data and the user query
archivedDB = DBHandler('ArchivedDB')
queryDB    = DBHandler('QueryDB')

# Initialize plotly in offline mode
pOff.init_notebook_mode(connected=True)

# Configure GMaps
# The API key is read from the environment so it is never stored in (or shared with) the notebook.
# Export GOOGLE_API_KEY before starting Jupyter.
gmapsApiKey = os.environ.get('GOOGLE_API_KEY')
if not gmapsApiKey:
    raise RuntimeError('GOOGLE_API_KEY environment variable is not set; '
                       'export it with your Google Maps API key before running this notebook')
gmaps.configure(api_key=gmapsApiKey)

## Get the user's search query

Request content containing this free text.

Supports AND(&), OR(|) and NOT(!) operators, and exact phrase queries.

e.g. storm, heavy storm, snow & (rain | storms), storm & ! snow

In [27]:
# Prompt for the free-text search query; the typed string is kept in userInput for the extractor cell
userInput = input('What are you searching? ')

What are you searching? heavy storms


Generate and execute the user's query

In [28]:
# Build the extractor from the user's text and fetch the matching content
theGuardian        = TheGuardianExtractor(userInput)
theGuardianContent = theGuardian.getContent()

# Record what was asked, when, how many articles came back and the extractor's keywords
queryDoc = dict(
    query        = theGuardian.getQuery(),
    date         = datetime.datetime.utcnow(),
    articlesSize = len(theGuardianContent),
    keys         = theGuardian.getKeywords(),
)

# Save the query to QueryDB
queryDB.Insert(queryDoc)

Retrieving: |██████████████████████████████████████████████████| 100.0% - 0.257s - 69 of 69

Save content to DB, remove duplicates and recover all the documents in the DB

In [29]:
# Store the freshly fetched articles, then remove duplicates keyed on the 'name' field
archivedDB.Insert(theGuardianContent)
archivedDB.RemoveDuplicatesBy('name')

# From here on theGuardianContent holds the documents recovered from the DB, not the raw extractor result
theGuardianContent = archivedDB.GetDocuments()
# NOTE(review): Empty() presumably clears ArchivedDB, so nothing would persist between runs - confirm this is intended
archivedDB.Empty()

Index documents

In [30]:
# Create the indexer (debug=True presumably enables the diagnostic printout shown below - confirm)
documentIndexer = Indexer(debug=True)
# Index the documents recovered from the DB in the previous cell
documentIndexer.IndexDocs(theGuardianContent)

Lucene version is:  6.5.0
Index Directory:  RAM Memory
Indexed 59 documents (59 docs in index)


Run an index search

In [31]:
# Example search: look for "weather" in the field selected by Indexer.TAGS (reported as 'tags:weather' in the output)
documentIndexer.Search("weather", Indexer.TAGS)

Found 1 document(s) that matched query 'tags:weather':
Document Nº: 12 - Score: 1.89279
Name: Storm-weary New York City needs to adapt faster to climate change
Tags: public-leaders-network|us-news|weather|environment|world



## Generate Frequency Matrix

In [32]:
# Build the frequency matrix. Later cells read row 0 as the header row and column 0 of every row as its label.
# NOTE(review): the exact effect of scattered=True is not visible here - see Indexer.FreqMatrix
matrix = documentIndexer.FreqMatrix(scattered=True)

Generating Frequency Matrix...
Progress: |██████████████████████████████████████████████████| 100.0% - 1.455s - 58 of 58
Scattering Frequency Matrix...
Progress: |██████████████████████████████████████████████████| 100.0% - 2.358s - 6038 of 6038

In [33]:
# Frequency Matrix Table Configuration
# Each pair is (first index shown, number of entries shown).
# columnStart and rowStart must both be >= 1.
columnStart, columnSize = 1,   10
rowStart,    rowSize    = 455, 20

#Create Matrix Size
# After this step columnSize / rowSize hold the exclusive end index of the slice, not a count
columnSize += columnStart
rowSize    += rowStart

#### Frequency Matrix Table Preview

In [34]:
#Include Document Index
# Header row first: its label cell followed by the selected document columns
sliceM  = [[matrix[0][0]] + matrix[0][columnStart:columnSize]]
# Slicing (instead of indexing row by row) silently clamps the window to the rows that exist,
# so a matrix with fewer than rowSize rows yields a shorter table instead of an IndexError.
sliceM += [[termRow[0]] + termRow[columnStart:columnSize] for termRow in matrix[rowStart:rowSize]]
#Generate Table Plot
table = ff.create_table(sliceM, index=True, index_title='Term', height_constant=20)
pOff.iplot(table, filename='FreqMtrx')

In [35]:
# Heatmap Configuration
wordsOffset = 455   # first matrix row included in the heatmap
wordsToShow = 200   # number of rows to include

# After this step wordsToShow holds the exclusive end row, not a count
wordsToShow += wordsOffset

#### Frequency Matrix Heatmap Preview

In [36]:
# X axis: the header row without its label cell
xAxis = matrix[0][1:]
# Slicing clamps the requested window to the rows that exist, so a matrix with fewer than
# wordsToShow rows gives a smaller heatmap instead of an IndexError.
shownRows = matrix[wordsOffset:wordsToShow]
# NOTE(review): the format string starts with invisible zero-width characters - presumably so
# plotly treats every label as text rather than a number. Do not strip them when editing.
yAxis = ['‌‌{0}'.format(termRow[0]) for termRow in shownRows]
zAxis = [termRow[1:] for termRow in shownRows]

trace = go.Heatmap(z=zAxis, x=xAxis, y=yAxis)
data  = [trace]
pOff.iplot(data, filename='heatmap')

## Geopolitical Entities
Analyze a document and obtain its possible geopolitical entities

In [37]:
# Analyze the document at index 0; the result is used below as a mapping of city name -> location details
cities = documentIndexer.AnalyzeDocument(0)

# Report how many entries were returned
print("\nFound %d cities" % (len(cities)))

Analazing Document 0
Progress: |██████████████████████████████████████████████████| 100.0% - 0.862s - 22 of 22
Found 5 cities


In [38]:
#Obtain GeoJSON Features
# For every city collect a marker position, an HTML info box and a label. Cities whose
# geometry is not a Point or a LineString are also collected as GeoJSON features.
infoBoxTemplate = """ <dl> <dt>{0}</dt> <dd>{1}</dd> <dt>Location</dt><dd>{2}, {3}</dd> </dl>"""
features        = []
markers         = []
markersInfo     = []
cityLabels      = []
for city, value in cities.items():
    markers.append((value['latitude'], value['longitude']))
    markersInfo.append(infoBoxTemplate.format(city, value['location'], value['latitude'], value['longitude']))
    cityLabels.append(city)
    if value['geojson']['type'] not in ('Point', 'LineString'):
        feature = {'type' : 'Feature',
                            'geometry'  :   value['geojson'],
                            'properties': { 'name':value['location'] } }
        features.append(feature)
    print("{0}.- {1} - {2}".format(len(cityLabels), city, value['location']))

#Generate a FeatureCollection GeoJSON
features = {'type':'FeatureCollection',
           'features': features }

#Generate evenly spaced colors, exactly one per feature
n         = len(features['features'])
max_value = 16581375 #255**3
# Guard n == 0 (no features collected), which previously raised ZeroDivisionError
interval  = max_value // n if n else 0
# Build exactly n values. Stepping through range(0, max_value, interval) produced an extra
# color whenever max_value was not divisible by n, so colors and features got out of sync.
colors    = [hex(step * interval)[2:].zfill(6) for step in range(n)]
# Split every 6-digit hex string into an (R, G, B) tuple of ints
hexColors = [(int(c[:2], 16), int(c[2:4], 16), int(c[4:], 16)) for c in colors]

1.- Marine - Marineo, PA, SIC, Italia
2.- Australia - Australia
3.- Japan - 日本
4.- Queensland - QLD, Australia
5.- Okinawa - 沖縄県, 日本


In [39]:
# Build both layers first, then attach them to a fresh figure
markerLayer = gmaps.marker_layer(markers, hover_text=cityLabels, info_box_content=markersInfo)
citiesLayer = gmaps.geojson_layer(features, fill_color=hexColors, stroke_color=hexColors, fill_opacity=0.8)

mapFig = gmaps.figure()
# Markers are added before the shape layer, same order as before
for layer in (markerLayer, citiesLayer):
    mapFig.add_layer(layer)

# Last expression of the cell, so the map widget is displayed
mapFig