# LACLICHEV


Import LACLICHEV Dependencies

In [None]:
#Python
import os
import datetime

#Extractor
from dataExtractors.theGuardianExtractor import TheGuardianExtractor

#Indexer
from dataIndexer.indexer import Indexer

#DB
from dataDB.dbHandler    import DBHandler

#Plotly
import plotly.offline        as     pOff
import plotly.figure_factory as     ff
from   plotly.graph_objs     import Scatter, Layout

#GMaps
import gmaps

LACLICHEV Configuration

In [None]:
# Create a database for the obtained data and the user query
archivedDB = DBHandler('ArchivedDB')
queryDB    = DBHandler('QueryDB')

# Initialize plotly in offline mode
pOff.init_notebook_mode(connected=True)

# Configure GMaps
# Prefer a key supplied through the GMAPS_API_KEY environment variable so the
# credential does not have to live in the notebook; the literal is kept only
# as a fallback so existing setups keep working.
# NOTE(review): this key is committed in plain text -- rotate it and drop the fallback.
gmaps.configure(api_key=os.environ.get("GMAPS_API_KEY", "AIzaSyCjBzqKcJoUUd1ALelOL1qeG6jgRPHYmcA"))

## Get the user's search query

Request content containing this free text.

Supports AND(&), OR(|) and NOT(!) operators, and exact phrase queries.

e.g. storm, heavy storm, snow & (rain | storms), storm & ! snow

In [None]:
# Prompt for the free-text search query; the raw string is kept in userInput
# and passed to TheGuardianExtractor in the next cell.
userInput = input('What are you searching? ')

Generate and execute user's query

In [None]:
# Build the extractor from the raw user input and fetch the matching content
theGuardian        = TheGuardianExtractor(userInput)
theGuardianContent = theGuardian.getContent()

# Record what was asked, when (naive UTC timestamp), how many items came back
# and the extractor's keywords, then store that record in QueryDB
queryDoc = dict(
    query=theGuardian.getQuery(),
    date=datetime.datetime.utcnow(),
    articlesSize=len(theGuardianContent),
    keys=theGuardian.getKeywords(),
)
queryDB.Insert(queryDoc)

Save content to DB, remove duplicates and recover all the documents in the DB

In [None]:
# Store the freshly fetched content, then de-duplicate on the 'name' field
# (presumably keeping one document per name -- see DBHandler.RemoveDuplicatesBy)
archivedDB.Insert(theGuardianContent)
archivedDB.RemoveDuplicatesBy('name')

# From here on theGuardianContent is whatever the DB returns, not just the
# content fetched for this query
theGuardianContent = archivedDB.GetDocuments()
# NOTE(review): the DB is emptied right after being read back, so nothing
# persists between runs -- confirm this is intended, and that GetDocuments()
# returns a materialized collection rather than a lazy cursor
archivedDB.Empty()

Index documents

In [None]:
# Create the indexer with its debug flag on and index the documents that were
# read back from the archive DB
documentIndexer = Indexer(debug=True)
documentIndexer.IndexDocs(theGuardianContent)

Run an index search

In [None]:
# Example lookup: search the index for "weather" with the Indexer.TAGS selector
# (presumably restricting the search to the tags field -- confirm in Indexer).
# The result is not assigned; it only appears as this cell's output.
documentIndexer.Search("weather", Indexer.TAGS)

## Generate Frequency Matrix

In [None]:
# Build the frequency matrix from the indexer. The cells below read it as
# matrix[row][column], using row 0 and column 0 as labels.
# NOTE(review): the effect of scattered=True is defined by Indexer.FreqMatrix
# and is not visible here
matrix = documentIndexer.FreqMatrix(scattered=True)

Table Configuration

In [None]:
# Window of the frequency matrix to display.
# Row 0 and column 0 are read as labels by the plotting cell, so both start
# offsets must be >= 1.
columnStart, columnSize = 1, 10       # first column shown, number of columns
rowStart,    rowSize    = 5000, 20    # first row shown, number of rows

# Turn the sizes into exclusive END indices (start + count); the plotting cell
# uses columnSize / rowSize as slice and range upper bounds
columnSize += columnStart
rowSize    += rowStart

In [None]:
# Build the slice of the frequency matrix to plot. columnSize and rowSize are
# exclusive END indices here (the configuration cell adds the start offsets).
#Header row: corner cell plus the selected column labels
sliceM = [[matrix[0][0]] + matrix[0][columnStart:columnSize]]
# Clamp the row window to the matrix so an oversized rowStart/rowSize shows
# the rows that exist instead of raising IndexError
lastRow = min(rowSize, len(matrix))
# Each data row keeps its first cell (the row label) followed by the selected columns
sliceM.extend([matrix[i][0]] + matrix[i][columnStart:columnSize]
              for i in range(rowStart, lastRow))
#Generate Table Plot
table = ff.create_table(sliceM, index=True, index_title='Term', height_constant=20)
pOff.iplot(table, filename='FreqMtrx')

## Geopolitical Entities
Analyze a document and obtain its possible Geopolitical Entities

In [None]:
# Analyze a document for geopolitical entities (0 is presumably the index of
# the document to analyze -- confirm in Indexer.AnalyzeDocument). The result is
# consumed below as a mapping of entity name -> details with the keys
# 'latitude', 'longitude', 'location' and 'geojson'.
cities = documentIndexer.AnalyzeDocument(0)

# Report how many entries came back
print("\nFound %d cities" % (len(cities)))

In [None]:
#Obtain GeoJSON Features
# cities is read as: name -> {'latitude', 'longitude', 'location', 'geojson'}.
# Three parallel lists are built, one entry per city: marker coordinates,
# GeoJSON features and the HTML shown in each marker's info box.
infoBoxTemplate = """ <dl> <dt>{0}</dt> <dd>{1}</dd> <dt>Location</dt><dd>{2}, {3}</dd> </dl>"""
features        = []
markers         = []
markersInfo     = []
for index, (city, value) in enumerate(cities.items(), start=1):
    markers.append((value['latitude'], value['longitude']))
    features.append({'type'      : 'Feature',
                     'geometry'  : value['geojson'],
                     'properties': {'name': value['location']}})
    markersInfo.append(infoBoxTemplate.format(city, value['location'], value['latitude'], value['longitude']))
    # Numbered listing of every entity and its resolved location
    print("{0}.- {1} - {2}".format(index, city, value['location']))

#Generate a FeatureCollection GeoJSON
features = {'type':'FeatureCollection',
           'features': features }

#Generate one color per city, evenly spaced over [0, max_value) (deterministic, not random)
n = len(cities)
max_value = 16581375 #255**3
# Guard n == 0: no cities means no colors instead of a ZeroDivisionError
interval  = max_value // n if n else 0
# Produce exactly n values. Stepping range(0, max_value, interval) yields
# n + 1 values whenever n does not divide max_value (e.g. n = 2), which no
# longer matches the number of features.
colors    = [hex(k * interval)[2:].zfill(6) for k in range(n)]
# Despite the name, hexColors holds (r, g, b) integer tuples decoded from the hex strings
hexColors = [(int(c[:2], 16), int(c[2:4], 16), int(c[4:], 16)) for c in colors]

In [None]:
# Assemble the map from two layers: the cities' GeoJSON shapes and their markers
mapFig      = gmaps.figure()
# hexColors supplies both the fill and the stroke colors of the GeoJSON layer
citiesLayer = gmaps.geojson_layer(features, fill_color=hexColors, stroke_color=hexColors, fill_opacity=0.8)
# Markers are placed at the (latitude, longitude) pairs, with markersInfo as info-box content
markerLayer = gmaps.marker_layer(markers, info_box_content=markersInfo)
mapFig.add_layer(markerLayer)
mapFig.add_layer(citiesLayer)
# Last expression of the cell, so the notebook renders the figure
mapFig