# LACLICHEV


Import LACLICHEV Dependencies

In [None]:
from IPython.display import HTML

def hideCode():
    """Return an HTML snippet that hides the notebook's code input cells.

    The returned ``HTML`` object embeds a script that defines
    ``code_toggle()``, which hides or shows every ``div.input`` element
    depending on the ``code_show`` flag and then flips that flag. The script
    registers ``code_toggle`` to run once the document is ready, and the
    snippet ends with a link that calls ``code_toggle()`` on click.

    NOTE(review): the script relies on ``$`` being available in the page
    (presumably jQuery provided by the notebook front end) -- confirm for the
    front end in use.
    """
    return HTML('''<script>
    code_show=true; 
    function code_toggle() {
     if (code_show){
     $("div.input").hide();
     } else {
     $("div.input").show();
     }
     code_show = !code_show
    } 
    $( document ).ready(code_toggle);
    </script>
    The raw code for this IPython notebook is by default hidden for easier reading.
    To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

# Last expression of the cell, so the returned HTML object becomes the cell output
hideCode()

In [None]:
#Python
import os
import datetime
import sys
sys.path.insert(1, os.getcwd() + "/..")


#Extractor
from dataExtractors.theGuardianExtractor import TheGuardianExtractor

#Indexer
from dataIndexer.indexer import Indexer

#DB
from dataDB.dbHandler    import DBHandler

#Plotly
import plotly.offline        as     pOff
import plotly.figure_factory as     ff
import plotly.graph_objs     as     go
from   plotly.graph_objs     import Scatter, Layout

#GMaps
import gmaps

In [None]:
# Create a database for the obtained data and the user query
archivedDB = DBHandler('ArchivedDB')
queryDB    = DBHandler('QueryDB')

# Initialize plotly in offline mode
pOff.init_notebook_mode(connected=True)

# Configure GMaps
# The API key is read from the GMAPS_API_KEY environment variable so that no
# credential is stored in (or committed with) the notebook.
gmapsApiKey = os.environ.get("GMAPS_API_KEY")
if not gmapsApiKey:
    # Keep going so the non-map cells still run; the map cells need a valid key.
    print("WARNING: GMAPS_API_KEY is not set; the Google Maps cells will likely fail to render.")
gmaps.configure(api_key=gmapsApiKey)

## Get the user's search query

Request content containing this free text.

Supports AND(&), OR(|) and NOT(!) operators, and exact phrase queries.

e.g. storm, heavy storm, snow & (rain | storms), storm & ! snow

In [None]:
# Prompt for the free-text query; the raw string is passed to the extractor
# and later reused as the query for the similarity ranking.
userInput = input('What are you searching? ')

Generate and execute user's query

In [None]:
# Build the extractor from the raw query text and fetch the matching content
theGuardian        = TheGuardianExtractor(userInput)
theGuardianContent = theGuardian.getContent()

#Save the query to QueryDB
# One record per search: the query as reported by the extractor, the current
# UTC time (a naive datetime), the number of items returned, and the
# extractor's keywords.
# NOTE(review): datetime.utcnow() is deprecated in recent Python versions;
# switching to an aware datetime may change how the DB stores it -- confirm
# against DBHandler before changing.
queryDoc = { "query":theGuardian.getQuery(),
             "date":datetime.datetime.utcnow(),
             "articlesSize": len(theGuardianContent),
             "keys": theGuardian.getKeywords()
           }
queryDB.Insert(queryDoc)

Save content to DB, remove duplicates and recover all the documents in the DB

In [None]:
# Store the freshly fetched content, then de-duplicate on the 'name' field
archivedDB.Insert(theGuardianContent)
archivedDB.RemoveDuplicatesBy('name')

# From here on theGuardianContent holds whatever GetDocuments() returns,
# not only the results of this query.
theGuardianContent = archivedDB.GetDocuments()
# NOTE(review): Empty() presumably clears ArchivedDB right after it was read,
# so nothing would persist between runs -- confirm this is intended.
archivedDB.Empty()

Index documents

In [None]:
# Create the indexer with its debug flag enabled and index the documents
# read back from the archive DB.
documentIndexer = Indexer(debug=True)
documentIndexer.IndexDocs(theGuardianContent)

Do an index search

In [None]:
# Example lookup with a hard-coded term ("weather"), independent of userInput.
# Indexer.TAGS is passed as the second argument (presumably selecting the
# indexed field to search -- confirm against Indexer).
documentIndexer.Search("weather", Indexer.TAGS)

## Generate Frequency Matrix

In [None]:
# Frequency matrix consumed by the table and heatmap cells, which read row 0
# as the header row and column 0 of every other row as that row's label.
matrix = documentIndexer.FreqMatrix(scattered=True)

In [None]:
# Frequency Matrix Table Configuration
# Start values are indices into the matrix; size values are counts of
# columns/rows to show.

#columnStart must be >= 1
columnStart = 1
columnSize  = 10
#rowStart must be >= 1
rowStart    = 455
rowSize     = 20

#Create Matrix Size
# After these two lines columnSize and rowSize hold exclusive END indices
# (start + count), not counts; the preview cell uses them as slice/range ends.
columnSize = columnSize + columnStart
rowSize    = rowSize    + rowStart

#### Frequency Matrix Table Preview

In [None]:
#Include Document Index
# Header row: the corner cell followed by the selected window of columns.
sliceM  = [[matrix[0][0]] + matrix[0][columnStart:columnSize]]
# Clamp the row window to the matrix length so that a matrix with fewer rows
# than the configured window does not raise IndexError.
for i in range(rowStart, min(rowSize, len(matrix))):
    # Row label (column 0) followed by the same window of columns.
    row = [matrix[i][0]] + matrix[i][columnStart:columnSize]
    sliceM.append(row)
#Generate Table Plot
if len(sliceM) > 1:
    table = ff.create_table(sliceM, index=True, index_title='Term', height_constant=20)
    pOff.iplot(table, filename='FreqMtrx')
else:
    # Only the header row was collected: the configured window starts past the last row.
    print("No rows to display: rowStart ({0}) is beyond the last matrix row ({1}).".format(rowStart, len(matrix) - 1))

In [None]:
# Heatmap Configuration
# wordsOffset is the first matrix row to plot; wordsToShow is the number of rows.
wordsToShow = 200
wordsOffset = 455

# After this line wordsToShow holds the exclusive END row index
# (offset + count), not a count.
wordsToShow = wordsOffset + wordsToShow

#### Frequency Matrix Heatmap Preview

In [None]:
# X axis: the header row without its corner cell.
xAxis = matrix[0][1:]
yAxis = []
zAxis = []
# Clamp the row window to the matrix length so that a matrix with fewer rows
# than the configured window does not raise IndexError.
for i in range(wordsOffset, min(wordsToShow, len(matrix))):
    # NOTE(review): the label prefix appears to contain invisible zero-width
    # characters (presumably so plotly treats every label as text) -- keep
    # them when editing this line.
    yAxis.append('‌‌{0}'.format(matrix[i][0]))
    zAxis.append(matrix[i][1:])

trace = go.Heatmap(z=zAxis, x=xAxis, y=yAxis)
data  = [trace]
pOff.iplot(data, filename='heatmap')

Cosine Similarity (Top 10)

In [None]:
# Second frequency matrix (requested with byTerms=False) fed to the
# similarity computation together with the user's raw query text.
freqMatrix = documentIndexer.FreqMatrix(byTerms=False)
# Keep only the first 10 entries
# (assumes GetSimilarity returns results ordered best-first -- TODO confirm).
top10      = documentIndexer.GetSimilarity(userInput, freqMatrix)[:10]

In [None]:
# Display top10 as the cell output
top10

## Geopolitical Entities
Analyze a document and obtain its possible geopolitical entities

In [None]:
# Analyze the document at index 0; the result is used below as a
# GeoJSON-style dict with a 'features' list.
features = documentIndexer.AnalyzeDocument(0)

# The reported count is the number of entries in features['features']
print("\nFound %d cities" % (len(features['features'])))

In [None]:
if len(features['features']) > 0:
    #Obtain GeoJSON Features
    # Every feature contributes a marker, an info box and a label; only
    # features whose geometry is neither a Point nor a LineString are kept
    # in `features` for the GeoJSON layer.
    infoBoxTemplate = """ <dl> <dt>{name}</dt> <dd>{location}</dd> <dt>Location</dt><dd>{latitude}, {longitude}</dd> </dl>"""
    markers         = []
    markersInfo     = []
    cityLabels      = []
    areaFeatures    = []
    for feature in features['features']:
        properties = feature['properties']
        markers.append((properties['latitude'], properties['longitude']))
        markersInfo.append(infoBoxTemplate.format(**properties))
        cityLabels.append(properties['name'])
        # Collect the features to keep instead of deleting from the list
        # being iterated: deleting in place skipped items and, with a fixed
        # index, always removed the first element.
        if feature['geometry']['type'] not in ('Point', 'LineString'):
            areaFeatures.append(feature)
        print("{0}.- {1} - {2}".format(len(cityLabels), properties['name'], properties['location']))
    features['features'] = areaFeatures

    #Generate evenly spaced colors, exactly one per remaining feature
    n         = len(features['features'])
    max_value = 16581375 #255**3
    # Guard against n == 0 (every feature was a Point/LineString) and keep the step >= 1.
    interval  = max(1, max_value // max(n, 1))
    # range() can yield one value more than n when n does not divide
    # max_value, so trim to n to match the number of features.
    colors    = [hex(I)[2:].zfill(6) for I in range(0, max_value, interval)][:n]
    # Convert each 6-digit hex string into an (R, G, B) tuple of ints.
    hexColors = [(int(i[:2], 16), int(i[2:4], 16), int(i[4:], 16)) for i in colors]

In [None]:
# Default to None so that the final expression does not raise NameError when
# there are no features to draw; a None result simply renders no output.
mapFig = None
if len(features['features']) > 0:
    mapFig      = gmaps.figure()
    # One fill/stroke color per feature, taken from hexColors.
    citiesLayer = gmaps.geojson_layer(features, fill_color=hexColors, stroke_color=hexColors, fill_opacity=0.8)
    markerLayer = gmaps.marker_layer(markers, hover_text=cityLabels, info_box_content=markersInfo)
    mapFig.add_layer(markerLayer)
    mapFig.add_layer(citiesLayer)
# Last expression of the cell: displays the map when one was built
mapFig