# LACLICHEV


Import LACLICHEV Dependencies

In [None]:
%%javascript

// Add a "kill and run-all" button group to the notebook toolbar, unless the
// toolbar already contains an element with id "kill-run-all" (cell re-run).
if ($(IPython.toolbar.selector.concat(' > #kill-run-all')).length == 0) {
  // Restart the kernel, then execute every cell once the new kernel is ready.
  var killAndRunAll = function () {
    IPython.notebook.kernel.restart();
    $(IPython.events).one('kernel_ready.Kernel', function () {
      IPython.notebook.execute_all_cells();
    });
  };

  IPython.toolbar.add_buttons_group([
    {
      'label': 'kill and run-all',
      'icon': 'fa fa-angle-double-down',
      'callback': killAndRunAll
    }
  ], 'kill-run-all');
}

In [None]:
import sys
import os
sys.path.insert(1, os.getcwd() + "/..")
#Data Explorer
from tools.dtExplorer import DTExplorer

In [None]:
# Initialize DTExplorer HTML
# NOTE(review): presumably injects the HTML/JS that hides the notebook's code
# cells — confirm in tools.dtExplorer.
DTExplorer.HideCodeCells()

In [None]:
#Python
import datetime

#Extractor
from dataExtractors.theGuardianExtractor import TheGuardianExtractor

#Indexer
from dataIndexer.indexer import Indexer

#DB
from dataDB.dbHandler    import DBHandler

#Plotly
import plotly.offline        as     pOff
import plotly.figure_factory as     ff
import plotly.graph_objs     as     go
from   plotly.graph_objs     import Scatter, Layout

#GMaps
import gmaps

# Geocoding
from dataEnhancer.geocode import Geocode

# ipywidgets
from   ipywidgets import interact, interactive, fixed, interact_manual, VBox, HBox
import ipywidgets as widgets

# IPython
from IPython.display import clear_output

In [None]:
# Create a database for the obtained data and the user query
archivedDB = DBHandler('ArchivedDB')
queryDB    = DBHandler('QueryDB')

# Initialize plotly in offline mode
pOff.init_notebook_mode(connected=True)

# Configure GMaps
# The API key is a credential and must not be stored in the notebook: it is read
# from the GMAPS_API_KEY environment variable, and only when that variable is
# missing or empty is it asked for interactively (getpass keeps it out of the
# cell output).
import getpass

gmapsApiKey = os.environ.get('GMAPS_API_KEY') or getpass.getpass('Google Maps API key: ')
gmaps.configure(api_key=gmapsApiKey)

## User's search query

Request content containing this free text.

Supports AND(&), OR(|) and NOT(!) operators, and exact phrase queries.

e.g. storm, heavy storm, snow & (rain | storms), storm & ! snow

In [None]:
# Free-text query typed by the user; it is handed to TheGuardianExtractor and
# reused later as the query for the similarity ranking.
userInput = input('What are you searching? ')

Generate and execute user's query

In [None]:
# Run the user's search and keep whatever content the extractor returns
theGuardian        = TheGuardianExtractor(userInput)
theGuardianContent = theGuardian.getContent()

# Log the search in QueryDB: the query, when it ran (UTC), how many articles
# came back and the extractor's keywords.
queryDoc = dict(
    query=theGuardian.getQuery(),
    date=datetime.datetime.utcnow(),
    articlesSize=len(theGuardianContent),
    keys=theGuardian.getKeywords(),
)
queryDB.Insert(queryDoc)

Index documents

In [None]:
# Index the content retrieved by the extractor.
# NOTE(review): debug=True presumably turns on verbose indexer output — confirm
# in dataIndexer.indexer.
documentIndexer = Indexer(debug=True)
documentIndexer.IndexDocs(theGuardianContent)

## Frequency Matrix

Generate the frequency matrix of all the documents retrieved from the latest search

In [None]:
# No/Yes toggle; it is bound to ScatterMatrix through `interactive` further down
toggle = widgets.ToggleButtons(
    options=['No', 'Yes'],
    description=' ',
    disabled=False,
    button_style='info'
)

# Global Variables
# colMin/colMax/rowMin/rowMax are replaced by the range widgets built in
# ShowMatrix; matrix is replaced by the frequency matrix in ScatterMatrix.
# ShowMatrixTable and ShowHeatmap read all five.
colMin = None
colMax = None
rowMin = None
rowMax = None
matrix = None

def ShowMatrixTable(b):
    """Plot the selected slice of the global ``matrix`` as a plotly table.

    Button click handler. The slice is taken from the global range widgets
    (colMin, colMax, rowMin, rowMax); row 0 of ``matrix`` is always emitted as
    the first table row and column 0 of every row is always kept.

    Args:
        b: the clicked button (unused).
    """
    clear_output()

    rangeIsValid = (colMin.value < colMax.value) and (rowMin.value < rowMax.value)
    if not rangeIsValid:
        print("Wrong Table Range")
        return

    # Column 0 is kept separately, so the widget values are shifted by one;
    # the end is shifted once more to make colMax inclusive.
    firstCol = colMin.value + 1
    lastCol  = colMax.value + 2

    # Row 0 first (Include Document Index), then the requested rows
    rowNumbers = [0] + list(range(rowMin.value, rowMax.value))
    tableRows  = [[matrix[r][0]] + matrix[r][firstCol:lastCol] for r in rowNumbers]

    # Generate Table Plot
    figure = ff.create_table(tableRows, index=True, index_title='Term', height_constant=20)
    pOff.iplot(figure, filename='FreqMtrx')
        
def ShowHeatmap(b):
    """Plot the selected slice of the global ``matrix`` as a plotly heatmap.

    Button click handler. Uses the same global range widgets as the table view:
    x labels come from row 0 of ``matrix``, y labels from column 0 of each
    selected row, and the cell values from the selected columns of those rows.

    Args:
        b: the clicked button (unused).
    """
    clear_output()

    if not ((colMin.value < colMax.value) and (rowMin.value < rowMax.value)):
        print("Wrong Table Range")
        return

    firstCol = colMin.value + 1
    lastCol  = colMax.value + 2
    rowNumbers = range(rowMin.value, rowMax.value)

    xLabels = matrix[0][firstCol:lastCol]
    # NOTE(review): the format literal below starts with invisible characters
    # that must be kept verbatim; presumably they stop plotly from reading
    # numeric-looking labels as numbers — confirm before touching it.
    yLabels = ['‌‌{0}'.format(matrix[r][0]) for r in rowNumbers]
    zValues = [matrix[r][firstCol:lastCol] for r in rowNumbers]

    heatmap = go.Heatmap(z=zValues, x=xLabels, y=yLabels)
    pOff.iplot([heatmap], filename='heatmap')

def ShowMatrix():
    """Build and display the range selectors and buttons of the matrix preview.

    Creates four BoundedIntText widgets and stores them in the module-level
    colMin, colMax, rowMin and rowMax, which ShowMatrixTable and ShowHeatmap
    read when their button is clicked. The widgets are shown in an accordion
    together with the "Show Heat Map" and "Show Table" buttons.

    The module-level ``matrix`` must already be set; its size defines the
    upper bounds of the selectors.
    """
    # Frequency Matrix Table Configuration
    global colMin
    global colMax
    global rowMin
    global rowMax

    columnCount = len(matrix[0])
    rowCount    = len(matrix)

    # Every upper bound is kept at or above its lower bound: on a very small
    # matrix the raw expressions drop below the minimum and BoundedIntText
    # refuses to be built (max < min). Bounds of larger matrices are unchanged.
    # Default values outside the bounds are clamped by the widget itself.
    colMin = widgets.BoundedIntText(
        value=0,
        min=0,
        max=max(columnCount - 3, 0),
        step=1,
        description='Min: 0',
        disabled=False
    )

    colMax = widgets.BoundedIntText(
        value=10,
        min=1,
        max=max(columnCount - 2, 1),
        step=1,
        description='Max: {0}'.format(columnCount - 2),
        disabled=False
    )

    rowMin = widgets.BoundedIntText(
        value=455,
        min=1,
        max=max(rowCount - 1, 1),
        step=1,
        description='Min: 1',
        disabled=False
    )

    rowMax = widgets.BoundedIntText(
        value=475,
        min=2,
        max=max(rowCount, 2),
        step=1,
        description='Max: {0}'.format(rowCount),
        disabled=False
    )

    showButton = widgets.Button(
        description='Show Table',
        disabled=False,
        button_style='info',
        tooltip='Click me',
        icon=''
    )

    showHeatmap = widgets.Button(
        description='Show Heat Map',
        disabled=False,
        button_style='danger',
        tooltip='Click me',
        icon=''
    )

    showButton.on_click(ShowMatrixTable)
    showHeatmap.on_click(ShowHeatmap)
    matrixButtons = HBox([showHeatmap, showButton])
    accordion     = widgets.Accordion(children=[HBox([colMin, colMax]), HBox([rowMin, rowMax])])
    accordion.set_title(0, 'Column Matrix Range:')
    accordion.set_title(1, 'Row Matrix Range')
    display(VBox([accordion, matrixButtons]))
    
def ScatterMatrix(scatter):
    """Generate the frequency matrix and show its preview controls on 'Yes'.

    Args:
        scatter: current value of the No/Yes toggle.

    Returns:
        The matrix returned by ``documentIndexer.FreqMatrix(scattered=True)``
        when ``scatter`` is 'Yes' (it is also stored in the global ``matrix``);
        otherwise None, after clearing the cell output.
    """
    global matrix

    # Exact comparison: the former substring test (`scatter in 'Yes'`) also
    # accepted values such as '', 'Y' or 'es'.
    if scatter == 'Yes':
        print()
        matrix = documentIndexer.FreqMatrix(scattered=True)
        display(widgets.HTML(value="<h3> Preview </h3>"))
        ShowMatrix()
        return matrix
    else:
        clear_output()

In [None]:
# Bind the No/Yes toggle to ScatterMatrix and show the resulting widget
matrixWidget = interactive(ScatterMatrix, scatter=toggle)
matrixWidget

## Top 10
Cosine Similarity

In [None]:
# Frequency matrix requested with byTerms=False, then the first 10 entries of
# the similarity result for the user's query against it.
# NOTE(review): assumes GetSimilarity returns its results ordered from most to
# least similar — confirm in dataIndexer.indexer.
freqMatrix = documentIndexer.FreqMatrix(byTerms=False)
top10      = documentIndexer.GetSimilarity(userInput, freqMatrix)[:10]

## Explore Content

In [None]:
# Text boxes of the entities found in the document currently shown; rebuilt by
# ExploreContent and read by ShowCities.
citiesWdgt = []
def ExploreContent(docID):
    """Display one document and an editable text box per named entity found.

    Args:
        docID: id of the document to show, as passed to
            ``documentIndexer.GetDocField``.

    Side effects:
        Displays the parsed document followed by a VBox of text boxes, and
        replaces the global ``citiesWdgt`` with those text boxes.
    """
    global citiesWdgt
    explorer = DTExplorer()
    # Use the id that was passed in: reading the slider global instead made the
    # function depend on a variable that is only created in a later cell.
    content  = documentIndexer.GetDocField(docID)
    display(explorer.Parse(content))
    cities = explorer.GetNamedEntities()
    citiesWdgt = [
        widgets.Text(
            value=city,
            description='GPE[{0}]: '.format(i),
            disabled=False
        )
        for i, city in enumerate(cities)
    ]
    display(VBox(citiesWdgt))

In [None]:
# First element of every top10 entry, converted to int, in ranking order
docList = [int(entry[0]) for entry in top10]

# Slider over those ids; it starts on the first one and only reports a new
# value once the user releases it (continuous_update=False)
docSlider = widgets.SelectionSlider(
    options=docList,
    value=docList[0],
    description='Select Document: ',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True
)

# ExploreContent receives the slider's current value as docID
contentWidget = interactive(ExploreContent, docID=docSlider)
contentWidget

## Geopolitical Entities

Search for the geopolitical entities found in the document and display them.

In [None]:
def ShowCities(b):
    """Geocode the values of the GPE text boxes and map the ones that resolve.

    Button click handler. Reads the global ``citiesWdgt``, looks each value up
    with ``Geocode.GetGPE`` and merges every non-empty result into one dict;
    values that return nothing are reported with a printed message. The map is
    drawn only when the resulting feature collection is not empty.

    Args:
        b: the clicked button (unused).
    """
    clear_output()
    geolocator = Geocode()
    gpeList    = {}

    for cityWdgt in citiesWdgt:
        gpeName = cityWdgt.value
        # Skip values that are already a key of the collected results
        if gpeName in gpeList:
            continue

        location = geolocator.GetGPE(gpeName)
        if not location:
            print('GPE: "{0}" not found'.format(gpeName))
            continue
        gpeList.update(location)

    features = geolocator.GetFeatureCollection(gpeList)
    if len(features['features']) == 0:
        return
    DisplayMap(features)

def DisplayMap(features):
    """Print a numbered list of the features and show them as map markers.

    Args:
        features: mapping whose 'features' entry is an iterable of items, each
            with a 'properties' mapping providing 'name', 'location',
            'latitude' and 'longitude'.
    """
    infoBoxTemplate = """ <dl> <dt>{name}</dt> <dd>{location}</dd> <dt>Location</dt><dd>{latitude}, {longitude}</dd> </dl>"""
    markers     = []
    markersInfo = []
    cityLabels  = []

    # One marker, one info box and one hover label per feature; the printed
    # list is numbered from 1.
    for position, feature in enumerate(features['features'], start=1):
        properties = feature['properties']
        markers.append((properties['latitude'], properties['longitude']))
        markersInfo.append(infoBoxTemplate.format(**properties))
        cityLabels.append(properties['name'])
        print("{0}.- {1} - {2}".format(position, properties['name'], properties['location']))

    mapFig = gmaps.figure()
    mapFig.add_layer(
        gmaps.marker_layer(markers, hover_text=cityLabels, info_box_content=markersInfo)
    )
    display(mapFig)

In [None]:
# Button wired to ShowCities, which reads the GPE text boxes built by
# ExploreContent; the bare name on the last line displays the button.
showCitites = widgets.Button(
        description='Show Cities',
        disabled=False,
        button_style='warning',
        tooltip='Click me',
        icon=''
)

showCitites.on_click(ShowCities)
showCitites

In [None]:
%%html

<a href="#Explore-Content" class="jupyter-widgets jupyter-button widget-button mod-info" style="text-decoration: none;">Keep exploring</a>