In [5]:
%%capture
!pip install wikidataintegrator
!pip install bibtexparser
import bs4
import taxon
import gui_widgets
from wikidataintegrator import wdi_core
import bibtexparser
import requests
import pandas as pd
import json
import ipywidgets as widgets
from IPython.display import IFrame, clear_output,  HTML, Image
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import math

In [6]:
def fetch_missing_wikipedia_articles(url):
    """Collect observations of binomial taxa that appear to lack a Wikipedia article.

    The JSON document at ``url`` is expected to carry a ``results`` list of
    observations. An observation is kept as a candidate when its taxon name
    consists of exactly two space-separated words and the taxon has no
    ``wikipedia_url``. The candidate names are then passed to
    ``verify_wikidata`` and only observations whose name it returns are kept.

    Parameters
    ----------
    url : str
        Address of an observations listing (a single page is read).

    Returns
    -------
    list of dict
        One dict per retained observation with the keys ``inat_obs_id``,
        ``inat_taxon_id`` and ``taxon_name``, in the order they were received.
    """
    payload = json.loads(requests.get(url).text)

    candidates = []
    # A payload without "results" (e.g. an error document) yields no candidates.
    for obs in payload.get("results", []):
        taxon_info = obs.get("taxon")
        if not taxon_info:
            # Observations without an identified taxon cannot be matched by name.
            continue
        name = taxon_info.get("name") or ""
        if len(name.split(" ")) != 2:
            # Only binomial names are considered.
            continue
        if taxon_info.get("wikipedia_url") is not None:
            continue
        candidates.append({
            "inat_obs_id": obs["id"],
            "inat_taxon_id": taxon_info["id"],
            "taxon_name": name,
        })

    # dict.fromkeys removes duplicates while keeping first-seen order.
    unique_names = list(dict.fromkeys(entry["taxon_name"] for entry in candidates))
    verified = set(verify_wikidata(unique_names))
    return [entry for entry in candidates if entry["taxon_name"] in verified]

def verify_wikidata(taxon_names):
    """Return the taxon names for which Wikidata counts no English Wikipedia article.

    The names are sent to the Wikidata SPARQL endpoint in batches of 50 while a
    progress bar is displayed. Every result row whose ``article_count`` equals
    ``'0'`` contributes its ``taxon_name`` to the returned list.

    Parameters
    ----------
    taxon_names : list of str
        Names matched against the ``wdt:P225`` statements in the query.

    Returns
    -------
    list of str
        Names reported with an article count of zero, in the order the
        endpoint returned them. Names belonging to a batch whose request or
        JSON decoding failed are silently left out (best effort).
    """
    batch_size = 50
    batches = [taxon_names[start:start + batch_size]
               for start in range(0, len(taxon_names), batch_size)]

    progress = widgets.IntProgress(
                value=1,
                min=0,
                # One step per batch; at least 1 so an empty input still gives a valid bar.
                max=max(len(batches), 1),
                description='Wikidata:',
                bar_style='', # 'success', 'info', 'warning', 'danger' or ''
                style={'bar_color': 'blue'},
                orientation='horizontal')
    display(progress)

    # NOTE(review): the branch using wdt:P460 never binds ?article, so it does not
    # contribute to ?article_count -- confirm whether that is intended.
    query_template = """
             SELECT DISTINCT ?taxon_name (COUNT(?item) AS ?item_count) (COUNT(?article) AS ?article_count)   WHERE {{
                        VALUES ?taxon_name {{{names}}} 
                {{?item wdt:P225 ?taxon_name .}}
               UNION
               {{?item wdt:P225 ?taxon_name .
                ?article schema:about ?item ;
                         schema:isPartOf 	<https://en.wikipedia.org/> .}}
                 UNION 
               {{?basionym wdt:P566 ?item ;
                          wdt:P225 ?taxon_name .
               ?article schema:about ?item ;
                        schema:isPartOf 	<https://en.wikipedia.org/> .}}
               UNION
               {{
                  ?item wdt:P225 ?taxon_name .
                  ?wikidata_item wdt:P460 ?item ;
                                 schema:isPartOf 	<https://en.wikipedia.org/> .
               }}
               UNION
               {{?basionym wdt:P566 ?item .
                ?item wdt:P225 ?taxon_name .
               ?article schema:about ?basionym ;
                        schema:isPartOf 	<https://en.wikipedia.org/> .}}
      }} GROUP BY ?taxon_name  
            """

    def _as_literal(name):
        # Escape backslashes and double quotes so the name stays one SPARQL string literal.
        return '"{0}"'.format(str(name).replace("\\", "\\\\").replace('"', '\\"'))

    verified = []
    for batch_number, batch in enumerate(batches, start=1):
        query = query_template.format(names=" ".join(_as_literal(name) for name in batch))
        progress.value = batch_number
        try:
            # Passing the query through `params` lets requests percent-encode it.
            response = requests.get(
                "https://query.wikidata.org/sparql",
                params={"format": "json", "query": query},
            )
            results = json.loads(response.text)
        except (requests.RequestException, ValueError):
            # Network failure or a non-JSON body (e.g. an error page): skip this batch.
            continue
        for result in results.get("results", {}).get("bindings", []):
            if result["article_count"]["value"] == '0':
                verified.append(result["taxon_name"]["value"])
    return verified

def render_results(photos, url):
    """Print a wikitext table of binomial taxa that seem to lack a Wikipedia article.

    ``photos`` is the already-decoded first page of an observations query made
    with ``url``. The remaining pages are requested by appending ``&page=N`` to
    ``url`` and their observations are appended to ``photos["results"]`` (the
    argument is modified in place). Observations are grouped by taxon id, the
    taxon names are checked with ``verify_wikidata`` and one table row is
    printed for every returned name that has exactly two words.

    Parameters
    ----------
    photos : dict
        First page of results; ``total_results`` and ``results`` are read.
    url : str
        The query URL used for the first page, without a ``page`` parameter.

    Returns
    -------
    ipywidgets.VBox
        Always an empty box: photo URLs are collected per taxon but no gallery
        is rendered for them at the moment.
    """
    per_page = 200  # matches the per_page value the callers put in `url`
    total_pages = math.ceil(photos.get("total_results", 0) / per_page)
    progress_max = total_pages + 1
    progress = widgets.IntProgress(
                value=1,
                min=0,
                max=progress_max,
                description='iNaturalist:',
                bar_style='', # 'success', 'info', 'warning', 'danger' or ''
                style={'bar_color': 'green'},
                orientation='horizontal')
    display(progress)

    observations = photos.setdefault("results", [])
    # Page 1 is already in `photos`; starting at 2 avoids fetching and
    # appending the first page a second time.
    for page in range(2, total_pages + 1):
        try:
            page_payload = json.loads(requests.get(url + "&page=" + str(page)).text)
        except (requests.RequestException, ValueError):
            # Best effort: a failed or non-JSON page is skipped.
            continue
        progress.value = page + 1
        if "results" in page_payload:
            observations.extend(page_payload["results"])
    progress.value = progress_max

    # taxon id -> {"taxon_name": str, "photos": [url, ...]}
    table = dict()
    for obs in observations:
        taxon_info = obs.get("taxon")
        if not taxon_info:
            # No identified taxon: nothing to group this observation under.
            continue
        entry = table.setdefault(taxon_info["id"], dict())
        entry["taxon_name"] = taxon_info["name"]
        for photo in obs.get("observation_photos") or []:
            entry.setdefault("photos", []).append(photo["photo"]["url"])

    verified = set(verify_wikidata([entry["taxon_name"] for entry in table.values()]))

    result_rows = []  # stays empty; see "Returns" in the docstring
    print("""
    {| class="wikitable"
|+ Potential missing Wikipedia articles
|-
! Taxon !! iNaturalist !! Exists as...*
    """)
    for taxon_id, entry in table.items():
        name = str(entry["taxon_name"])
        if entry["taxon_name"] not in verified:
            continue
        if len(name.split(" ")) != 2:
            # Only binomial names get a row.
            continue
        print("|-")
        print("|[https://en.wikipedia.org/wiki/" + name.replace(" ", "_") + " " + name + "] " +
              "||[https://www.inaturalist.org/taxa/" + str(taxon_id) + " " + str(taxon_id) + "] ||")
    print("|}")
    return widgets.VBox(result_rows)
    
def fetch_by_user(username, license):
    """Show the missing-article table for one iNaturalist user's research-grade observations.

    Builds the observations query for ``username`` restricted to the given
    photo ``license``, downloads the first page and hands it to
    ``render_results``; the widget it returns is displayed.
    """
    url = "".join((
        "https://api.inaturalist.org/v1/observations?photo_license=",
        license,
        "&quality_grade=research&per_page=200&user_id=",
        username,
    ))
    first_page = json.loads(requests.get(url).text)
    rendered = render_results(first_page, url)
    return display(rendered)

def fetch_by_taxon(taxon_id, license):
    """Show the missing-article table for research-grade observations of one taxon.

    Builds the observations query for ``taxon_id`` restricted to the given
    photo ``license``, downloads the first page and hands it to
    ``render_results``; the widget it returns is displayed.
    """
    url = "".join((
        "https://api.inaturalist.org/v1/observations?photo_license=",
        license,
        "&taxon_id=",
        str(taxon_id),
        "&quality_grade=research&per_page=200&subview=grid",
    ))
    first_page = json.loads(requests.get(url).text)
    rendered = render_results(first_page, url)
    return display(rendered)

def fetch_by_project(project_id, license):
    """Show the missing-article table for research-grade observations of one project.

    Builds the observations query for ``project_id`` restricted to the given
    photo ``license``, downloads the first page and hands it to
    ``render_results``; the widget it returns is displayed.
    """
    url = "".join((
        "https://api.inaturalist.org/v1/observations?photo_license=",
        license,
        "&project_id=",
        str(project_id),
        "&quality_grade=research&per_page=200&subview=grid",
    ))
    first_page = json.loads(requests.get(url).text)
    rendered = render_results(first_page, url)
    return display(rendered)

def fetch_by_country(country_code, license):
    """Show the missing-article table for research-grade observations in one place.

    ``country_code`` is sent as the ``place_id`` of the observations query,
    restricted to the given photo ``license``. The first page is downloaded and
    handed to ``render_results``; the widget it returns is displayed.
    """
    url = "".join((
        "https://api.inaturalist.org/v1/observations?photo_license=",
        license,
        "&place_id=",
        str(country_code),
        "&quality_grade=research&per_page=200&subview=grid",
    ))
    first_page = json.loads(requests.get(url).text)
    rendered = render_results(first_page, url)
    return display(rendered)

def search_by_taxon(taxon_str, rank, license):
    """Look up a taxon by name and show the missing-article table for it.

    The iNaturalist taxon autocomplete endpoint is queried with ``taxon_str``
    and ``rank``; the first match's id is passed to ``fetch_by_taxon`` together
    with ``license``. When nothing matches, a short message is printed instead.

    Parameters
    ----------
    taxon_str : str
        Free-text taxon name to search for.
    rank : str
        Taxonomic rank sent as the ``rank`` query parameter.
    license : str
        Photo license filter forwarded to ``fetch_by_taxon``.

    Returns
    -------
    None
    """
    # `params` lets requests percent-encode the free-text search term.
    response = requests.get(
        "https://api.inaturalist.org/v1/taxa/autocomplete",
        params={"q": taxon_str, "rank": rank},
    )
    matches = json.loads(response.text).get("results") or []
    if not matches:
        print("No iNaturalist taxon found for '{0}' (rank: {1}).".format(taxon_str, rank))
        return
    # fetch_by_taxon already displays its result; wrapping it in display()
    # would only render its None return value.
    fetch_by_taxon(matches[0]["id"], license)
    

def search_species_place(place, license):
    """Look up a place by name and show the missing-article table for it.

    The iNaturalist place autocomplete endpoint is queried with ``place``; the
    first match's id is passed to ``fetch_by_country`` together with
    ``license``. When nothing matches, a short message is printed instead.

    Parameters
    ----------
    place : str
        Free-text place name to search for (converted with ``str``).
    license : str
        Photo license filter forwarded to ``fetch_by_country``.

    Returns
    -------
    None
    """
    # `params` lets requests percent-encode the free-text search term.
    response = requests.get(
        "https://api.inaturalist.org/v1/places/autocomplete",
        params={"q": str(place)},
    )
    matches = json.loads(response.text).get("results") or []
    if not matches:
        print("No iNaturalist place found for '{0}'.".format(place))
        return
    # fetch_by_country already displays its result; wrapping it in display()
    # would only render its None return value.
    fetch_by_country(matches[0]["id"], license)
                              
                              
def _src_from_data(data):
    """Turn raw image bytes into a ``data:`` URI usable as an HTML ``img`` source.

    The bytes are wrapped in an IPython ``Image`` and its mime bundle is
    scanned; the first entry whose mimetype starts with ``image/`` is returned
    as ``data:<mimetype>;base64,<value>``. ``None`` is returned when no such
    entry exists.
    """
    data_uris = (
        f'data:{mime};base64,{encoded}'
        for part in Image(data=data)._repr_mimebundle_()
        for mime, encoded in part.items()
        if mime.startswith('image/')
    )
    return next(data_uris, None)

def gallery(images, row_height='auto'):
    """Build the HTML for a wrapping flexbox gallery of images.

    Parameters
    ----------
    images: list of str or bytes
        Each item is either something usable directly as an ``img`` source
        (e.g. a URL) or raw image bytes, which are converted with
        ``_src_from_data``.

    row_height: str
        CSS height applied to every image. The default ``'auto'`` keeps each
        image's own dimensions; a value such as ``'250px'`` gives all images
        the same height.

    Returns
    -------
    str
        An HTML ``div`` containing one ``figure`` per image.
    """
    def _figure(image):
        # Bytes are embedded inline; anything else is used as the source verbatim.
        source = _src_from_data(image) if isinstance(image, bytes) else image
        return f'''
            <figure style="margin: 5px !important;">
              <img src="{source}" style="height: {row_height}">
            </figure>
        '''

    body = ''.join(_figure(image) for image in images)
    return f'''
        <div style="display: flex; flex-flow: row wrap; text-align: center;">
        {body}
        </div>
    '''

## create wikitext

In [9]:
# Root output area that hosts the whole search UI.
tab1 = widgets.Output()

# Photo-license filters offered by every search form.
LICENSE_CHOICES = ["cc0,cc-by,cc-by-sa", "cc0", "cc-by", "cc-by-sa"]

with tab1:
    clear_output()

    def get_data(b):
        """Load external data for ``b.taxon_id`` and display a summary table.

        The result of ``taxon.external_data`` is stored in the module-level
        ``data`` variable. The summary shows the default photo with its
        attribution next to the taxon's and its parent's basic fields.
        """
        global data
        data = taxon.external_data(inaturalist_id=b.taxon_id)
        record = data.inaturalist_data[0]
        parent = data.inaturalist_parent_data[0]

        html = "<table><tr><td><img src='"+record['default_photo']['medium_url']+"'><br>"+record['default_photo']['attribution']+"</td>"
        html += "<td>"
        html += "stub-type: "+record["iconic_taxon_name"]
        html += "<br>iNaturalist taxon id: "+ str(record["id"])
        html += "<br>name: "+record["name"]
        # The key was previously misspelled, so the common name was never shown.
        if "preferred_common_name" in record.keys():
            html += "<br>common name: "+record["preferred_common_name"]
        html += "<br>rank: "+record["rank"]
        html += "<br>parent id: "+str(parent["id"])
        html += "<br>parent name: "+parent["name"]
        html += "<br>parent rank: "+parent["rank"]
        html += "</td></tr></table>"

        display(HTML(html))

    # One output area per search mode, shown as tabs.
    tab1tab1 = widgets.Output()
    tab1tab2 = widgets.Output()
    tab1tab3 = widgets.Output()
    tab1tab4 = widgets.Output()
    tab1tab = widgets.Tab(children=[tab1tab1,tab1tab2,tab1tab3,tab1tab4])
    tab1tab.set_title(0, 'search by taxon')
    tab1tab.set_title(1, 'search by user')
    tab1tab.set_title(2, 'search by country')
    tab1tab.set_title(3, 'search by project')

    # interact_manual builds a form with a button for each search function.
    with tab1tab1:
        interact_manual(search_by_taxon, taxon_str='', rank=["genus", "family", "order"], license=LICENSE_CHOICES)
    with tab1tab2:
        interact_manual(fetch_by_user, username='', license=LICENSE_CHOICES)
    with tab1tab3:
        interact_manual(search_species_place, place='', license=LICENSE_CHOICES)
    with tab1tab4:
        interact_manual(fetch_by_project, project_id='', license=LICENSE_CHOICES)

    display(tab1tab)
    data = None
    # get_data is currently not attached to any control; it used to be wired with:
    #taxon_window = gui_widgets.interact_manual(get_data, taxon_id="")

display(tab1)

Output()