In [1]:
import math
import json
import requests
import os
import pandas as pd
import mdutils

In [2]:
project = "open-bombus"
license = "cc0,cc-by"
language = "en"

In [3]:
url = f"https://api.inaturalist.org/v1/observations?photo_license={license}&quality_grade=research&per_page=100&project_id={project}"
print(url)

https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=100&project_id=open-bombus


In [4]:
result = json.loads(requests.get(url).text)
for page in range(1, math.ceil(result["total_results"]/100)+1):
    try:
        nextpageresult = json.loads(requests.get(url+"&page="+str(page)).text)
    except Exception as e:
        print(e)
        print(url+"&page="+str(page))
        continue
    if "results" in nextpageresult:
        for obs in nextpageresult["results"]:
            result["results"].append(obs)

Expecting value: line 1 column 1 (char 0)
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=100&project_id=wikiproject-biodiversity&page=163
Expecting value: line 1 column 1 (char 0)
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=100&project_id=wikiproject-biodiversity&page=165
Expecting value: line 1 column 1 (char 0)
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=100&project_id=wikiproject-biodiversity&page=167
Expecting value: line 1 column 1 (char 0)
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=100&project_id=wikiproject-biodiversity&page=169
Expecting value: line 1 column 1 (char 0)
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=100&project_id=wikiproject-biodiversity&page=171
Expecting value: line 1 column 1 (char 0)
htt

In [5]:
table = dict()
for record in result["results"]:
    if record['taxon']['rank'] == "species":
        if record["taxon"]["id"] not in table.keys():
            table[record["taxon"]["id"]] = dict()
        table[record["taxon"]["id"]]["taxon_name"] = record["taxon"]["name"]
        for photo in record["observation_photos"]:
            if "photos" not in table[record["taxon"]["id"]].keys():
                table[record["taxon"]["id"]]["photos"] = []
            table[record["taxon"]["id"]]["photos"].append(photo["photo"]["url"])

to_verify = []
for taxon in table.keys():
    to_verify.append(table[taxon]['taxon_name'])


In [6]:
def verify_wikidata(taxon_names):
    verified = []
    i = 1
    for chunks in [taxon_names[i:i + 50] for i in range(0, len(taxon_names), 50)]:
        names=" ".join('"{0}"'.format(w) for w in chunks)
        query = f"""
             SELECT DISTINCT ?taxon_name (COUNT(?item) AS ?item_count) (COUNT(?article) AS ?article_count)   WHERE {{
                        VALUES ?taxon_name {{{names}}} 
                {{?item p:P31 [
                                ps:P31 wd:Q55983715 ;
                                pq:P642 ?taxon_name ;
                            ] .
                  OPTIONAL {{?article schema:about ?item ;
                         schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> }}}}
               UNION
               {{?item wdt:P225 ?taxon_name .
                OPTIONAL {{?article schema:about ?item ;
                         schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> }}}}
                 UNION 
               {{?basionym wdt:P566 ?item ;
                          wdt:P225 ?taxon_name .
               OPTIONAL {{?article schema:about ?item ;
                         schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> }}}}
               UNION
               {{
                  ?item wdt:P225 ?taxon_name .
                  ?wikidata_item wdt:P460 ?item ;
                  OPTIONAL {{?article schema;about ?wikidata_item ;
                                 schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> }}
               }}
               UNION
               {{?basionym wdt:P566 ?item .
                ?item wdt:P225 ?taxon_name .
                OPTIONAL {{?article schema:about ?item ;
                         schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> }}}}
      }} GROUP BY ?taxon_name  
            """

        url = "https://query.wikidata.org/sparql?format=json&query="+query  
        try:
            results = json.loads(requests.get(url).text)
        except:
            continue
        for result in results["results"]["bindings"]:
            if result["article_count"]["value"]=='0':
                verified.append(result["taxon_name"]["value"])
    return verified

In [7]:
verified = verify_wikidata(to_verify)
verified

[]

In [12]:
table

{48484: {'taxon_name': 'Harmonia axyridis',
  'photos': ['https://inaturalist-open-data.s3.amazonaws.com/photos/249826831/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/249826831/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/247546521/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/240841883/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/240840714/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/234192747/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/234192438/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/234192163/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/234192196/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/229498638/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/photos/227612271/square.jpg',
   'https://inaturalist-open-data.s3.amazonaws.com/phot

In [None]:
missing_en_wp_by_user = pd.DataFrame(columns=["taxon", "iNaturalist_taxon_id", "iNaturalist_observation_id", "reviewers"])

for observation in result["results"]:
    if observation["taxon"]["name"] in verified:
        missing_en_wp_by_user.loc[len(missing_en_wp_by_user)] = [observation["taxon"]["name"], observation["taxon"]["id"], observation["id"], len(observation["reviewed_by"])]
missing_en_wp_by_user

In [None]:
import shutil
shutil.rmtree('./new_articles/')
os.makedirs(os.path.dirname('./new_articles/'), exist_ok=True)

In [None]:
for taxon in table.keys():
    if table[taxon]["taxon_name"] in verified:
        mdFile = mdutils.MdUtils(file_name='new_articles/'+table[taxon]["taxon_name"].replace(" ", "_"),title=table[taxon]["taxon_name"])
        mdFile.new_line(f"[iNaturalist taxon id: {taxon}](https://www.inaturalist.org/taxa/{taxon})")
        for image in table[taxon]["photos"]:
            mdFile.new_line(f"![{table[taxon]['taxon_name']}]({image.replace('square', 'medium')})")
        mdFile.create_md_file()

In [None]:
import yaml

with open(r'_toc.yml') as file:
    # The FullLoader parameter handles the conversion from YAML
    # scalar values to Python the dictionary format
    species = yaml.load(file, Loader=yaml.FullLoader)

species["parts"][0]["chapters"] = []
for file in os.listdir('./new_articles/'):
    species["parts"][0]["chapters"].append({'file': 'new_articles/'+file})
with open(r'_toc.yml', 'w+') as file:
    documents = yaml.dump(species, file)

In [None]:
from ghp_import import ghp_import
ghp_import("_build/html", nojekyll=True, push=True, force=True)