In [1]:
import math
import json
import requests
import os
import pandas as pd
import mdutils

In [3]:
project = "wikiproject-biodiversity"
license = "cc0,cc-by"
language = "en"

In [4]:
url = f"https://api.inaturalist.org/v1/observations?photo_license={license}&quality_grade=research&per_page=200&project_id={project}"
print(url)

https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=200&project_id=wikiproject-biodiversity


In [5]:
result = json.loads(requests.get(url).text)
for page in range(1, math.ceil(result["total_results"]/200)+1):
    try:
        nextpageresult = json.loads(requests.get(url+"&page="+str(page)).text)
    except:
        print(url+"&page="+str(page))
        continue
    if "results" in nextpageresult:
        for obs in nextpageresult["results"]:
            result["results"].append(obs)

https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=200&project_id=wikiproject-biodiversity&page=114
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=200&project_id=wikiproject-biodiversity&page=116
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=200&project_id=wikiproject-biodiversity&page=118
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=200&project_id=wikiproject-biodiversity&page=120
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=200&project_id=wikiproject-biodiversity&page=122
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=200&project_id=wikiproject-biodiversity&page=124
https://api.inaturalist.org/v1/observations?photo_license=cc0,cc-by&quality_grade=research&per_page=200&pr

In [6]:
table = dict()
for record in result["results"]:
    if record['taxon']['rank'] == "species":
        if record["taxon"]["id"] not in table.keys():
            table[record["taxon"]["id"]] = dict()
        table[record["taxon"]["id"]]["taxon_name"] = record["taxon"]["name"]
        for photo in record["observation_photos"]:
            if "photos" not in table[record["taxon"]["id"]].keys():
                table[record["taxon"]["id"]]["photos"] = []
            table[record["taxon"]["id"]]["photos"].append(photo["photo"]["url"])

to_verify = []
for taxon in table.keys():
    to_verify.append(table[taxon]['taxon_name'])


In [7]:
def verify_wikidata(taxon_names):
    verified = []
    i = 1
    for chunks in [taxon_names[i:i + 50] for i in range(0, len(taxon_names), 50)]:
        names=" ".join('"{0}"'.format(w) for w in chunks)
        query = f"""
             SELECT DISTINCT ?taxon_name (COUNT(?item) AS ?item_count) (COUNT(?article) AS ?article_count)   WHERE {{
                        VALUES ?taxon_name {{{names}}} 
                {{?item wdt:P225 ?taxon_name .}}
               UNION
               {{?item wdt:P225 ?taxon_name .
                ?article schema:about ?item ;
                         schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> .}}
                 UNION 
               {{?basionym wdt:P566 ?item ;
                          wdt:P225 ?taxon_name .
               ?article schema:about ?item ;
                        schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> .}}
               UNION
               {{
                  ?item wdt:P225 ?taxon_name .
                  ?wikidata_item wdt:P460 ?item ;
                                 schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> .
               }}
               UNION
               {{?basionym wdt:P566 ?item .
                ?item wdt:P225 ?taxon_name .
               ?article schema:about ?basionym ;
                        schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> .}}
      }} GROUP BY ?taxon_name  
            """

        url = "https://query.wikidata.org/sparql?format=json&query="+query  
        try:
            results = json.loads(requests.get(url).text)
        except:
            continue
        for result in results["results"]["bindings"]:
            if result["article_count"]["value"]=='0':
                verified.append(result["taxon_name"]["value"])
    return verified

In [8]:
verified = verify_wikidata(to_verify)
verified

['Felis catus',
 'Antigone canadensis',
 'Oudemansiella radicata',
 'Negosiana dualis',
 'Coccinella quinquepunctata',
 'Chauliognathus flavipes',
 'Kuschelina nigrovittata',
 'Tenthredo campestris',
 'Phintelloides versicolor',
 'Zeltus amasa',
 'Eurema andersoni',
 'Camponotus rufipes',
 'Muntiacus vaginalis',
 'Perola chica',
 'Adeloneivaia apicalis',
 'Perola brumalis',
 'Apoica gelida',
 'Nannopterum brasilianum',
 'Orobanche gracilis',
 'Aspidisca cicada',
 'Pygochelidon cyanoleuca',
 'Rhionaeschna bonariensis',
 'Micrathena annulata',
 'Hypselonotus interruptus',
 'Baccharis trimera',
 'Camponotus lespesii',
 'Eucnide lobata',
 'Tamonea spicata',
 'Alpaida truncata',
 'Sipanea pratensis',
 'Urocentrum turbo',
 'Pseudofumaria lutea',
 'Otospermophilus beecheyi',
 'Nereocystis luetkeana',
 'Rhinella alata',
 'Pachysylvia aurantiifrons',
 'Camponotus cruentatus',
 'Mystacides longicornis',
 'Pharia pyramidata',
 'Anisotremus taeniatus',
 'Johnrandallia nigrirostris',
 'Acontia trab

In [9]:
missing_en_wp_by_user = pd.DataFrame(columns=["taxon", "iNaturalist_taxon_id", "iNaturalist_observation_id", "reviewers"])

for observation in result["results"]:
    if observation["taxon"]["name"] in verified:
        missing_en_wp_by_user.loc[len(missing_en_wp_by_user)] = [observation["taxon"]["name"], observation["taxon"]["id"], observation["id"], len(observation["reviewed_by"])]
missing_en_wp_by_user

Unnamed: 0,taxon,iNaturalist_taxon_id,iNaturalist_observation_id,reviewers
0,Felis catus,118552,144646346,5
1,Antigone canadensis,508048,144345715,3
2,Oudemansiella radicata,383374,144256941,3
3,Coccinella quinquepunctata,57871,144238063,2
4,Kuschelina nigrovittata,775889,144170052,2
...,...,...,...,...
208,Rhionaeschna bonariensis,542509,104859519,4
209,Otospermophilus beecheyi,180007,104691849,4
210,Otospermophilus beecheyi,180007,104691847,4
211,Otospermophilus beecheyi,180007,104691846,4


In [11]:
import shutil
# shutil.rmtree('./new_articles/')
os.makedirs(os.path.dirname('./new_articles/'), exist_ok=True)

In [12]:
for taxon in table.keys():
    if table[taxon]["taxon_name"] in verified:
        mdFile = mdutils.MdUtils(file_name='new_articles/'+table[taxon]["taxon_name"].replace(" ", "_"),title=table[taxon]["taxon_name"])
        mdFile.new_line(f"[iNaturalist taxon id: {taxon}](https://www.inaturalist.org/taxa/{taxon})")
        for image in table[taxon]["photos"]:
            mdFile.new_line(f"![{table[taxon]['taxon_name']}]({image.replace('square', 'medium')})")
        mdFile.create_md_file()

In [13]:
import yaml

with open(r'_toc.yml') as file:
    # The FullLoader parameter handles the conversion from YAML
    # scalar values to Python the dictionary format
    species = yaml.load(file, Loader=yaml.FullLoader)

species["parts"][0]["chapters"] = []
for file in os.listdir('./new_articles/'):
    species["parts"][0]["chapters"].append({'file': 'new_articles/'+file})
with open(r'_toc.yml', 'w+') as file:
    documents = yaml.dump(species, file)