In [1]:
import math
import json
import requests
import os
import pandas as pd
import mdutils

In [2]:
usernames = ["soetjipto", "andrawaag"]
license = "cc0"
language = "en"

In [3]:
url = "https://api.inaturalist.org/v1/observations?photo_license="+license+"&quality_grade=research&per_page=200&user_id="+",".join(usernames)
print(url)

https://api.inaturalist.org/v1/observations?photo_license=cc0&quality_grade=research&per_page=200&user_id=soetjipto,andrawaag


In [4]:
result = json.loads(requests.get(url).text)
for page in range(1, math.ceil(result["total_results"]/200)+1):
    try:
        nextpageresult = json.loads(requests.get(url+"&page="+str(page)).text)
    except:
        print(url+"&page="+str(page))
        continue
    if "results" in nextpageresult:
        for obs in nextpageresult["results"]:
            result["results"].append(obs)

In [5]:
table = dict()
for record in result["results"]:
    if record['taxon']['rank'] == "species":
        if record["taxon"]["id"] not in table.keys():
            table[record["taxon"]["id"]] = dict()
        table[record["taxon"]["id"]]["taxon_name"] = record["taxon"]["name"]
        for photo in record["observation_photos"]:
            if "photos" not in table[record["taxon"]["id"]].keys():
                table[record["taxon"]["id"]]["photos"] = []
            table[record["taxon"]["id"]]["photos"].append(photo["photo"]["url"])

to_verify = []
for taxon in table.keys():
    to_verify.append(table[taxon]['taxon_name'])


In [6]:
def verify_wikidata(taxon_names):
    verified = []
    i = 1
    for chunks in [taxon_names[i:i + 50] for i in range(0, len(taxon_names), 50)]:
        names=" ".join('"{0}"'.format(w) for w in chunks)
        query = f"""
             SELECT DISTINCT ?taxon_name (COUNT(?item) AS ?item_count) (COUNT(?article) AS ?article_count)   WHERE {{
                        VALUES ?taxon_name {{{names}}} 
                {{?item wdt:P225 ?taxon_name .}}
               UNION
               {{?item wdt:P225 ?taxon_name .
                ?article schema:about ?item ;
                         schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> .}}
                 UNION 
               {{?basionym wdt:P566 ?item ;
                          wdt:P225 ?taxon_name .
               ?article schema:about ?item ;
                        schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> .}}
               UNION
               {{
                  ?item wdt:P225 ?taxon_name .
                  ?wikidata_item wdt:P460 ?item ;
                                 schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> .
               }}
               UNION
               {{?basionym wdt:P566 ?item .
                ?item wdt:P225 ?taxon_name .
               ?article schema:about ?basionym ;
                        schema:isPartOf 	<{"https://"+language+".wikipedia.org/"}> .}}
      }} GROUP BY ?taxon_name  
            """

        url = "https://query.wikidata.org/sparql?format=json&query="+query  
        try:
            results = json.loads(requests.get(url).text)
        except:
            continue
        for result in results["results"]["bindings"]:
            if result["article_count"]["value"]=='0':
                verified.append(result["taxon_name"]["value"])
    return verified

In [7]:
verified = verify_wikidata(to_verify)
verified

['Ectatomma brunneum',
 'Miresa clarissa',
 'Pompeius pompeius',
 'Mormidea ypsilon',
 'Eperua rubiginosa',
 'Helicopis cupido',
 'Markea coccinea',
 'Macugonalia moesta',
 'Dianthera secunda',
 'Apoica strigata',
 'Vismia cayennensis',
 'Camponotus renggeri',
 'Pityrogramma calomelanos',
 'Piriqueta cistoides',
 'Leistes militaris',
 'Murdannia nudiflora',
 'Sosxetra grata',
 'Euglyphis braganza',
 'Sapajus apella',
 'Synedrella nodiflora',
 'Tamonea spicata',
 'Priva lappulacea',
 'Sipanea pratensis',
 'Goeppertia latifolia',
 'Luehea seemannii',
 'Apoica gelida',
 'Rhinella alata',
 'Palicourea tomentosa',
 'Earliella scabrosa',
 'Kalanchoe laxiflora',
 'Epidendrum jamiesonis',
 'Camponotus cruentatus',
 'Tillandsia lajensis',
 'Tillandsia pastensis',
 'Pygochelidon cyanoleuca',
 'Saguinus nigricollis',
 'Lissachatina fulica',
 'Pharia pyramidata',
 'Puya asplundii',
 'Anisotremus taeniatus',
 'Johnrandallia nigrirostris',
 'Lantana velutina',
 'Sanchezia oblonga',
 'Sporophila ango

In [8]:
missing_en_wp_by_user = pd.DataFrame(columns=["taxon", "iNaturalist_taxon_id", "iNaturalist_observation_id", "reviewers"])

for observation in result["results"]:
    if observation["taxon"]["name"] in verified:
        missing_en_wp_by_user.loc[len(missing_en_wp_by_user)] = [observation["taxon"]["name"], observation["taxon"]["id"], observation["id"], len(observation["reviewed_by"])]
missing_en_wp_by_user

Unnamed: 0,taxon,iNaturalist_taxon_id,iNaturalist_observation_id,reviewers
0,Pompeius pompeius,148737,143934998,3
1,Macugonalia moesta,733426,143890329,3
2,Mormidea ypsilon,296290,143428602,3
3,Dianthera secunda,942468,143427513,3
4,Miresa clarissa,764661,143020564,4
...,...,...,...,...
126,Nannopterum brasilianum,1289601,36978740,3
127,Peperomia emarginella,281323,36959802,4
128,Thamnobates subfalcata,1029635,36903468,5
129,Aiouea montana,867397,36896553,3


In [20]:
import shutil
shutil.rmtree('./new_articles/')
os.makedirs(os.path.dirname('./new_articles/'), exist_ok=True)

In [23]:
for taxon in table.keys():
    if table[taxon]["taxon_name"] in verified:
        mdFile = mdutils.MdUtils(file_name='new_articles/'+table[taxon]["taxon_name"].replace(" ", "_"),title=table[taxon]["taxon_name"])
        mdFile.new_line(f"[iNaturalist taxon id: {taxon}](https://www.inaturalist.org/taxa/{taxon})")
        for image in table[taxon]["photos"]:
            mdFile.new_line(f"![{table[taxon]['taxon_name']}]({image.replace('square', 'medium')})")
        mdFile.create_md_file()

In [22]:
import yaml

with open(r'_toc.yml') as file:
    # The FullLoader parameter handles the conversion from YAML
    # scalar values to Python the dictionary format
    species = yaml.load(file, Loader=yaml.FullLoader)

species["parts"][0]["chapters"] = []
for file in os.listdir('./new_articles/'):
    species["parts"][0]["chapters"].append({'file': 'new_articles/'+file})
with open(r'_toc.yml', 'w+') as file:
    documents = yaml.dump(species, file)