In [1]:
import modin.pandas as mipd
import os
import time
import pandas as pd
from tqdm import tqdm
import re
import numpy as np
import json

In [2]:
from jellyfish import damerau_levenshtein_distance
from jellyfish import jaro_similarity
from jellyfish import jaro_winkler_similarity
from jellyfish import match_rating_comparison
from collections import defaultdict

In [3]:
import psycopg2
from sqlalchemy import create_engine
from sqlalchemy import types as sqltype
from config.database import HOST, PORT, USER, PASSWORD, DATABASE

psql_engine = create_engine("postgresql://"+USER+":"+PASSWORD+"@"+HOST+":"+str(PORT)+"/"+DATABASE)

## Preprocessing from Likang's

In [4]:
def partial_de_alising(author_name):
    return str(alias_dict.get(author_name, author_name)).lower().strip()

def preprocess_name(name):
    # remove nan
    if pd.isna(name):
        return name
    # if it is an email, take only the user domain
    name = name.split('@')[0]
    # remove text within brakets and parentheses
    name = re.sub(r"[\(\[].*?[\)\]]", "", name)
    # some names are of this pattern: "sg $ $date: 2008/10/07 10:18:51 $"
    if '$' in name:
        name = name.split('$')[0]
    # remove non-alphanumeric chars
    name = re.sub("[^a-zA-Z ]+", '', name)
    if ' via ' in name:
        name = name.split(' via ')[0]
    return name.strip()

# return True if it is sent by a bot
def is_bots(author_name):
    return bool(author_name in bots)

# return True if it is a coding file
def is_coding(file_path):
    if pd.isna(file_path):
        return False
    extension = '.' + file_path.split('/')[-1].split('.')[-1]
    return bool(extension in coding_extensions)

In [5]:
def indices_dict(lis):
    d = defaultdict(list)
    for i,(a,b) in enumerate(lis):
        d[a].append(i)
        d[b].append(i)
    return d

def disjoint_indices(lis):
    d = indices_dict(lis)
    sets = []
    while len(d):
        que = set(d.popitem()[1])
        ind = set()
        while len(que):
            ind |= que 
            que = set([y for i in que 
                         for x in lis[i] 
                         for y in d.pop(x, [])]) - ind
        sets += [ind]
    return sets

# union-find algo
def disjoint_sets(lis):
    return [list(set([x for i in s for x in lis[i]])) for s in disjoint_indices(lis)]


def process_name(name):
    #print(name)
    # if it is an email, take only the user domain
    name = name.split('@')[0]
    # remove text within brakets and parentheses
    name = re.sub(r"[\(\[].*?[\)\]]", "", name)
    # remove non-alphanumeric chars
    name = re.sub("[^a-zA-Z ]+", '', name)
    if '$' in name:
        # some names are of this pattern: "sg $ $date: 2008/10/07 10:18:51 $"
        name = name.split('$')[0]
    return name.strip()


def check_segments(name1, name2):

    name_segs_1 = name1.split(' ')
    name_segs_2 = name2.split(' ')

    if len(name_segs_1) == len(name_segs_2) == 2:
        first_name_1, last_name_1 = name_segs_1
        first_name_2, last_name_2 = name_segs_2

        # option 1: first name 1 compare to first name 2, last name 1 compare to last name 2
        # e.g., "robert yates" v.s. "robert butts"
        first_name_score = jaro_winkler_similarity(first_name_1, first_name_2)
        last_name_score = jaro_winkler_similarity(last_name_1, last_name_2)
        if first_name_score < 0.8 or last_name_score < 0.8:
            return False
        # option 2: first name 1 compare to last name 2, last name 1 compare to first name 2
        # e.g., "yates robert" v.s. "robert butts"
        else:
            first_name_score = jaro_winkler_similarity(first_name_1, last_name_2)
            last_name_score = jaro_winkler_similarity(last_name_1, first_name_2)
            if first_name_score < 0.8 or last_name_score < 0.8:
                return False
    return True

In [6]:
df_commit = pd.read_csv('./psql_commit.csv')



In [7]:
df_commit_1 = df_commit[['commiter_aliase_id', 'proj_id']]
df_commit_2 = df_commit[['author_aliase_id', 'proj_id']]
df_commit_1.columns = ['aliase_id', 'proj_id']
df_commit_2.columns = ['aliase_id', 'proj_id']


In [8]:
commit_df = pd.concat([df_commit_1, df_commit_2]).drop_duplicates()

In [9]:
commit_df

Unnamed: 0,aliase_id,proj_id
0,Jeffrey,geonode
1,Tyler,geonode
6,Ariel,geonode
15,David,geonode
45,david.w.bitner,geonode
...,...,...
1214893,Benjamin,orfeotoolbox
1218069,Sylvain,orfeotoolbox
1219496,Arthur,orfeotoolbox
1224095,Philippe,orfeotoolbox


In [10]:
df_message = pd.read_csv('./psql_message.csv')

In [11]:
message_df = df_message[['thread_id', 'author_aliase_id']]
message_df['thread_id'] = message_df['thread_id'].apply(lambda x: x.split('-')[0].split('_')[0].split('#')[0])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [12]:
message_df.columns = ['proj_id', 'aliase_id']
message_df = message_df.drop_duplicates()

In [13]:
message_df

Unnamed: 0,proj_id,aliase_id
0,openlayers,Stefano Bonnin_stefano.bonnin@comai.to
1,openlayers,Matthias Pohl_m.pohl@m-click.de
2,openlayers,Tim Schaub_tschaub@opengeo.org
3,openlayers,VR26A_vinci.w.cat@gmail.com
4,openlayers,Yves Jacolin free_yjacolin@free.fr
...,...,...
489876,qgis,Homayoon1989_h.zahmatkesh@ut.ac.ir
489880,qgis,Mitchell Luhm_luhmx009@umn.edu
489949,qgis,Khare Simran_simran.khare@hsr.ch
490016,qgis,Robert Hewlett_rob.hewy@gmail.com


In [14]:
df_comment = pd.read_csv('./psql_comment.csv')

In [15]:
comment_df = df_comment[['comment_id', 'author_aliase_id']]
comment_df['comment_id'] = comment_df['comment_id'].apply(lambda x: x.split('-')[0].split('_')[0].split('#')[0])
comment_df.columns = ['proj_id', 'aliase_id']
comment_df = comment_df.drop_duplicates()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [16]:
comment_df

Unnamed: 0,proj_id,aliase_id
0,grass,wenzeslaus
1,qgis,qgib
3,geonode,BerryDaniel
4,geonode,bartvde
6,geonode,schignel
...,...,...
93808,leaflet,Danielku15
93836,qgis,yoichigmf
93897,leaflet,calandoa
93916,leaflet,dlev-


In [17]:
df_forjoin = pd.concat([commit_df, comment_df, message_df], axis=0 ).drop_duplicates()

In [18]:
df_forjoin

Unnamed: 0,aliase_id,proj_id
0,Jeffrey,geonode
1,Tyler,geonode
6,Ariel,geonode
15,David,geonode
45,david.w.bitner,geonode
...,...,...
489876,Homayoon1989_h.zahmatkesh@ut.ac.ir,qgis
489880,Mitchell Luhm_luhmx009@umn.edu,qgis
489949,Khare Simran_simran.khare@hsr.ch,qgis
490016,Robert Hewlett_rob.hewy@gmail.com,qgis


In [19]:
df_aliase = pd.read_csv('./psql_aliase.csv')

In [20]:
df_aliase

Unnamed: 0,aliase_id,mailaddress,person_id,personname,source
0,L Pool_marisap@telkomsa.net,marisap@telkomsa.net,,L Pool,emails
1,Justin Jent_noreply@github.com,noreply@github.com,,Justin Jent,emails
2,=?UTF-8?Q?Hern=C3=A1n_De_Angelis?=_variablesta...,variablestarlight@gmail.com,,=?UTF-8?Q?Hern=C3=A1n_De_Angelis?=,emails
3,LynnF_lynn.forrester@mottmac.com,lynn.forrester@mottmac.com,,LynnF,emails
4,Mohammed Rashad_rashadkm@hotmail.com,rashadkm@hotmail.com,,Mohammed Rashad,emails
...,...,...,...,...,...
40249,chandrasekhar_chandra436@gmail.com,chandra436@gmail.com,,chandrasekhar,emails
40250,Daniel Fenton_dmfenton@gmail.com,dmfenton@gmail.com,,Daniel Fenton,emails
40251,J.Krueger_kruegerj@gmx.de,kruegerj@gmx.de,,J.Krueger,emails
40252,"Akkineni, Vijay_vijay.akkineni@ttu.edu",vijay.akkineni@ttu.edu,,"Akkineni, Vijay",emails


In [21]:
df_aliase = df_aliase.merge(df_forjoin, how = 'right', on= ['aliase_id'])

In [22]:
df_aliase['proj_id'].notnull().describe()

count     29765
unique        1
top        True
freq      29765
Name: proj_id, dtype: object

In [23]:
df_aliase['proj_id'].describe()

count     29765
unique       77
top        gdal
freq       5309
Name: proj_id, dtype: object

In [24]:
df_aliase

Unnamed: 0,aliase_id,mailaddress,person_id,personname,source,proj_id
0,Jeffrey,jeffrey@skyhouseconsulting.com,,Jeffrey Harris,Github,geonode
1,Tyler,tbmcmullen@gmail.com,,Tyler McMullen,Github,geonode
2,Ariel,ingenieroariel@gmail.com,,ArielWaldman,Github,geonode
3,David,dwinslow@opengeo.org,,DavidLeal,Github,geonode
4,david.w.bitner,david.w.bitner@noaa.gov,,404,Github,geonode
...,...,...,...,...,...,...
29760,Homayoon1989_h.zahmatkesh@ut.ac.ir,h.zahmatkesh@ut.ac.ir,,Homayoon1989,emails,qgis
29761,Mitchell Luhm_luhmx009@umn.edu,luhmx009@umn.edu,,Mitchell Luhm,emails,qgis
29762,Khare Simran_simran.khare@hsr.ch,simran.khare@hsr.ch,,Khare Simran,emails,qgis
29763,Robert Hewlett_rob.hewy@gmail.com,rob.hewy@gmail.com,,Robert Hewlett,emails,qgis


In [25]:
emails_df = df_aliase[df_aliase['source'] == 'emails']

In [26]:
emails_df

Unnamed: 0,aliase_id,mailaddress,person_id,personname,source,proj_id
11480,Stefano Bonnin_stefano.bonnin@comai.to,stefano.bonnin@comai.to,,Stefano Bonnin,emails,openlayers
11481,Matthias Pohl_m.pohl@m-click.de,m.pohl@m-click.de,,Matthias Pohl,emails,openlayers
11482,Tim Schaub_tschaub@opengeo.org,tschaub@opengeo.org,,Tim Schaub,emails,openlayers
11483,VR26A_vinci.w.cat@gmail.com,vinci.w.cat@gmail.com,,VR26A,emails,openlayers
11484,Yves Jacolin free_yjacolin@free.fr,yjacolin@free.fr,,Yves Jacolin free,emails,openlayers
...,...,...,...,...,...,...
29760,Homayoon1989_h.zahmatkesh@ut.ac.ir,h.zahmatkesh@ut.ac.ir,,Homayoon1989,emails,qgis
29761,Mitchell Luhm_luhmx009@umn.edu,luhmx009@umn.edu,,Mitchell Luhm,emails,qgis
29762,Khare Simran_simran.khare@hsr.ch,simran.khare@hsr.ch,,Khare Simran,emails,qgis
29763,Robert Hewlett_rob.hewy@gmail.com,rob.hewy@gmail.com,,Robert Hewlett,emails,qgis


In [27]:
emails_df.loc[emails_df['personname'].isnull(),'personname'] = emails_df[emails_df['personname'].isnull()]['mailaddress']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [28]:
#emails_df['personname']

In [29]:
commits_df = df_aliase[df_aliase['source'] == 'Github'].append(df_aliase[df_aliase['source'] == 'SVN'])


In [30]:
commits_df.loc[commits_df['personname'].isnull(),'personname'] = commits_df[commits_df['personname'].isnull()]['aliase_id']

In [31]:
commits_df

Unnamed: 0,aliase_id,mailaddress,person_id,personname,source,proj_id
0,Jeffrey,jeffrey@skyhouseconsulting.com,,Jeffrey Harris,Github,geonode
1,Tyler,tbmcmullen@gmail.com,,Tyler McMullen,Github,geonode
2,Ariel,ingenieroariel@gmail.com,,ArielWaldman,Github,geonode
3,David,dwinslow@opengeo.org,,DavidLeal,Github,geonode
4,david.w.bitner,david.w.bitner@noaa.gov,,404,Github,geonode
...,...,...,...,...,...,...
11243,lucadelu,lucadelu@svn.osgeo.org,,lucadelu,SVN,osgeolive
11256,pagameba,pagameba@svn.osgeo.org,,pagameba,SVN,mapserver
11410,rdewit,rdewit@svn.osgeo.org,,rdewit,SVN,mapserver
11414,jachym,jachym@svn.osgeo.org,,jachym,SVN,mapserver


In [32]:
commits_dict = commits_df.to_dict('records')
emails_dict = emails_df.to_dict('records')

In [33]:
commits_dict

[{'aliase_id': 'Jeffrey',
  'mailaddress': 'jeffrey@skyhouseconsulting.com',
  'person_id': nan,
  'personname': 'Jeffrey Harris',
  'source': 'Github',
  'proj_id': 'geonode'},
 {'aliase_id': 'Tyler',
  'mailaddress': 'tbmcmullen@gmail.com',
  'person_id': nan,
  'personname': 'Tyler McMullen',
  'source': 'Github',
  'proj_id': 'geonode'},
 {'aliase_id': 'Ariel',
  'mailaddress': 'ingenieroariel@gmail.com',
  'person_id': nan,
  'personname': 'ArielWaldman',
  'source': 'Github',
  'proj_id': 'geonode'},
 {'aliase_id': 'David',
  'mailaddress': 'dwinslow@opengeo.org',
  'person_id': nan,
  'personname': 'DavidLeal',
  'source': 'Github',
  'proj_id': 'geonode'},
 {'aliase_id': 'david.w.bitner',
  'mailaddress': 'david.w.bitner@noaa.gov',
  'person_id': nan,
  'personname': '404',
  'source': 'Github',
  'proj_id': 'geonode'},
 {'aliase_id': 'Sara',
  'mailaddress': 'sarastanway@gmail.com',
  'person_id': nan,
  'personname': 'SaraAnn Stanway',
  'source': 'Github',
  'proj_id': 'geon

In [34]:
emails_dict

[{'aliase_id': 'Stefano Bonnin_stefano.bonnin@comai.to ',
  'mailaddress': 'stefano.bonnin@comai.to ',
  'person_id': nan,
  'personname': 'Stefano Bonnin',
  'source': 'emails',
  'proj_id': 'openlayers'},
 {'aliase_id': 'Matthias Pohl_m.pohl@m-click.de ',
  'mailaddress': 'm.pohl@m-click.de ',
  'person_id': nan,
  'personname': 'Matthias Pohl',
  'source': 'emails',
  'proj_id': 'openlayers'},
 {'aliase_id': 'Tim Schaub_tschaub@opengeo.org ',
  'mailaddress': 'tschaub@opengeo.org ',
  'person_id': nan,
  'personname': 'Tim Schaub',
  'source': 'emails',
  'proj_id': 'openlayers'},
 {'aliase_id': 'VR26A_vinci.w.cat@gmail.com ',
  'mailaddress': 'vinci.w.cat@gmail.com ',
  'person_id': nan,
  'personname': 'VR26A',
  'source': 'emails',
  'proj_id': 'openlayers'},
 {'aliase_id': 'Yves Jacolin free_yjacolin@free.fr ',
  'mailaddress': 'yjacolin@free.fr ',
  'person_id': nan,
  'personname': 'Yves Jacolin free',
  'source': 'emails',
  'proj_id': 'openlayers'},
 {'aliase_id': 'Christoph

In [35]:
committers = {}
contributors = {}

for commit in commits_dict:
    project_name = commit['proj_id']
    author_full_name = commit['personname']
    if project_name not in committers:
        committers[project_name] = set()
    committers[project_name].add(author_full_name)

for email in emails_dict:
    project_name = email['proj_id']
    author_full_name = email['personname']
    if project_name not in contributors:
        contributors[project_name] = set()
    contributors[project_name].add(author_full_name)


In [36]:
committers.keys()

dict_keys(['geonode', 'grass', 'pycsw', 'geoext', 'geonetwork', 'pygeoapi', 'istsos', 'geostyler', 'mdal', 'actinia', 'loader', 'geohealthcheck', 'mobilitydb', 'bezitopo', 'eoxserver', 'nasaworldwind', 'geomesa', 'geowave', 'gisquick', 'jtstopologysuite', 'koop', 'leaflet', 'lerc', 'pdal', 'perfecttin', 'proj-jni', 'tegola', 'terraformer', 'geotrellis', 'gc2/vidi', 'deegree', 'geomoose', 'geopaparazzi', 'geoserver', 'geowebcache', 'osgeolive', 'mapserver', 'mapbender', 'qgis', 'openlayers', 'opendatacube', 'geomajas', 'ossim', 'pgrouting', 'firstdraftgis', 'get-it', 'wradlib', 'oskari', 'gdal/ogr', 'proj', 'zoo', 'esa-nasaworldwind', 'pywps', 'geotools', 'owslib', 'portable-gis', 'teamengine', 'rasterframes', 'pronto raster', 'geoserver-client-php', 'marble', 'gdal', 'portablegis', 'fdo', 'geos', 'postgis', 'orfeotoolbox', 'mapguide', 'get', 'esa'])

In [37]:
contributors.keys()

dict_keys(['openlayers', 'mapserver', 'pywps', 'postgis', 'pycsw', 'gdal', 'grass', 'mapguide', 'owslib', 'mapbender', 'mdal', 'zoo', 'fdo', 'proj4j', 'gisquick', 'mapproxy', 'qgis', 'osgeo4w', 'pdal', 'pgrouting', 'osgeolive', 'oskari', 'proj4php', 'proj', 'metacrs', 'geos', 'pygeoapi', 'geonode', 'mobilitydb', 'gvsig'])

In [41]:
project_alias_clustering = {}

# get projects set
c_projects = set(committers.keys())
e_projects = set(contributors.keys())
#projects = sorted([p for p in c_projects.intersection(e_projects) if not pd.isna(p)])
projects = sorted(list(c_projects.union(e_projects)))
# print(len(c_projects), len(e_projects))

project_alias_clustering = {}
for project in projects:
    print(project)
    clustering_pairs = []
    if project in c_projects:
        committer_names = set(committers[project])
    else:
        committer_names = set()
    if project in e_projects:
        contributor_names = set(contributors[project])
    else:
        contributor_names = set()
        
    developer_names = list(committer_names.union(contributor_names))
    ### NULL name removed!!
    if np.nan in developer_names:
        developer_names.remove(np.nan)
    for i in tqdm(range(len(developer_names))):
        p1 = process_name(developer_names[i])
        
        for j in range(i+1, len(developer_names)):
            # if it is an email, take only the user domain
            p2 = process_name(developer_names[j])

            # reslove issues that two different devs sharing same first name, 
            # e.g., "robert ottaway", "robert sayre"
            if not check_segments(p1, p2):
                continue

            jaro_winkler_similarity_score = jaro_winkler_similarity(p1, p2)
            # sounding_match_score = any([match_rating_comparison(s1, s2) for s1 in name_segs_1 for s2 in name_segs_2])
            # sounding_match_score = any([sounding_match_score, match_rating_comparison(p1, p2)])

            # add pairs if:
            # (1) if the score fall down to 0.85 
            # (2) or if the score fall down to 0.82 then we use pronouncetion to help make decision
            if jaro_winkler_similarity_score > 0.85: # or (jaro_winkler_similarity_score > 0.82 and sounding_match_score):
                clustering_pairs.append([developer_names[i], developer_names[j]])
    
    project_alias_clustering[project] = disjoint_sets(clustering_pairs)

with open('./project_alias_clustering.json', 'w') as f:
    json.dump(project_alias_clustering, f, indent = 4)

with open('./project_alias_clustering.json', 'r') as f:
    project_alias_clustering = json.load(f)

actinia


100%|██████████| 26/26 [00:00<00:00, 16445.77it/s]


bezitopo


100%|██████████| 4/4 [00:00<00:00, 26173.50it/s]


deegree


100%|██████████| 69/69 [00:00<00:00, 4915.70it/s]


eoxserver


100%|██████████| 26/26 [00:00<00:00, 6584.07it/s]


esa


100%|██████████| 74/74 [00:00<00:00, 4397.54it/s]


esa-nasaworldwind


100%|██████████| 37/37 [00:00<00:00, 13967.17it/s]


fdo


100%|██████████| 173/173 [00:00<00:00, 2812.37it/s]


firstdraftgis


100%|██████████| 5/5 [00:00<00:00, 18624.80it/s]


gc2/vidi


100%|██████████| 30/30 [00:00<00:00, 16394.67it/s]


gdal


100%|██████████| 4840/4840 [00:47<00:00, 102.84it/s]


gdal/ogr


100%|██████████| 325/325 [00:00<00:00, 1709.59it/s]


geoext


100%|██████████| 85/85 [00:00<00:00, 6118.45it/s]


geohealthcheck


100%|██████████| 28/28 [00:00<00:00, 17074.81it/s]


geomajas


100%|██████████| 42/42 [00:00<00:00, 11512.27it/s]


geomesa


100%|██████████| 105/105 [00:00<00:00, 4630.06it/s]


geomoose


100%|██████████| 33/33 [00:00<00:00, 16817.99it/s]


geonetwork


100%|██████████| 410/410 [00:00<00:00, 1223.30it/s]


geonode


100%|██████████| 1306/1306 [00:03<00:00, 384.83it/s]


geopaparazzi


100%|██████████| 67/67 [00:00<00:00, 7991.65it/s]


geos


100%|██████████| 570/570 [00:00<00:00, 822.08it/s]


geoserver


100%|██████████| 659/659 [00:00<00:00, 782.34it/s]


geoserver-client-php


100%|██████████| 4/4 [00:00<00:00, 12604.97it/s]


geostyler


100%|██████████| 60/60 [00:00<00:00, 4512.67it/s]


geotools


100%|██████████| 378/378 [00:00<00:00, 1399.97it/s]


geotrellis


100%|██████████| 173/173 [00:00<00:00, 3051.80it/s]


geowave


100%|██████████| 56/56 [00:00<00:00, 8851.41it/s]


geowebcache


100%|██████████| 90/90 [00:00<00:00, 5166.39it/s]


get


100%|██████████| 2/2 [00:00<00:00, 35394.97it/s]


get-it


100%|██████████| 29/29 [00:00<00:00, 18624.23it/s]


gisquick


100%|██████████| 18/18 [00:00<00:00, 17818.62it/s]


grass


100%|██████████| 3195/3195 [00:20<00:00, 154.58it/s]


gvsig


100%|██████████| 13/13 [00:00<00:00, 28106.16it/s]


istsos


100%|██████████| 25/25 [00:00<00:00, 21115.10it/s]


jtstopologysuite


100%|██████████| 75/75 [00:00<00:00, 5193.80it/s]


koop


100%|██████████| 182/182 [00:00<00:00, 2929.61it/s]


leaflet


100%|██████████| 961/961 [00:01<00:00, 535.78it/s]


lerc


100%|██████████| 25/25 [00:00<00:00, 20992.51it/s]


loader


100%|██████████| 12/12 [00:00<00:00, 31655.12it/s]


mapbender


100%|██████████| 331/331 [00:00<00:00, 1489.11it/s]


mapguide


100%|██████████| 298/298 [00:00<00:00, 1662.10it/s]


mapproxy


100%|██████████| 9/9 [00:00<00:00, 24851.04it/s]


mapserver


100%|██████████| 1156/1156 [00:02<00:00, 432.38it/s]


marble


100%|██████████| 118/118 [00:00<00:00, 4380.13it/s]


mdal


100%|██████████| 31/31 [00:00<00:00, 16469.08it/s]


metacrs


100%|██████████| 114/114 [00:00<00:00, 3631.46it/s]


mobilitydb


100%|██████████| 32/32 [00:00<00:00, 15543.45it/s]


nasaworldwind


100%|██████████| 15/15 [00:00<00:00, 11722.48it/s]


opendatacube


100%|██████████| 170/170 [00:00<00:00, 3022.79it/s]


openlayers


100%|██████████| 1878/1878 [00:06<00:00, 271.44it/s] 


orfeotoolbox


100%|██████████| 76/76 [00:00<00:00, 6599.05it/s]


osgeo4w


100%|██████████| 204/204 [00:00<00:00, 2253.00it/s]


osgeolive


100%|██████████| 550/550 [00:00<00:00, 830.70it/s]


oskari


100%|██████████| 135/135 [00:00<00:00, 3823.04it/s]


ossim


100%|██████████| 101/101 [00:00<00:00, 5565.08it/s]


owslib


100%|██████████| 153/153 [00:00<00:00, 3352.34it/s]


pdal


100%|██████████| 362/362 [00:00<00:00, 1256.33it/s]


perfecttin


100%|██████████| 1/1 [00:00<00:00, 11915.64it/s]


pgrouting


100%|██████████| 293/293 [00:00<00:00, 1501.58it/s]


portable-gis


100%|██████████| 2/2 [00:00<00:00, 31068.92it/s]


portablegis


100%|██████████| 2/2 [00:00<00:00, 11023.14it/s]


postgis


100%|██████████| 635/635 [00:00<00:00, 735.65it/s]


proj


100%|██████████| 1244/1244 [00:03<00:00, 383.98it/s]


proj-jni


100%|██████████| 6/6 [00:00<00:00, 47752.99it/s]


proj4j


100%|██████████| 27/27 [00:00<00:00, 18113.60it/s]


proj4php


100%|██████████| 1/1 [00:00<00:00, 17549.39it/s]


pronto raster


100%|██████████| 4/4 [00:00<00:00, 38304.15it/s]


pycsw


100%|██████████| 188/188 [00:00<00:00, 2319.52it/s]


pygeoapi


100%|██████████| 65/65 [00:00<00:00, 7523.31it/s]


pywps


100%|██████████| 125/125 [00:00<00:00, 3706.84it/s]


qgis


100%|██████████| 3904/3904 [00:30<00:00, 126.90it/s]


rasterframes


100%|██████████| 17/17 [00:00<00:00, 21865.43it/s]


teamengine


100%|██████████| 54/54 [00:00<00:00, 8419.48it/s]


tegola


100%|██████████| 29/29 [00:00<00:00, 15025.92it/s]


terraformer


100%|██████████| 15/15 [00:00<00:00, 25206.15it/s]


wradlib


100%|██████████| 34/34 [00:00<00:00, 12350.08it/s]


zoo


100%|██████████| 181/181 [00:00<00:00, 2153.58it/s]


In [39]:
projects

['fdo',
 'gdal',
 'geonode',
 'geos',
 'gisquick',
 'grass',
 'mapbender',
 'mapguide',
 'mapserver',
 'mdal',
 'mobilitydb',
 'openlayers',
 'osgeolive',
 'oskari',
 'owslib',
 'pdal',
 'pgrouting',
 'postgis',
 'proj',
 'pycsw',
 'pygeoapi',
 'pywps',
 'qgis',
 'zoo']