In [None]:
from pymongo import MongoClient
# Connect with the driver's default settings; pass host/port explicitly
# (e.g. host='localhost', port=9999) to reach a different server.
client = MongoClient()
usta_db = client['usta']
entries = usta_db['tlinkentries']

In [None]:
# Total number of documents in the collection. Collection.count() was
# deprecated in PyMongo 3.7 and removed in 4.0; count_documents({}) is the
# supported replacement for an unfiltered count.
entries.count_documents({})

In [None]:
# Every distinct value found under info.name, then how many there are.
names = entries.distinct(key='info.name')
len(names)

In [None]:
# Stage 1: bucket entries by info.name, remembering the _id of every entry in
# the bucket and how many entries it holds.
group_by_name = {
    "$group": {
        "_id": {"name": "$info.name"},
        "uniqueIds": {"$addToSet": "$_id"},
        "count": {"$sum": 1},
    }
}

# Stage 2: keep only the names that occur more than once.
more_than_one_entry = {"$match": {"count": {"$gt": 1}}}

docs = list(entries.aggregate([group_by_name, more_than_one_entry]))

In [None]:
from datetime import datetime
from operator import itemgetter
from pandas import DataFrame
from pandas import ExcelWriter
from collections import OrderedDict

In [None]:
def save_xls(dct, xls_path='year_end_ratings.xlsx'):
    """Write every frame in ``dct`` to its own sheet of a single Excel workbook.

    Args:
        dct: Mapping of sheet name -> object exposing ``to_excel`` (the
            notebook passes pandas DataFrames). Sheets are written in the
            mapping's iteration order.
        xls_path: Path of the workbook to create.
    """
    # The context manager saves and closes the workbook on exit, including
    # when writing a sheet raises. ExcelWriter.save() was deprecated and then
    # removed in pandas 2.0, so it must not be called directly.
    with ExcelWriter(xls_path) as writer:
        for name, s in dct.items():
            s.to_excel(writer, sheet_name=name)

In [None]:
# Build one sheet per (previous rating level, gender) listing the players whose
# newest year-end rating moved half a level up or down from that level.

levels = ['{:.1f}'.format(x / 10.) for x in range(30, 60, 5)]
genders = ['M', 'F']
sheets = OrderedDict()

# Only players whose newest entry carries this year-end date are reported.
# Parsed once here instead of on every loop iteration.
new_year_end_rating_date = datetime.strptime('12/31/2017', '%m/%d/%Y')


def _normalize_entry(dup):
    """Reduce a raw collection document to the fields the report needs.

    Non-string rating levels are formatted with one decimal so they compare
    equal to the strings in ``levels``; the rating date is parsed from
    ``MM/DD/YYYY`` into a ``datetime`` so entries can be ordered by date.
    """
    info = dup['info']
    rating_level = info['year_end_rating_level']
    if not isinstance(rating_level, str):
        rating_level = '{:.1f}'.format(rating_level)
    return {
        'name': info['name'],
        'gender': info['gender'],
        'year_end_rating_level': rating_level,
        'year_end_rating_date': datetime.strptime(info['year_end_rating_date'], '%m/%d/%Y'),
        'rating_type': info['rating_type'],
    }


def _collect_rating_changes(collection, groups, wanted_genders, newest_date):
    """Return ``(gender, latest, previous)`` for every name whose rating changed.

    Each group is queried exactly once. None of the work done here depends on
    the level being reported, so it is kept out of the gender/level loops.

    Args:
        collection: Collection holding the entries referenced by ``groups``.
        groups: Aggregation results, each with a ``uniqueIds`` list of ``_id``s.
        wanted_genders: Genders to keep; other groups are skipped before any
            of their entries are normalized.
        newest_date: Date the most recent entry must carry to be reported.

    Returns:
        List of ``(gender, latest, previous)`` tuples in the order of
        ``groups``, where ``latest`` and ``previous`` are the two most recent
        normalized entries and their rating levels differ.
    """
    changes = []
    for group in groups:
        raw_entries = list(collection.find({'_id': {'$in': group['uniqueIds']}}))
        # A comparison needs two entries; skip groups whose documents are no
        # longer all present instead of failing with an IndexError.
        if len(raw_entries) < 2:
            continue

        # The gender of a group is taken from the first document returned.
        group_gender = raw_entries[0]['info']['gender']
        if group_gender not in wanted_genders:
            continue

        latest, previous = sorted(
            (_normalize_entry(raw) for raw in raw_entries),
            key=itemgetter('year_end_rating_date'),
            reverse=True,
        )[:2]

        if latest['year_end_rating_date'] != newest_date:
            continue
        if latest['year_end_rating_level'] == previous['year_end_rating_level']:
            continue
        changes.append((group_gender, latest, previous))
    return changes


rating_changes = _collect_rating_changes(entries, docs, genders, new_year_end_rating_date)

for gender in genders:
    for level in levels:
        lvl = int(round(float(level) * 10))
        # The new rating may be at most half a level away from the old one.
        allowed_levels = ['{:.1f}'.format(x / 10.) for x in range(lvl - 5, lvl + 10, 5)]
        key = '{}{}'.format(level, gender)
        print(key)
        data = OrderedDict([('Name', []), ('2018 Rating', []), ('Type', [])])

        for entry_gender, latest, previous in rating_changes:
            if entry_gender != gender:
                continue
            if previous['year_end_rating_level'] != level:
                continue
            if latest['year_end_rating_level'] not in allowed_levels:
                continue

            data['Name'].append(latest['name'])
            data['2018 Rating'].append(float(latest['year_end_rating_level']))
            data['Type'].append(latest['rating_type'])

        if data['Name']:
            sheets[key] = (
                DataFrame(data=data)
                .set_index('Name')
                .sort_values(by=['2018 Rating'])
            )

# A workbook needs at least one sheet; skip the write when nothing matched.
if sheets:
    save_xls(sheets)
else:
    print('No rating changes found; no workbook written')
print('DONE')