In [15]:
import names
import numpy as np
import random
from ProductHuntScraper import ProductHunt
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import random
import gender_guesser.detector as gender

In [2]:
def initialize_db():
    cred = credentials.Certificate("new-website-prod-firebase-adminsdk-lni3g-ecda7ac2fe.json")
    firebase_admin.initialize_app(cred)
    db = firestore.client()
    return db

In [3]:
db = initialize_db()

In [4]:
def generate_keys_firebase():
    users_ref = db.collection(u'waitlist')
    docs = users_ref.stream()
    cache = {}
    for doc in docs:
        cache[doc.id] = 1
    return cache

In [5]:
def write_to_firebase(fake_people_and_their_points):
    cache = generate_keys_firebase()
    points_list = {}
    for user_info in fake_people_and_their_points:
        rand_int = random.randint(1,1000000)
        email = "generated_people{rand_int}@gmail.com".format(rand_int=str(rand_int))
        while email in cache:
            rand_int = random.randint(1,1000000)
            email = "generated_people{rand_int}@gmail.com".format(rand_int=str(rand_int))
            print(email)
        doc_ref = db.collection(u'waitlist').document(email)
        doc_ref.set({
            u'name': user_info['person_name'],
            u'referrals': int(user_info['person_referrals']),
            u'points': int(user_info['person_points'])
        })
        points_list[email] = {"email": email, "points": int(user_info['person_points']), "name": user_info['person_name'], "referrals": int(user_info['person_referrals'])}
        
    leaderboard_ref = db.collection(u'metadata').document(u'waitlist').update({
        u'leaderboard': points_list
    })

In [6]:
def write_to_firebase(fake_people_and_their_points):
    cache = generate_keys_firebase()
    points_dict = {}
    for user_info in fake_people_and_their_points:
        rand_int = random.randint(1,1000000)
        email = "generated_people{rand_int}@gmail.com".format(rand_int=str(rand_int))
        while email in cache:
            rand_int = random.randint(1,1000000)
            email = "generated_people{rand_int}@gmail.com".format(rand_int=str(rand_int))
            print(email)
#         doc_ref = db.collection(u'waitlist').document(email)
#         doc_ref.set({
#             u'name': user_info['person_name'],
#             u'referrals': int(user_info['person_referrals']),
#             u'points': int(user_info['person_points'])
#         })
        points_dict[email] = {"email": email, "points": int(user_info['person_points']), "name": user_info['person_name'], "referrals": int(user_info['person_referrals'])}
        
    leaderboard_ref = db.collection(u'metadata').document(u'waitlist').update({
        u'leaderboard': points_dict
    })

In [7]:
def generate_right_skewed_distribution(max_number_referrals):
    ''' takes in the ceiling of random referrals (max_number_referrals)
    and returns a right-skewed distribution up to max_number_referrals
    and its corresponding array. Input a value greater than 2. 
    '''
    pers = np.arange(0,max_number_referrals,1)

    prob = [10000] * (0) + [9.0]* (1) + [1.0] * (len(pers) - 1)
    prob /= np.sum(prob)
    return pers, prob


In [8]:
def generate_name_with_typo_sometimes(percentage):
    '''generate the name with some typos some percentage of the time'''
    name = names.get_full_name()
    typo_rand = random.randint(1, 100)
    if typo_rand <= percentage:
        name_len = len(name)
        introduce_typo_location = random.randint(0, name_len)
        error_type = random.randint(1,2)
        if error_type == 1:
            first_last = random.randint(1,2)
            typo_name = lower_nth(name, first_last)
        else:
            typo_name = capitalize_nth(name, introduce_typo_location)
        return typo_name
    return name

In [9]:
def lower_nth(s, n):
    return s[:n] + s[n:].lower() 

In [10]:
def capitalize_nth(s, n):
    return s[:n] + s[n:].capitalize() 

In [31]:
def generate_from_names_library(number_gen, max_number_referrals):
    '''generate a list of names up to number_gen people with each 
    referral maxed out at max_number_referrals using the names library
    '''
    
    d = gender.Detector(case_sensitive=False)
    num_males, num_females = 0, 0
    pers, right_distribution = generate_right_skewed_distribution(max_number_referrals)
    people = set()
    people_referrals = []
    
    while len(people) != number_gen:
        name = generate_name_with_typo_sometimes(10)
        if random.randint(0, 10) < 7:
            name = name.split(" ")[0]
        if num_males > num_females * (0.75):
            if d.get_gender(name) == 'female':
                num_females += 1
                people.add(name) 
            continue
        else:
            if d.get_gender(name) == 'male':
                num_males += 1
            elif d.get_gender(name) == 'female':
                num_females += 1
            people.add(name)            
    for person in people:
        number = np.random.choice(pers, 1, p=right_distribution)[0]
        people_referrals.append((person, number))
        
    return people_referrals

In [12]:
def generate_from_names_product_hunt(number_gen, max_number_referrals):
    '''generate a list of names up to number_gen people with each 
    referral maxed out at max_number_referrals by scraping ProductHunt
    '''
    
    PH = ProductHunt()
    makers, hunters = PH.find_makers_and_hunters()
    pers, right_distribution = generate_right_skewed_distribution(max_number_referrals)
    people = set()
    people_referrals = []

    people.update(makers)
    people.update(hunters)
    
    while len(people) < number_gen:
        name = generate_name_with_typo_sometimes(20)
        people.add(name)
    
    for person in people:
        if len(people_referrals) == number_gen:
            break
        number = np.random.choice(pers, 1, p = right_distribution)[0]
        people_referrals.append((person, number))
        
    return people_referrals
        

In [13]:
def generate_fake_points_people(people_referrals):
    '''
    takes in a tuples of (names, referrals) and generates their 
    point allocation
    '''
    
    all_peoples_data = []
    for people_referral in people_referrals:
#         pers, distribution = generate_fake_points_distribution(people_referral[1], max_points)
#         number = np.random.choice(pers, 1, p = distribution)[0]
        name = people_referral[0]
        num_referrals = people_referral[1]
        points = num_referrals * 100 + 50
        person_data = {
            'person_name': name,
            'person_referrals': num_referrals,
            'person_points': points + np.random.normal(20, 8, 1)[0]
        }
        all_peoples_data.append(person_data)
        
    return all_peoples_data

In [15]:
# print("generating refs")
# refs = generate_from_names_library(5007, 15)
# print("generating people")
# fake_people = generate_fake_points_people(refs)
# print("writing")
# write_to_firebase(fake_people[0:1000])
print("done")

done


In [32]:
refs = generate_from_names_library(48, 15)


In [33]:
refs

[('Douglas', 1),
 ('James', 0),
 ('Mary', 4),
 ('Curtis', 6),
 ('Carl Pease', 0),
 ('Cindy Schroeder', 0),
 ('Kati', 7),
 ('Tina', 2),
 ('Deborah', 9),
 ('David', 7),
 ('James Richards', 0),
 ('Joseph Grammer', 8),
 ('Tim', 9),
 ('Joseph', 6),
 ('Amy', 0),
 ('Susannah Thompson', 4),
 ('Robert', 0),
 ('Marvin', 9),
 ('Zita', 6),
 ('Harry HagLer', 0),
 ('Miguel Bilyeu', 0),
 ('Martha', 14),
 ('Nicole', 7),
 ('Jason', 0),
 ('Margaret', 14),
 ('Willie', 3),
 ('Carole', 0),
 ('Franklin Myers', 12),
 ('Cynthia Dudley', 10),
 ('JeNna', 9),
 ('Frank Varley', 12),
 ('Linda', 3),
 ('Beverly', 12),
 ('Christopher Doyle', 3),
 ('William', 0),
 ('Kathleen', 9),
 ('Steve', 3),
 ('Sadie', 10),
 ('Blanche Kendall', 4),
 ('Michelle Schweitzer', 2),
 ('Frank Canady', 11),
 ('Paula', 0),
 ('Donald', 11),
 ('Doris', 13),
 ('Tiffany', 0),
 ('Alice Dietrich', 0),
 ('Ernesto Bell', 7),
 ('Angela', 0)]