In [1]:
import pandas as pd
import random
from faker import Faker

class FakeDataGenerator:
    """Adds columns of fake data to a pandas data frame, one column per call."""

    def __init__(self, df=None, rows=5):
        """
        This class generates fake data

        If a data frame is passed in, the fake data is added to that same
        data frame object (no copy is made). Otherwise a new, empty data
        frame is created.

        Parameters:
        -----------
        df:
          DataFrame, default is None

        rows:
          Only used when df is None: number of rows generated for every
          column added to the new data frame
        """
        # code goes under this line. 
        # I will deduct points if any code is written above the comments!

        self.fake = Faker()

        if df is not None:
            # Every generated column must match the length of the existing frame.
            self.df = df
            self.rows = len(df)
        else:
            self.df = pd.DataFrame()
            self.rows = rows


    def _add_column(self, column_name, make_value):
        """
        fills `column_name` with `self.rows` values produced by `make_value`

        Parameters
        ----------
        column_name:
          name of the column to create (an existing column is overwritten)

        make_value:
          callable taking no arguments; called once per row, in row order

        Returns
        -------
        data frame
        """

        self.df[column_name] = [make_value() for _ in range(self.rows)]

        return self.df


    def add_random_number(self, column_name, small_value, high_value, weights=None):
        """
        adds fake numbers to the data frame with `column_name`

        Parameters
        ----------
        column_name:
          name of the column to create

        small_value, high_value:
          inclusive integer bounds of the generated numbers

        weights:
          optional sequence of relative weights, one per integer in
          small_value..high_value (in ascending order). When None (default)
          each number is drawn uniformly with random.randint.

        Returns
        -------
        data frame

        Raises
        ------
        ValueError
          if `weights` is given and small_value > high_value, or if the
          number of weights does not match the number of candidate values
        """

        if weights is None:
            # Default path: one randint call per row.
            return self._add_column(
                column_name, lambda: random.randint(small_value, high_value)
            )

        if small_value > high_value:
            raise ValueError("small_value must not be greater than high_value")

        # +1 because range excludes its stop value while the bounds are inclusive.
        candidates = range(small_value, high_value + 1)
        self.df[column_name] = random.choices(candidates, weights=weights, k=self.rows)

        return self.df


    def add_name(self, column_name):
        """
        adds fake name to the data frame with `column_name`

        Parameters
        ----------
        column_name:
          name of the column to create

        Returns
        -------
        data frame
        """

        return self._add_column(column_name, lambda: self.fake.first_name())


    def add_last_name(self, column_name):
        """
        adds fake last name to the data frame with `column_name`

        Parameters
        ----------
        column_name:
          name of the column to create

        Returns
        -------
        data frame
        """

        return self._add_column(column_name, lambda: self.fake.last_name())


    def add_email(self, column_name):
        """
        adds fake email to the data frame with `column_name`

        Parameters
        ----------
        column_name:
          name of the column to create

        Returns
        -------
        data frame
        """

        return self._add_column(column_name, lambda: self.fake.email())


    def add_phone_number(self, column_name):
        """
        adds fake phone number to the data frame with `column_name`

        Parameters
        ----------
        column_name:
          name of the column to create

        Returns
        -------
        data frame
        """

        return self._add_column(column_name, lambda: self.fake.phone_number())


    def add_address(self, column_name):
        """
        adds fake address to the data frame with `column_name`

        Parameters
        ----------
        column_name:
          name of the column to create

        Returns
        -------
        data frame
        """

        return self._add_column(column_name, lambda: self.fake.address())


In [2]:
import pandas as pd

# Case 1 - start from a data frame that already exists
# 25 consecutive product ids: 123, 124, ..., 147
df = pd.DataFrame({'product_id': list(range(123, 148))})
df.head()

Unnamed: 0,product_id
0,123
1,124
2,125
3,126
4,127


In [3]:
# The generator keeps a reference to the frame it is given and assigns new
# columns onto it, so `df` itself gains every column added below.
fakeGen = FakeDataGenerator(df=df)
fakeGen.add_name(column_name='name')
fakeGen.add_last_name(column_name='lastname')
for address_column in ('shipping_address', 'billing_address'):
    fakeGen.add_address(column_name=address_column)
# Last expression of the cell: the returned data frame is what gets displayed.
fakeGen.add_phone_number(column_name='customer_phone')

Unnamed: 0,product_id,name,lastname,shipping_address,billing_address,customer_phone
0,123,Sandra,Dyer,"7575 Maria Grove\nMillsfurt, NM 82065",USNS Stone\nFPO AE 38583,001-202-323-4057x14729
1,124,Sarah,Thomas,"1977 Garza Land\nOrtizville, NC 50500","25632 Hubbard Heights\nLisabury, NJ 14147",001-744-377-6564
2,125,Anita,Wilson,"69941 Taylor Avenue\nRushton, IA 90035","58675 Mathis Pike\nSouth Anthony, AK 62983",+1-011-619-9337
3,126,Robert,Hawkins,"93685 Williams Flat\nStephaniefort, VT 96274","41781 Frye Greens Suite 415\nParkerton, DE 89744",(363)607-2352x655
4,127,Michael,Lloyd,"264 Bailey Ridge\nPort Joseph, NV 42600","84139 Potter Path\nBensonland, MN 05925",231.028.6955x0621
5,128,Stacey,Adams,Unit 7232 Box 6220\nDPO AP 65309,"616 Christopher Plaza\nNew Mark, WV 79591",919.908.6781x296
6,129,Donna,Thomas,"07258 Davis Streets\nPort Larrychester, WI 64002","218 Casey Village\nNorth Ambermouth, NV 98777",001-609-793-3754x60619
7,130,Monica,Nguyen,"43027 Thomas Club Apt. 655\nGonzaleztown, NJ 6...","22026 Page Pine\nNew Jeffreyhaven, WV 43809",571.178.0521
8,131,Crystal,Bailey,"162 Thornton Heights\nGwendolynstad, CO 86676","67817 Megan Circle\nPort Ginaborough, NJ 05970",996-402-4410x13998
9,132,Justin,Spencer,USCGC Peters\nFPO AE 94027,"519 Hardy Green Apt. 163\nHaleyhaven, NC 00904",001-601-849-1106x95940


In [4]:
# Case 2 - no data frame supplied: the generator creates an empty one and
# fills each column with the default number of rows (rows=5).

fakeGen = FakeDataGenerator()
for address_column in ('shipping_address', 'billing_address'):
    fakeGen.add_address(column_name=address_column)
fakeGen.add_phone_number(column_name='customer_phone')
# Last expression of the cell: the returned data frame is what gets displayed.
fakeGen.add_random_number(column_name='sale_count', small_value=10, high_value=100)

Unnamed: 0,shipping_address,billing_address,customer_phone,sale_count
0,USS Webb\nFPO AA 76331,"08841 Boone Centers\nNorth Sherryville, KS 67968",+1-024-889-3676x1590,14
1,"85611 Nixon Forks\nTammyport, AK 32296","13103 Day Cliffs\nBrianstad, AR 48555",001-585-824-7564x77972,56
2,"73596 Guy Lane\nLake Nathanielside, CT 15162","931 Brittany Parks\nNorth Evanport, SD 32754",(709)948-6035,99
3,"6137 Anderson Springs Apt. 159\nRichardburgh, ...","80534 Hebert Bypass Suite 981\nGonzalezhaven, ...",(061)688-8903x5229,38
4,"18180 Mann Keys\nPort Anna, SD 88978","342 Isaiah Squares Suite 881\nPort Sandra, NY ...",6430170602,58
