In [43]:
import pandas as pd
import numpy as np
import random

In [44]:
# Draw 10 distinct claim numbers from 1 to 999 as plain integers.
# A later cell left-pads them with zeros into 7-character strings.

claim_numbers = random.sample(population=range(1, 1000), k=10)

In [45]:
# Show the sampled claim numbers; at this point they are still plain integers.
print(claim_numbers)

[279, 81, 731, 759, 362, 755, 991, 175, 478, 291]


In [46]:
# Zero-pad every claim number to a fixed width of 7 characters.
# str.zfill pads short values without ever dropping digits, unlike slicing
# the last 7 characters of a zero-prefixed string, which silently truncates
# anything longer than 7 digits.
claim_numbers = [str(number).zfill(7) for number in claim_numbers]

In [47]:
# Inspect the result: each claim number is now a zero-padded 7-character string.
claim_numbers

['0000279',
 '0000081',
 '0000731',
 '0000759',
 '0000362',
 '0000755',
 '0000991',
 '0000175',
 '0000478',
 '0000291']

In [54]:
# Force the leading digit of every claim number to '3'.
# Strings are immutable, so assigning to a single character
# (claim_numbers[i][0] = '3') does not work; instead each entry is rebuilt
# as '3' followed by everything after its first character.
for position, claim in enumerate(claim_numbers):
    claim_numbers[position] = '3' + claim[1:]

In [56]:
# Inspect the result: every claim number now starts with the digit 3.
claim_numbers

['3000279',
 '3000081',
 '3000731',
 '3000759',
 '3000362',
 '3000755',
 '3000991',
 '3000175',
 '3000478',
 '3000291']

In [61]:
# If the target format is known up front, everything can be done in one step:
# sample 10 distinct values straight from the 7-digit numbers that start with 3
# and convert them to strings.
# range() excludes its stop value, so the stop must be 4000000 for 3999999
# to remain a possible claim number.
claim_numbers_2 = [str(number) for number in random.sample(range(3000000, 4000000), 10)]
claim_numbers_2

['3937749',
 '3049894',
 '3482021',
 '3601649',
 '3725435',
 '3052299',
 '3995130',
 '3345617',
 '3425295',
 '3067101']

In [77]:
# Build a small sample frame: one row per claim number (used as the index)
# and a single `status` column whose values are drawn at random from {0, 1}.
df = pd.DataFrame({'status': np.random.randint(0, 2, size=10)},
                  index=claim_numbers_2)

In [78]:
# Review the sample: claim numbers form the index and `status` holds a random 0 or 1.
df

Unnamed: 0,status
3937749,1
3049894,1
3482021,0
3601649,1
3725435,0
3052299,0
3995130,0
3345617,1
3425295,0
3067101,1


In [75]:
# Reference: https://medium.com/@evelynli_30748/map-apply-applymap-with-the-lambda-function-5e83028be759
# Series.map returns a new Series holding the translated labels; it does not
# modify `df`, so the result has to be assigned back to be kept.
# NOTE(review): this cell's execution count (75) is lower than that of the cell
# that builds `df` (77), so the recorded output reflects an earlier random
# draw - re-run the notebook top to bottom to refresh it.
df['status'].map({0: 'REJECT', 1: 'ACCEPT'})

3937749    REJECT
3049894    ACCEPT
3482021    ACCEPT
3601649    ACCEPT
3725435    REJECT
3052299    REJECT
3995130    REJECT
3345617    REJECT
3425295    REJECT
3067101    ACCEPT
Name: status, dtype: object

In [81]:
# Reference: https://stackoverflow.com/a/12152759
# Translate the numeric status codes into labels and assign the result back to
# the column. Calling replace(..., inplace=True) on the `df['status']` selection
# is chained assignment: recent pandas versions warn about it, and under
# Copy-on-Write (the default from pandas 3.0) it leaves `df` unchanged.
df['status'] = df['status'].replace({0: 'REJECT', 1: 'ACCEPT'})

In [82]:
# Confirm the replacement took effect: `status` now holds ACCEPT/REJECT labels
# instead of 1/0.
df

Unnamed: 0,status
3937749,ACCEPT
3049894,ACCEPT
3482021,REJECT
3601649,ACCEPT
3725435,REJECT
3052299,REJECT
3995130,REJECT
3345617,ACCEPT
3425295,REJECT
3067101,ACCEPT


In [84]:
# Scale the experiment up: draw 1,000,000 distinct claim numbers from the
# 10-digit range 1000000000-3999999999 and store each one as a string.
claim_numbers_n = list(map(str, random.sample(range(10**9, 4 * 10**9), 10**6)))

In [85]:
# Same construction at scale: one random 0/1 status per claim number,
# indexed by the 1,000,000 claim-number strings.
df = pd.DataFrame({'status': np.random.randint(0, 2, size=1000000)},
                  index=claim_numbers_n)

In [88]:
# Confirm the frame size: 1,000,000 claim numbers (rows) with a single `status` column.
df.shape

(1000000, 1)

In [89]:
# this is a great way to provide a final table
# after analysis is fully completed at the end.
%timeit df['status'].replace({0: 'REJECT', 1: 'ACCEPT'}, inplace=True)

40.3 ms ± 631 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [90]:
# Spot-check the first rows: after the replace above, `status` shows ACCEPT/REJECT labels.
df.head()

Unnamed: 0,status
3261376278,REJECT
1632058789,ACCEPT
1439631111,REJECT
2401611405,REJECT
1697848697,ACCEPT
