In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import re

#import plotly
import plotly as py
import plotly.graph_objs as go
py.offline.init_notebook_mode(connected=True)
import plotly.io as pio
%config InlineBackend.figure_format = 'svg'

In [2]:
# Load the Samsung update corpus and recompute each update's delay ("difference")
# as the number of days between the update's release date and the date the
# matching Android security bulletin was published.
samsungDF=pd.read_csv('../data/samsung_final.csv')
samsungDF.columns=['Model', 'Carrier','android_os', 'Release_Date', 'Bulletin_Level', 'difference','locked']
# Year the patch shipped: the first four characters of the release-date string.
samsungDF['Year_Patch_Release']=samsungDF.Release_Date.str[:4]

bulletinPublished=pd.read_csv('../data/androidBulletin_release.csv',header=None)
bulletinPublished.columns=['Bulletin','Bulletin_Publish_Date']
# Rewrite a two-token label "<A> <B>" as "<A> 1, <B>" so it parses as the first
# day of the month (presumably "<Month> <Year>" -- confirm against the CSV).
# regex=True is mandatory: pandas >= 2.0 treats the pattern as a literal string
# by default, which would silently leave the label unchanged.
bulletinPublished['Bulletin_Level']=bulletinPublished.Bulletin.str.replace(r"^([^ ]*) ([^ ]*)$",r"\1 1, \2",regex=True)
# Normalise to the "YYYY-MM-DD" string form so it can be used as the merge key
# against samsungDF.Bulletin_Level.
bulletinPublished['Bulletin_Level']=pd.to_datetime(bulletinPublished['Bulletin_Level']).astype(str)
bulletinPublished['Bulletin_Publish_Date']=pd.to_datetime(bulletinPublished['Bulletin_Publish_Date'])
bulletinPublished=bulletinPublished.drop(columns=['Bulletin'])
# The 2020-01 and 2020-02 bulletins are excluded from the analysis.
bulletinPublished=bulletinPublished[~bulletinPublished.Bulletin_Level.isin(["2020-02-01","2020-01-01"])]

##### Recalculate the difference #####
samsungDF=pd.merge(samsungDF,bulletinPublished,on="Bulletin_Level",how='outer') # Merge properly

# Remove extraneous bulletins not in corpus: the outer merge adds one row per
# bulletin that no update references, and those rows have no Model.
samsungDF.Model=samsungDF.Model.astype(str)
# .copy() so the column assignments below write to an independent frame
# instead of a filtered slice.
samsungDF=samsungDF[samsungDF.Model!="nan"].copy()

# Calculate the new difference (in whole days)
samsungDF['Bulletin_Publish_Date']=pd.to_datetime(samsungDF.Bulletin_Publish_Date)
samsungDF['Release_Date']=pd.to_datetime(samsungDF.Release_Date)
samsungDF['difference']=(samsungDF.Release_Date-samsungDF.Bulletin_Publish_Date).dt.days


print(samsungDF.head())

            Model  Carrier android_os Release_Date Bulletin_Level  difference  \
0  Galaxy S7 edge  TMobile      8.0.0   2019-04-03     2019-03-01        30.0   
1       Galaxy S9  TMobile          9   2019-03-22     2019-03-01        18.0   
2   Galaxy Note 8   Sprint          9   2019-03-26     2019-03-01        22.0   
3      Galaxy S9+   Sprint          9   2019-04-08     2019-03-01        35.0   
4     Galaxy S10+  Verizon          9   2019-04-12     2019-03-01        39.0   

  locked Year_Patch_Release Bulletin_Publish_Date  
0   True               2019            2019-03-04  
1   True               2019            2019-03-04  
2   True               2019            2019-03-04  
3   True               2019            2019-03-04  
4  False               2019            2019-03-04  


In [3]:

# Narrow the corpus to T-Mobile, Sprint and Unbranded devices, then cache the
# distinct carriers / models / bulletins / patch years and the locked/unlocked
# partitions of the filtered frame.
print("-----------------------------")
excludedCarriers=["AT&T","Verizon"]
keepRows=(~samsungDF.Carrier.isin(excludedCarriers)) & (samsungDF.Bulletin_Level!="2020-01-01") # remove 2020-01-01
samsungDF=samsungDF[keepRows]

allCarriers=samsungDF['Carrier'].unique()
allModels_Sam=samsungDF['Model'].unique()
allBulletin_Sam=samsungDF['Bulletin_Level'].unique()
allPatchYears_Sam=samsungDF['Year_Patch_Release'].unique()

print(samsungDF.head())
separator="---------------------------------------------------------------------------------"
print(separator)
print(separator)
print("SAMSUNG: All Updates: "+ str(len(samsungDF)))
print("SAMSUNG: Unique Phones : "+ str(len(allModels_Sam)))


# Split by lock state for later comparisons.
isLocked=samsungDF.locked==True
isUnlocked=samsungDF.locked==False
samsungDF_locked=samsungDF[isLocked]
samsungDF_unlocked=samsungDF[isUnlocked]

-----------------------------
            Model  Carrier android_os Release_Date Bulletin_Level  difference  \
0  Galaxy S7 edge  TMobile      8.0.0   2019-04-03     2019-03-01        30.0   
1       Galaxy S9  TMobile          9   2019-03-22     2019-03-01        18.0   
2   Galaxy Note 8   Sprint          9   2019-03-26     2019-03-01        22.0   
3      Galaxy S9+   Sprint          9   2019-04-08     2019-03-01        35.0   
6     Galaxy S10+   Sprint          9   2019-04-12     2019-03-01        39.0   

  locked Year_Patch_Release Bulletin_Publish_Date  
0   True               2019            2019-03-04  
1   True               2019            2019-03-04  
2   True               2019            2019-03-04  
3   True               2019            2019-03-04  
6  False               2019            2019-03-04  
---------------------------------------------------------------------------------
---------------------------------------------------------------------------------
SAMSUNG

In [4]:
# Show the distinct model names that remain after the carrier filter above.
print(allModels_Sam)

['Galaxy S7 edge' 'Galaxy S9' 'Galaxy Note 8' 'Galaxy S9+' 'Galaxy S10+'
 'Galaxy S8' 'Galaxy Note9' 'Galaxy S10' 'Galaxy S8+' 'Galaxy S7'
 'Galaxy A10e' 'Galaxy Note 10' 'Galaxy A50' 'Galaxy Note 10+'
 'Galaxy A20']


In [5]:
############### Stats for Samsung Locked vs Unlocked ###############
# Build one summary row per (carrier, lock state) that has at least one update:
# update count, distinct models, distinct bulletins, earliest/latest release
# date, earliest/latest bulletin level, and the ratio of distinct bulletins to
# distinct models. Everything is computed in a single pass so each row's values
# are guaranteed to come from the same subset.
samsungDF=samsungDF.assign(locked=samsungDF.locked.astype('bool'))

statColumns=['Carrier','locked','Num_Updates','Unique_Models','Unique_Bulletins',
             'Earliest_Release','Latest_Release','Earliest_Bulletin','Latest_Bulletin',
             'Bulletins_per_Model']
allRows=[]
for carrier in allCarriers:
    carrierDF=samsungDF[samsungDF.Carrier==carrier]
    for lock in [True,False]:
        tmpdf=carrierDF[carrierDF.locked==lock]
        if len(tmpdf)==0:
            continue # this carrier has no devices in this lock state
        numModels=len(tmpdf.Model.unique())
        numBulletins=len(tmpdf.Bulletin_Level.unique())
        allRows.append([
            str(carrier),
            lock,
            len(tmpdf),
            numModels,
            numBulletins,
            tmpdf.Release_Date.min(),
            tmpdf.Release_Date.max(),
            # Bulletin levels are "YYYY-MM-DD" strings, so string ordering is
            # also chronological ordering.
            tmpdf.Bulletin_Level.min(),
            tmpdf.Bulletin_Level.max(),
            # Ratio of distinct bulletins to distinct models (not a per-model mean).
            numBulletins/numModels,
        ])

statsDF_Sam_filtered=pd.DataFrame(allRows,columns=statColumns)


print(statsDF_Sam_filtered.head())

     Carrier  locked  Num_Updates  Unique_Models  Unique_Bulletins  \
0    TMobile    True          129             14                18   
1    TMobile   False          124             14                18   
2     Sprint    True          143             15                18   
3     Sprint   False          154             14                18   
4  Unbranded   False          155             14                18   

  Earliest_Release Latest_Release Earliest_Bulletin Latest_Bulletin  \
0       2018-08-13     2019-12-30        2018-07-01      2019-12-01   
1       2018-07-25     2020-01-31        2018-07-01      2019-12-01   
2       2018-08-06     2020-01-02        2018-07-01      2019-12-01   
3       2018-07-25     2020-01-14        2018-07-01      2019-12-01   
4       2018-07-25     2020-01-14        2018-07-01      2019-12-01   

   Bulletins_per_Model  
0             1.285714  
1             1.285714  
2             1.200000  
3             1.285714  
4             1.285714  


In [6]:
# Let's look at security bulletins addressed on both locked and unlocked devices.
#
# For each model and each carrier that sells both a locked and an unlocked
# variant (TMobile, Sprint), keep only the updates whose bulletin level was
# shipped to BOTH variants. Unbranded rows are never selected here.

samsungDF=samsungDF.reset_index(drop=True)
goodindices=[]
for model in samsungDF.Model.unique():
    modelDF=samsungDF[samsungDF.Model==model]
    for carrier in ("TMobile","Sprint"):
        carrierDF=modelDF[modelDF.Carrier==carrier]
        # dropna(): a missing bulletin level must never count as "shared".
        lockedBuls=set(carrierDF[carrierDF.locked==True].Bulletin_Level.dropna())
        unlockedBuls=set(carrierDF[carrierDF.locked==False].Bulletin_Level.dropna())
        # Empty when either variant is absent, so nothing is recorded then.
        sharedBuls=lockedBuls & unlockedBuls
        goodindices.extend(carrierDF[carrierDF.Bulletin_Level.isin(sharedBuls)].index)


print(len(samsungDF))
# Sorted so the resulting row order is deterministic; .loc because the
# collected values are index labels, not positions.
goodindices=sorted(set(goodindices))
normalizedDF=samsungDF.loc[goodindices]
print("Number of Updates: "+str(len(goodindices)))
print("Unique Models: "+str(len(normalizedDF.Model.unique())))
print("Unique Bulletins: "+str(len(normalizedDF.Bulletin_Level.unique())))
print(normalizedDF.head())



705
Number of Updates: 465
Unique Models: 14
Unique Bulletins: 18
            Model  Carrier android_os Release_Date Bulletin_Level  difference  \
0  Galaxy S7 edge  TMobile      8.0.0   2019-04-03     2019-03-01        30.0   
3      Galaxy S9+   Sprint          9   2019-04-08     2019-03-01        35.0   
4     Galaxy S10+   Sprint          9   2019-04-12     2019-03-01        39.0   
6  Galaxy S7 edge   Sprint      8.0.0   2019-04-29     2019-03-01        56.0   
7    Galaxy Note9  TMobile          9   2019-04-15     2019-03-01        42.0   

   locked Year_Patch_Release Bulletin_Publish_Date  
0    True               2019            2019-03-04  
3    True               2019            2019-03-04  
4   False               2019            2019-03-04  
6    True               2019            2019-03-04  
7    True               2019            2019-03-04  


In [7]:
import statistics

# Compare patch delay (days) between locked and unlocked devices, restricted to
# models that received updates on all five carrier variants.

# Relabel unlocked Sprint/TMobile rows so every (carrier, lock state) pair has
# its own Carrier value. .loc with a boolean mask is used because .at only
# accepts a single scalar label and rejects a list of row labels on current
# pandas.
# NOTE: "Tmobile_U" is spelled differently from the "TMobile_U" label used in
# In [10]; here the label is only counted, never matched by name.
carrierNormDF=samsungDF.copy()
unlockedRows=carrierNormDF.locked==False
sprintRows=carrierNormDF.Carrier=="Sprint"
tmobileRows=carrierNormDF.Carrier=="TMobile"
carrierNormDF.loc[unlockedRows & sprintRows,'Carrier']="Sprint_U"
carrierNormDF.loc[unlockedRows & tmobileRows,'Carrier']="Tmobile_U"
print(carrierNormDF.Carrier.unique())

# A model qualifies only if it appears under all 5 variants
# (TMobile, Tmobile_U, Sprint, Sprint_U, Unbranded).
goodModels=[]
for model in samsungDF.Model.unique():
    variants=carrierNormDF[carrierNormDF.Model==model].Carrier.unique()
    if len(variants)==5: # if all the phones were updated
        goodModels.append(model)


# Per-variant frames
dftmob_U=samsungDF[samsungDF.Carrier=='TMobile']
dftmob=dftmob_U[dftmob_U.locked==True]
dftmob_U=dftmob_U[dftmob_U.locked==False]

dfsprnt_U=samsungDF[samsungDF.Carrier=='Sprint']
dfsprnt=dfsprnt_U[dfsprnt_U.locked==True]
dfsprnt_U=dfsprnt_U[dfsprnt_U.locked==False]

dfunlocked_U=samsungDF[samsungDF.Carrier=='Unbranded']
dfunlocked_U=dfunlocked_U[dfunlocked_U.locked==False]


######################################################
######## Without time restriction ########
######################################################
def _collectDifferences(variantDF):
    """Return [delays, models] for the rows of variantDF whose model is in
    goodModels, grouped in goodModels order."""
    delays,models=[],[]
    for model in goodModels:
        rows=variantDF[variantDF.Model==model]
        delays.extend(rows['difference'].values)
        models.extend(rows['Model'])
    return [delays,models]

for model in goodModels:
    print(model)

tmobile=_collectDifferences(dftmob)
tmobile_U=_collectDifferences(dftmob_U)
spt=_collectDifferences(dfsprnt)
spt_U=_collectDifferences(dfsprnt_U)
unbrand_U=_collectDifferences(dfunlocked_U)


# Print counts
print("----------------------")
print("Total Amount of Values")
print("T-Mobile: locked(" + str(len(tmobile[0])) + "), unlocked(" + str(len(tmobile_U[0]))+")")
print("Sprint: locked(" + str(len(spt[0])) + "), unlocked(" + str(len(spt_U[0]))+")")
print("Unbranded: " + str(len(unbrand_U[0])))
print("Unique Phones: " + str(len(goodModels)))
print("----------------------")

##### Plotting #####
# (trace name, collected values, marker colour), in display order
boxSpecs=[
    ("T-Mobile",tmobile,'rgb(204, 61, 202)'),
    ("T-Mobile Unlocked",tmobile_U,'rgb(235, 122, 233)'),
    ("Sprint",spt,'rgb(212, 164, 21)'),
    ("Sprint Unlocked",spt_U,'rgb(252, 211, 86)'),
    ("Unbranded",unbrand_U,'rgb(30, 179, 70)'),
]
data=[]
for traceName,collected,traceColor in boxSpecs:
    data.append(go.Box(
        y=collected[0],
        name=traceName,
        boxpoints = 'all',
        marker_color=traceColor,
        showlegend=False
    ))


# Edit the layout
layout = dict(yaxis = dict(title = 'Days',showgrid=True, gridcolor='rgb(219, 219, 219)'),
              plot_bgcolor='rgba(0,0,0,0)',
              legend_orientation="h",
              legend=dict(x=0, y=-.25),
              font=dict(size=20)
              )

fig = dict(data=data, layout=layout)
py.offline.iplot(fig)
# pio.write_image(fig, 'samsung-locked-vs-unlocked-allSamsung.pdf', width=1100, height=600)


# Mean delay per variant; the locked/unlocked figures are the unweighted average
# of the T-Mobile and Sprint means (Unbranded is deliberately left out).
meanTmobile=statistics.mean(tmobile[0])
meanTmobile_U=statistics.mean(tmobile_U[0])
meanSprint=statistics.mean(spt[0])
meanSprint_U=statistics.mean(spt_U[0])

locked=(meanTmobile+meanSprint)/2
unlocked=(meanTmobile_U+meanSprint_U)/2

print(meanTmobile)
print(meanTmobile_U)
print(meanSprint)
print(meanSprint_U)

print()
print("Average locked")
print(locked)

print("Average unlocked")
print(unlocked)

similarBulsDF=normalizedDF.copy()


['TMobile' 'Sprint' 'Sprint_U' 'Unbranded' 'Tmobile_U']
Galaxy S7 edge
Galaxy S9
Galaxy Note 8
Galaxy S9+
Galaxy S10+
Galaxy S8
Galaxy Note9
Galaxy S10
Galaxy S8+
Galaxy S7
Galaxy Note 10
Galaxy Note 10+
Galaxy A20
----------------------
Total Amount of Values
T-Mobile: locked(127), unlocked(120)
Sprint: locked(136), unlocked(150)
Unbranded: 151
Unique Phones: 13
----------------------


25.236220472440944
29.441666666666666
23.022058823529413
26.35333333333333

Average locked
24.129139647985177
Average unlocked
27.8975


In [8]:
# Restrict the corpus to the models in goodModels, keeping rows grouped in
# goodModels order.
definiteIndex=[]
for model in goodModels:
    definiteIndex.extend(samsungDF[samsungDF.Model==model].index)

# .loc because the collected values are index labels rather than positions;
# .copy() so that later cells can modify `final` without writing to a slice of
# samsungDF (which triggers SettingWithCopyWarning).
final=samsungDF.loc[definiteIndex,:].copy()
print(len(final))



684


In [9]:
############### Stats for Samsung Locked vs Unlocked ###############
# Same summary table as before, computed on `final`. One row per
# (carrier, lock state) that has at least one update.

# assign() rebinds `final` to a new frame instead of writing a column into what
# may be a slice of samsungDF (the cause of SettingWithCopyWarning).
final=final.assign(locked=final.locked.astype('bool'))

statColumns=['Carrier','locked','Num_Updates','Unique_Models','Unique_Bulletins',
             'Earliest_Release','Latest_Release','Earliest_Bulletin','Latest_Bulletin',
             'Bulletins_per_Model']
allRows=[]
for carrier in allCarriers:
    carrierDF=final[final.Carrier==carrier]
    for lock in [True,False]:
        dfTemp=carrierDF[carrierDF.locked==lock]
        if len(dfTemp)==0:
            continue # this carrier has no devices in this lock state
        numModels=len(dfTemp.Model.unique())
        numBulletins=len(dfTemp.Bulletin_Level.unique())
        allRows.append([
            str(carrier),
            lock,
            len(dfTemp),
            numModels,
            numBulletins,
            dfTemp.Release_Date.min(),
            dfTemp.Release_Date.max(),
            # Bulletin levels are "YYYY-MM-DD" strings, so string ordering is
            # also chronological ordering.
            dfTemp.Bulletin_Level.min(),
            dfTemp.Bulletin_Level.max(),
            # Ratio of distinct bulletins to distinct models (not a per-model mean).
            numBulletins/numModels,
        ])

statsDF_Sam_filtered=pd.DataFrame(allRows,columns=statColumns)


print(statsDF_Sam_filtered.loc[:,["Carrier","locked","Latest_Release","Latest_Bulletin"]].head())

     Carrier  locked Latest_Release Latest_Bulletin
0    TMobile    True     2019-12-30      2019-12-01
1    TMobile   False     2020-01-31      2019-12-01
2     Sprint    True     2020-01-02      2019-12-01
3     Sprint   False     2020-01-14      2019-12-01
4  Unbranded   False     2020-01-14      2019-12-01




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [10]:
# Now calculate frequency: build a per-variant, de-duplicated update table and
# attach each model's market release year and month.
normalizedDF2=samsungDF.copy()

# Split out carriers for better plotting and logic: unlocked Sprint/TMobile rows
# get their own Carrier label. .loc with a boolean mask is used because .at only
# accepts a single scalar label, not a list of row labels.
unlockedRows=normalizedDF2.locked==False
sprintRows=normalizedDF2.Carrier=="Sprint"
tmobileRows=normalizedDF2.Carrier=="TMobile"
normalizedDF2.loc[unlockedRows & sprintRows,'Carrier']="Sprint_U"
normalizedDF2.loc[unlockedRows & tmobileRows,'Carrier']="TMobile_U"

# Sort so that, when several updates cover the same bulletin for a given
# carrier/model, the earliest release comes first and is the one kept.
normalizedDF2=normalizedDF2.sort_values(by=['Carrier','Model','Bulletin_Level','Release_Date'])
normalizedDF2=normalizedDF2.drop_duplicates(subset=['Carrier','Model','Bulletin_Level'],keep='first')


# Grab similar across all five carrier devices only
normalizedDF2=normalizedDF2.reset_index(drop=True)
goodindices=[]
for model in normalizedDF2.Model.unique():
    tmp2=normalizedDF2[normalizedDF2.Model==model]
    if len(tmp2.Carrier.unique())==5: # if all the phones were updated
        goodindices.extend(tmp2.index)
# .loc: goodindices holds index labels; .copy() so the column writes below go
# to an independent frame rather than a slice.
normalizedDF2=normalizedDF2.loc[goodindices].copy()

# Normalize model names as needed for the lookup below. regex=True is required
# for the capture group: pandas >= 2.0 defaults to literal matching.
normalizedDF2['Model']=normalizedDF2.Model.str.replace(r'Galaxy Note ([0-9]{1,2})',r'Galaxy Note\1',regex=True)
normalizedDF2['Model']=normalizedDF2.Model.str.replace('Galaxy S7 edge','Galaxy S7 Edge',regex=False)
print(normalizedDF2.Model.unique())

# Get dataframe with release years
carrierDF=pd.read_csv('../data/allCarrierData_final.csv')
carrierDFSam=carrierDF[carrierDF.Manufacture=="Samsung"]

# Set release years: the first matching row supplies the year and month.
allReleaseYears={}
allReleaseMonth={}
for model in normalizedDF2.Model.unique(): # get the release years for each model
    releaseYr=carrierDFSam[carrierDFSam.Model==model]
    if len(releaseYr)==0:
        print(model) # model missing from the carrier data; reported here
    else:
        allReleaseYears[model]=releaseYr.iloc[0].Year_Phone_Release
        allReleaseMonth[model]=releaseYr.iloc[0].Month_Phone_Release

# map() leaves NaN for any model reported as missing above instead of raising
# KeyError after it has already been printed.
normalizedDF2['Year_Phone_Release']=normalizedDF2.Model.map(allReleaseYears)
normalizedDF2['Month_Phone_Release']=normalizedDF2.Model.map(allReleaseMonth)


['Galaxy A20' 'Galaxy Note10' 'Galaxy Note10+' 'Galaxy Note8'
 'Galaxy Note9' 'Galaxy S10' 'Galaxy S10+' 'Galaxy S7' 'Galaxy S7 Edge'
 'Galaxy S8' 'Galaxy S8+' 'Galaxy S9' 'Galaxy S9+']


In [11]:
# Preview the de-duplicated per-variant frame, now carrying the
# Year_Phone_Release / Month_Phone_Release columns added in the previous cell.
print(normalizedDF2.head())

          Model   Carrier android_os Release_Date Bulletin_Level  difference  \
3    Galaxy A20    Sprint          9   2019-07-25     2019-06-01        52.0   
4    Galaxy A20    Sprint          9   2019-09-23     2019-09-01        20.0   
5    Galaxy A20    Sprint          9   2019-12-11     2019-11-01        37.0   
138  Galaxy A20  Sprint_U          9   2019-10-29     2019-08-01        85.0   
139  Galaxy A20  Sprint_U          9   2019-12-17     2019-11-01        43.0   

     locked Year_Patch_Release Bulletin_Publish_Date  Year_Phone_Release  \
3      True               2019            2019-06-03                2019   
4      True               2019            2019-09-03                2019   
5      True               2019            2019-11-04                2019   
138   False               2019            2019-08-05                2019   
139   False               2019            2019-11-04                2019   

     Month_Phone_Release  
3                      4  
4       

In [12]:
# NEW METHODOLOGY FOR FREQUENCY
# Work on a copy of the normalized frame and report how many rows each
# Sprint / T-Mobile variant contributes.
freqDF=normalizedDF2.copy()


for variantLabel in ("Sprint","Sprint_U","TMobile","TMobile_U"):
    variantRows=normalizedDF2[normalizedDF2.Carrier==variantLabel]
    print(len(variantRows))


# Map released times to devices
_PHONE_PUBLISHED={
    'Galaxy A20': "2019/04/01",
    'Galaxy Note10': "2019/08/01",
    'Galaxy Note10+': "2019/08/01",
    'Galaxy Note8': "2017/08/01",
    'Galaxy Note9': "2018/08/01",
    'Galaxy S10': "2019/03/01",
    'Galaxy S10+': "2019/03/01",
    'Galaxy S7': "2016/03/01",
    'Galaxy S7 Edge': "2016/03/01",
    'Galaxy S8': "2017/04/01",
    'Galaxy S8+': "2017/04/01",
    'Galaxy S9': "2018/03/01",
    'Galaxy S9+': "2018/03/01",
}

def mapPublish(row):
    """Return the "YYYY/MM/DD" publish date string for row.Model.

    Yields None when the model is not one of the thirteen listed models.
    """
    return _PHONE_PUBLISHED.get(row.Model)


# Attach each row's phone publish date (via mapPublish) and compute, per
# carrier variant and model, how many updates were received versus how many
# monthly bulletins could have been received, then plot the per-carrier
# cumulative distribution of that rate.
appendMe=[mapPublish(row) for index,row in freqDF.iterrows()]

freqDF["Phone_Published"]=appendMe
freqDF["Phone_Published"]=pd.to_datetime(freqDF["Phone_Published"])
freqDF["Release_Date"]=pd.to_datetime(freqDF["Release_Date"])

freqDF=freqDF[freqDF.Phone_Published<=freqDF.Release_Date] # Remove updates happening prior to phone release

counts=[]
for car in freqDF.Carrier.unique():
    tmp=freqDF[freqDF.Carrier==car]
    for mod in tmp.Model.unique():
        # sort_values() returns a new frame; sorting the slice in place is what
        # raised SettingWithCopyWarning.
        tmp3=tmp[tmp.Model==mod].sort_values(by=['Release_Date'],ascending=False)
        totalReceived=len(tmp3) # total number of updates received for the model
        latest=tmp3.iloc[0,:] # most recent update for this model
        # Months from the phone's publish month through the month of the latest
        # update, inclusive (the +1 accounts for the WHOLE last month).
        shouldReceive=(latest.Release_Date.year-latest.Phone_Published.year)*12.0+(latest.Release_Date.month+1)-latest.Phone_Published.month
        if shouldReceive>0: # we skip any updates released prior to phone's release
            counts.append([car,mod,totalReceived,shouldReceive])
            if totalReceived>shouldReceive:
                # More updates than months: dump the rows for inspection.
                print(shouldReceive)
                print(tmp3)
                print()
        elif shouldReceive==0:
            counts.append([car,mod,totalReceived,1]) # account for less than a month...


counts=pd.DataFrame(counts,columns=['Carrier','Model','Actual','Potential'])
counts['Actual']=counts['Actual'].astype(int)
counts['Potential']=counts['Potential'].astype(int)
counts['Received_Rate']=counts['Actual']/counts['Potential']


# For each carrier variant: empirical CDF of Received_Rate across models.
tempoMan=counts.sort_values(by=['Carrier','Received_Rate'],ascending=True)

# Trace colour per carrier variant. Unknown labels get None (plotly's default
# colour) rather than an unbound name or the previous iteration's colour.
carrierColors={
    "Sprint": 'rgb(212, 164, 21)',
    "Sprint_U": 'rgb(252, 211, 86)',
    "TMobile": 'rgb(204, 61, 202)',
    "TMobile_U": 'rgb(235, 122, 233)',
    "Unbranded": 'rgb(30, 179, 70)',
}

data=[]
for car in tempoMan.Carrier.unique():
    tmp=tempoMan[tempoMan.Carrier==car]
    # i-th smallest rate sits at cumulative fraction (i+1)/n
    y_value=[(i+1)/len(tmp) for i in range(len(tmp))]
    coler=carrierColors.get(car)

    data.append(go.Scatter(
            x=tmp.Received_Rate,
            y=y_value,
            name=str(car),
            marker=dict(color=coler)
        ))

# Edit the layout
layout = dict(yaxis = dict(title = 'Cumulative Fraction of Models',showgrid=True, gridcolor='rgb(219, 219, 219)'),
              plot_bgcolor='rgba(0,0,0,0)',
              boxmode='group',
              violinmode='group',
              legend_orientation="h",
              legend=dict(x=0.25, y=1.1),
              font=dict(size=20)
              )

fig = dict(data=data, layout=layout)
py.offline.iplot(fig, filename='release-years-frequency-per-carrier-median.png')
pio.write_image(fig, 'samsung-data-cumulative-frequency.pdf', width=1000, height=600)
        
        

131
127
116
104
10.0
          Model  Carrier android_os Release_Date Bulletin_Level  difference  \
324  Galaxy S10  TMobile         10   2019-12-16     2019-12-01        14.0   
323  Galaxy S10  TMobile          9   2019-11-02     2019-11-01        -2.0   
322  Galaxy S10  TMobile          9   2019-10-28     2019-10-01        21.0   
321  Galaxy S10  TMobile          9   2019-09-17     2019-09-01        14.0   
320  Galaxy S10  TMobile          9   2019-08-30     2019-08-01        25.0   
319  Galaxy S10  TMobile          9   2019-08-10     2019-07-01        40.0   
318  Galaxy S10  TMobile          9   2019-06-28     2019-06-01        25.0   
317  Galaxy S10  TMobile          9   2019-05-17     2019-05-01        11.0   
316  Galaxy S10  TMobile          9   2019-05-10     2019-04-01        39.0   
315  Galaxy S10  TMobile          9   2019-04-06     2019-03-01        33.0   
314  Galaxy S10  TMobile          9   2019-03-06     2019-02-01        30.0   

     locked Year_Patch_Release



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [13]:
# Box plot of Received_Rate, one box per carrier variant.
tempoMan=counts.copy()
tempoMan=tempoMan.sort_values(by=['Carrier','Received_Rate'],ascending=True)

data=[]
for car in tempoMan.Carrier.unique():
    carrierRates=tempoMan.loc[tempoMan.Carrier==car,'Received_Rate']
    data.append(go.Box(y=carrierRates,name=str(car)))

# Edit the layout
yAxisStyle=dict(title = 'Received Rate',showgrid=True, gridcolor='rgb(219, 219, 219)')
legendPosition=dict(x=0.25, y=1.1)
layout = dict(
    yaxis=yAxisStyle,
    plot_bgcolor='rgba(0,0,0,0)',
    boxmode='group',
    violinmode='group',
    legend_orientation="h",
    legend=legendPosition,
    font=dict(size=20),
)

fig = dict(data=data, layout=layout)
py.offline.iplot(fig, filename='release-years-frequency-per-carrier-median.png')
# pio.write_image(fig, 'samsung-data-cumulative-frequency.pdf', width=1000, height=600)
        