# Obsolescence des smartphones

---

Jérémie GANTET INSA Informatique & Réseaux 😏 <br>
21 juillet 2020

---

### Librairies

In [154]:
# dates/utils
import datetime
from datetime import date
import pandas as pd
import math
import copy
# scipy
import scipy.stats as stat
from scipy.stats import norm
from scipy.stats import alpha
from scipy.stats import lognorm
from scipy.stats import exponweib
from scipy.optimize import minimize
from scipy.stats import weibull_min
from scipy.stats import triang
from scipy.stats import tvar
from scipy.stats import tstd
# sklearn
from sklearn import dummy
from sklearn import neighbors
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
# beautiful print/plot
from sty import fg, bg, ef, rs
import seaborn as sns
%pylab inline
sns.set()
pd.options.mode.chained_assignment = None 
# jupyter GUI
import ipywidgets as widgets
from IPython.display import clear_output
from IPython.display import display
%config InlineBackend.close_figures=False 

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


### Importation des fichiers par marque de smartphone
- dates de début de commercialisation
- nombres de ventes par trimestre

In [78]:
# --- Raw data ----------------------------------------------------------------
# NOTE(review): absolute, machine-specific paths; this cell only runs where the
# files exist at exactly these locations.
dates = pd.read_csv('E:/eDocuments/obso/dates.csv', sep=';')
ventes = pd.read_csv('E:/eDocuments/obso/vente_tout_constructeurs.csv', sep=';')
# numeric_only=True: `dates` also carries text columns (brand, model, dates as
# strings); recent pandas raises on them instead of silently dropping them.
medianes = dates.groupby(["marque"]).median(numeric_only=True)
diff = dates["différence"]


def _parse_date_jma(texte):
    """Convert a 'dd/mm/yyyy' string into a `datetime.date`."""
    morceaux = texte.split("/")
    return datetime.date(int(morceaux[2]), int(morceaux[1]), int(morceaux[0]))


# Fill missing "différence" values with the median of the same brand.
# Writes go through `.at` so that they always reach `dates` itself (chained
# `dates[col][i] = ...` may write to a temporary copy).
# NOTE: the loop rebinds the module-level name `diff`, as the original cell did.
for i, diff in enumerate(dates["différence"]):
    if pd.isna(diff):
        dates.at[i, "différence"] = medianes["différence"][dates.at[i, "marque"]]

# Normalise the "date" and "date_fin" columns to `datetime.date` objects.
for i, debut in enumerate(dates["date"]):
    if not isinstance(debut, datetime.date):
        if pd.isna(debut):
            continue  # no start date: leave the whole row untouched
        if debut.find("/") < 0:
            continue  # not in dd/mm/yyyy form: leave the whole row untouched
        dates.at[i, "date"] = _parse_date_jma(debut)
    fin = dates.at[i, "date_fin"]
    if pd.isna(fin):
        # Missing end date: start date + brand median, counted as 30 days per unit.
        fin = dates.at[i, "date"] + datetime.timedelta(
            days=medianes["différence"][dates.at[i, "marque"]] * 30)
        dates.at[i, "date_fin"] = fin
    if not isinstance(fin, datetime.date):
        if fin.find("/") < 0:
            continue
        dates.at[i, "date_fin"] = _parse_date_jma(fin)

# Per-brand lists of start dates, end dates and model names, published as the
# module-level names debut_<brand>, fin_<brand> and modele_<brand>.
touteslesmarques = ["Apple", "Samsung", "OnePlus", "Xiaomi", "Oppo", "Vivo", "Sony", "Huawei", "Google", "Motorola"]
for marque in touteslesmarques:
    lignes_marque = dates[dates["marque"] == marque]
    globals()["debut_"+marque] = list(lignes_marque["date"])
    globals()["fin_"+marque] = list(lignes_marque["date_fin"])
    globals()["modele_"+marque] = list(lignes_marque["modèle"])

# x axis: one point per quarter, stored as proleptic Gregorian ordinals.
trimestre = [_parse_date_jma(d).toordinal() for d in ventes["Trimestre"]]
limites = [date.fromordinal(trimestre[0]),date.fromordinal(trimestre[-1])]

data = pd.read_csv('E:/eDocuments/obsolescence_prediction/data.csv', sep=';')

### Code modèles de distribution

In [430]:
# Name of the distribution family used by distrib / distrib_cumulated /
# distrib_quantile: 'alpha', 'gauss', 'triangle' or 'weibull'.
ladistribution = 'alpha'
# Number of preview curves drawn by update_samples.
nb_sample = 1
# Threshold in percent; update_samples divides it by 100 before passing it to
# distrib_quantile.
leseuil = 80 # percent
# fonction de distribution
def distrib(x,debut,scale,var,offset):
    """Evaluate the density of the currently selected distribution family.

    The family is read from the module-level `ladistribution`.

    Parameters:
        x: abscissa(e) passed to the scipy `pdf`.
        debut: location of the curve (shifted by `-scale/10` for 'alpha' and
            by `+offset` for 'weibull').
        scale: scipy `scale` parameter of the distribution.
        var: multiplicative height factor.
        offset: extra location shift, only used by 'weibull'.

    Returns:
        `pdf(x) * var * scale`.

    Raises:
        ValueError: if `ladistribution` is not one of the supported names
            (the previous behaviour was to return None silently).
    """
    densites = {
        "alpha": lambda: alpha.pdf(x, 0.8, debut-scale/10, scale),
        "gauss": lambda: stat.norm.pdf(x, debut, scale),
        "triangle": lambda: triang.pdf(x, 0, debut, scale),
        "weibull": lambda: weibull_min.pdf(x, 1.1, debut+offset, scale),
    }
    if ladistribution not in densites:
        raise ValueError("unknown distribution: %r" % (ladistribution,))
    return densites[ladistribution]()*var*scale
    
def distrib_cumulated(x,scale,var):
    """Evaluate the cumulative distribution of the selected family, located at 0.

    The family is read from the module-level `ladistribution`.

    Parameters:
        x: abscissa(e) passed to the scipy `cdf`.
        scale: scipy `scale` parameter of the distribution.
        var: unused; kept so existing callers keep working.

    Returns:
        The value(s) of the cumulative distribution function at `x`.

    Raises:
        ValueError: if `ladistribution` is not one of the supported names
            (the previous behaviour was to return None silently).
    """
    cumulees = {
        "alpha": lambda: alpha.cdf(x, 0.8, 0, scale),
        "gauss": lambda: stat.norm.cdf(x, 0, scale),
        "triangle": lambda: triang.cdf(x, 0, 0, scale),
        "weibull": lambda: weibull_min.cdf(x, 1.1, 0, scale),
    }
    if ladistribution not in cumulees:
        raise ValueError("unknown distribution: %r" % (ladistribution,))
    return cumulees[ladistribution]()
    
def distrib_quantile(q,scale):
    """Return the quantile (inverse CDF) of the selected family, located at 0.

    The family is read from the module-level `ladistribution`.

    Parameters:
        q: probability level(s) passed to the scipy `ppf`.
        scale: scipy `scale` parameter of the distribution.

    Returns:
        The abscissa(e) below which a fraction `q` of the distribution lies.

    Raises:
        ValueError: if `ladistribution` is not one of the supported names
            (the previous behaviour was to return None silently).
    """
    quantiles = {
        "alpha": lambda: alpha.ppf(q, 0.8, 0, scale),
        "gauss": lambda: stat.norm.ppf(q, 0, scale),
        "triangle": lambda: triang.ppf(q, 0, 0, scale),
        "weibull": lambda: weibull_min.ppf(q, 1.1, 0, scale),
    }
    if ladistribution not in quantiles:
        raise ValueError("unknown distribution: %r" % (ladistribution,))
    return quantiles[ladistribution]()
    
# fonctions de mises à jour    
def update_samples(b):
    """Redraw the preview curves inside the `out_samples` output widget.

    Parameters:
        b: change dictionary whose 'new' entry is the number of curves to
            draw; it is also stored in the module-level `nb_sample`.

    For each curve the density is plotted together with a vertical line at
    the `leseuil` percent quantile, both in the same colour.
    """
    global nb_sample
    flatui = ["#3498db", "#9b59b6", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
    nb_sample = b['new']
    # The drawing axes used to be read from a global `ax` that nothing defined
    # (NameError on a fresh kernel). Reuse it when it exists, otherwise take
    # the current axes and remember them for the next redraw.
    axes = globals().get("ax")
    if axes is None:
        axes = plt.gca()
        globals()["ax"] = axes
    axes.clear()
    x = np.arange(-3, 30, 0.05)
    seuil = leseuil/100
    for i in range(nb_sample):
        couleur = flatui[i%len(flatui)]
        # NOTE(review): evaluates to i + 2.0 by operator precedence; confirm
        # that (i + 0.2) * 10 was not intended.
        scale = i+0.2*10
        y = distrib(x,0,scale,1,0)
        axes.plot(x, y, color=couleur)
        axes.axvline(x=distrib_quantile(seuil, scale), color=couleur)
    axes.set_xlim([-3,30])
    axes.set_title("allure de la courbe de distribution de ventes d'un modèle de smartphone pour différentes largeurs")
    with out_samples:
        clear_output(wait=True)
        display(axes.figure)

def update_seuil(b):
    """Store the new threshold (percent) in `leseuil`, then redraw the preview."""
    globals()["leseuil"] = b['new']
    update_samples({'new': nb_sample})
    
def change_distrib(b):
    """Store the newly selected family in `ladistribution`, then redraw the preview."""
    globals()["ladistribution"] = b['new']
    update_samples({'new': nb_sample})
    
# fonction principale d'affichage 
def show_samples():
    """Build and display the distribution-preview GUI.

    Creates the output area, the two sliders and the family dropdown, and
    publishes them as the module-level names `out_samples`, `int_range`,
    `seuil_range` and `d`. The drawing axes are published as `ax`, because
    `update_samples` redraws on them. The widgets are then wired to their
    callbacks and an initial preview is rendered.
    """
    # `Layout` is not imported on its own in this notebook; go through the
    # `widgets` module, which is.
    globals()["out_samples"] = widgets.Output(layout=widgets.Layout(height='400px', width = '1000px'))
    globals()["int_range"] = widgets.IntSlider(value=1, description="Aperçus", max=25)
    globals()["seuil_range"] = widgets.IntSlider(value=80, description="seuil (%)", max=99)
    globals()["d"] = widgets.Dropdown(options=['alpha', 'weibull', 'triangle', 'gauss'])
    plt.ioff()
    # Must be a global: a local binding is invisible to update_samples.
    globals()["ax"] = plt.gca()
    display(d)
    display(seuil_range)
    display(int_range, out_samples)
    seuil_range.observe(update_seuil, names='value')
    int_range.observe(update_samples, names='value')
    d.observe(change_distrib, names='value')
    # Reset the shared state to the widgets' initial values and draw once.
    change_distrib({'new':'alpha'})
    update_seuil({'new':80})
    update_samples({'new':1})


### Code optimisation

In [461]:
# Brands processed by optimiseur (set through choose_brand).
lamarque = ['Apple']
# Value passed as `method` to scipy.optimize.minimize.
methode = 'Powell'
# When True, optimiseur builds its parameter lists in reverse date order.
renverse = False
# Norm selected in objectif: "norme 1" or "norme 2".
lanorme = "norme 1"
def mini_obj(X, args, ventes_totales, abscisses):
    """Sum, point by point, the curves of all the models described by `X`.

    `X` is read in groups of three (width, height, offset) and `args` in
    groups of two (date, weight); the weight is read but not applied.
    `ventes_totales` is not used here. Returns a list with one summed value
    per abscissa.
    """
    total = [0] * len(abscisses)
    nb_modeles = int(len(X)/3)
    for k in range(nb_modeles):
        largeur = X[3*k]
        hauteur = X[3*k+1]
        offset = X[3*k+2]
        date = args[2*k]
        ponderation = args[2*k+1]  # read as in the original, but unused
        courbe = distrib(abscisses, date, largeur, hauteur, offset)
        total = [total[j]+valeur for j, valeur in enumerate(courbe)]
    return total

def montre_solution(X, args, ventes_totales, abscisses, agregat_modeles):
    """Plot each model's fitted curve, their sum, and the real sales.

    Parameters:
        X: flat parameter sequence, three values (width, height, offset)
            per model.
        args: flat sequence, two values (date, weight) per model; the weight
            is read but not applied.
        ventes_totales: real sales, one value per abscissa.
        abscisses: x values shared by all curves.
        agregat_modeles: one label per model, in the same order as `X`;
            a pandas Series or any other iterable of strings.
    """
    # Take the labels by position. Indexing a date-indexed Series with an
    # integer relies on a positional fallback that pandas has deprecated.
    etiquettes = list(agregat_modeles)
    cumul = [0 for _ in abscisses]
    fig, ax = plt.subplots(figsize=(12,6))
    for k in range(len(X)//3):
        largeur, hauteur, offset = X[k*3], X[k*3+1], X[k*3+2]
        date, ponderation = args[k*2], args[k*2+1]
        y = distrib(abscisses, date, largeur, hauteur, offset)
        cumul = [cumul[j]+valeur for j, valeur in enumerate(y)]
        ax.plot(abscisses, y, label=etiquettes[k]+"\nlargeur : "+str(round(largeur,1))+" hauteur : "+str(round(hauteur,1)))
    ax.plot(abscisses, cumul, 'k', label="ventes cumulees")
    ax.plot(abscisses, ventes_totales, 'b', label="ventes totales")
    ax.legend(bbox_to_anchor=(1.5, 1))
    ax.set_xlim(limites)
    plt.show()

def objectif(X, args, ventes_totales, abscisses):
    """Distance between the summed model curves and the real sales.

    The norm is selected by the module-level `lanorme`:
      - "norme 1": sum of absolute differences;
      - "norme 2": Euclidean norm (square root of the sum of squared
        differences). This branch used to be a copy of "norme 1".

    Returns:
        The distance as a float.

    Raises:
        ValueError: if `lanorme` is neither "norme 1" nor "norme 2"
            (previously this failed on an unbound local variable).
    """
    ventes_cumulees = mini_obj(X, args, ventes_totales, abscisses)
    ecarts = [x-y for x, y in zip(ventes_cumulees, ventes_totales)]
    if lanorme == "norme 1":
        return float(sum(abs(e) for e in ecarts))
    if lanorme == "norme 2":
        return float(sum(e*e for e in ecarts) ** 0.5)
    raise ValueError("unknown norm: %r" % (lanorme,))

def obj(X):
    """Single-argument objective for the optimiser.

    Forwards `X` to `objectif`, together with the module-level `args` and
    fresh list copies of `ventes_totales` and `trimestre`.
    """
    ventes_copie = list(ventes_totales)
    abscisses_copie = list(trimestre)
    return objectif(X, args, ventes_copie, abscisses_copie)

def choose_brand(marque):
    """Set the module-level `lamarque` from a dropdown choice.

    A known brand name selects that single brand, "Toutes" selects all seven,
    and any other value leaves `lamarque` unchanged.
    """
    global lamarque
    marques_connues = ("Apple", "Samsung", "Xiaomi", "Oppo", "Vivo", "Sony", "Huawei")
    if marque == "Toutes":
        lamarque = list(marques_connues)
    elif marque in marques_connues:
        lamarque = [marque]

def optimiseur():
    """Fit and plot the distribution parameters of every brand in `lamarque`.

    For each brand, the models are grouped by start date. Every group gets
    three parameters (width, height, offset), which are fitted with
    `scipy.optimize.minimize` (method `methode`) so that the summed curves
    match the brand's quarterly sales; the result is plotted with
    `montre_solution`.

    Returns:
        One summary line per processed brand, joined with newlines. With a
        single brand this is the same string as before.

    Fixes compared with the previous version:
      - every brand is processed (the `return` used to sit inside the loop,
        so "Toutes" stopped after the first brand);
      - `args` is reset for each brand, and `ventes_totales` is published as
        a global, since `obj` reads both from the module namespace;
      - plot labels are built for exactly the groups that are fitted and are
        reversed together with the parameters when `renverse` is set.
    """
    global args, ventes_totales
    plt.close('all')
    lignes_resultat = []
    for marque in lamarque:
        lignes_marque = dates.loc[dates['marque'] == marque, :]
        par_date = lignes_marque.groupby('date')
        agregat_modeles = par_date['modèle'].apply(', '.join)
        df_args = par_date.count()["marque"]
        # Sales are stored with a decimal comma and expressed in millions.
        ventes_totales = [0 if v != v else float(str(v).replace(",","."))*1000000 for v in ventes[marque]]

        args = []
        x1 = []
        bnds = ()
        etiquettes = []
        for jour in df_args.index:
            if not isinstance(jour, datetime.date):
                continue  # start date that could not be parsed: not fitted
            if not renverse:
                args += [jour.toordinal(), df_args[jour]]
                x1 += [60, 1000000, 0]
            else:
                # Stored back to front so that the global reversal below
                # restores the (date, weight) / (width, height, offset) order.
                args += [df_args[jour], jour.toordinal()]
                x1 += [0, 1000000, 60]
            bnds += ((60, 9*30*1.138591143459572), (500000, 60000000), (-60, 180),)
            etiquettes.append(agregat_modeles[jour])
        if renverse:
            args = args[::-1]
            x1 = x1[::-1]
            etiquettes = etiquettes[::-1]

        result = minimize(obj, x1, method = methode, bounds=bnds)
        montre_solution(result.x, args, ventes_totales, trimestre, etiquettes)
        chronooupaschrono = "chronologique" if not renverse else "anti chronologique"
        params = "["+ladistribution+", "+methode+", "+marque+", "+chronooupaschrono+"]"
        lignes_resultat.append(params+" "*(50-len(params))+" écart : "+bg.red+fg.white+" "+str(int(obj(result.x)))+" "+fg.rs+bg.rs)
    return "\n".join(lignes_resultat)
        
# fonction principale d'affichage 
def optimisation():
    """Build and display the optimisation GUI.

    Creates the two buttons, the three output areas and the three dropdowns,
    publishes them as module-level names, displays them and connects them to
    their callbacks.
    """
    marques_proposees = ["Apple", "Samsung", "Xiaomi", "Oppo", "Vivo", "Sony", "Huawei",'Toutes']
    # Keyword arguments are evaluated in order, so the widgets are created in
    # the same sequence as before.
    globals().update(
        button1=widgets.Button(description="Lancer optimisation"),
        out_optimizer=widgets.Output(),
        out_historique=widgets.Output(),
        button2=widgets.Button(description="Renverser chrono"),
        out_renverse=widgets.Output(),
        d_methode=widgets.Dropdown(options=['Powell', 'L-BFGS-B', 'TNC', 'SLSQP']),
        d_norme=widgets.Dropdown(options=['norme 1', 'norme 2']),
        d_brand=widgets.Dropdown(options=marques_proposees),
    )

    plt.ioff()
    plt.gca()  # result unused; call kept for its effect on the pyplot state

    for menu in (d_methode, d_brand, d_norme):
        display(menu)
    display(button2, out_renverse)
    display(button1, out_optimizer,out_historique)

    button1.on_click(on_button1_clicked)
    button2.on_click(on_button2_clicked)
    for menu, rappel in ((d_methode, change_method), (d_brand, change_brand), (d_norme, change_norme)):
        menu.observe(rappel, names='value')
 
    
def on_button1_clicked(b):
    """Run the optimisation and append its summary line to the history.

    The optimiser runs inside `out_optimizer`, which is cleared first; the
    string it returns is printed inside `out_historique`.
    """
    with out_optimizer:
        clear_output(wait=True)
        ligne = optimiseur()
    with out_historique:
        print(ligne)
        
def on_button2_clicked(b):
    """Toggle `renverse` and report the resulting order in `out_renverse`."""
    global renverse
    renverse = not renverse
    prefixe = "ANTI-" if renverse else ""
    with out_renverse:
        clear_output(wait=True)
        print("les modèles sont dans l'ordre", prefixe+"chronologique")

def change_norme(b):
    """Store the newly selected norm in the module-level `lanorme`."""
    globals()["lanorme"] = b['new']
        
def change_method(b):
    """Store the newly selected optimisation method in the module-level `methode`."""
    globals()["methode"] = b['new']
    
def change_brand(b):
    """Forward the newly selected dropdown value to `choose_brand`."""
    nouvelle_marque = b['new']
    choose_brand(nouvelle_marque)

# Cliquodrome 

## Modélisation
de la courbe des ventes d'un modèle de smartphone

In [431]:
# Build and display the distribution-preview widgets.
show_samples()

## Optimisation 
des paramètres de largeur et de hauteur des modèles de distributions d'un modèle de smartphone <br>
Objectif : la somme des courbes doit correspondre, trimestre par trimestre, au nombre total de smartphones vendus par la marque


In [462]:
# Build and display the optimisation widgets.
optimisation()