In [1]:
import pandas as pd
hours_df = pd.read_csv("hours_may_sep.csv")
hours_df['/1000_normalised_interpolated_facebook_interactions'] = hours_df['interpolated_facebook_interactions']/1000
hours_df['/100_normalised_interpolated_facebook_interactions'] = hours_df['interpolated_facebook_interactions']/100
hours_df['/10_normalised_interpolated_facebook_interactions'] = hours_df['interpolated_facebook_interactions']/10

In [2]:
articles_6_days = hours_df.loc[hours_df['day'] >= 6]['link'].unique()
hours_df = hours_df.loc[hours_df['link'].isin(articles_6_days)]

In [3]:
hours_df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'day', 'published', 'headline', 'summary',
       'link', 'domain', 'facebook_interactions', 'date_extracted',
       'article_theme', 'article_index', 'label', 'hours_active',
       'delta_facebook_interactions', 'total_day_hours', 'missed_hours',
       'interpolated_facebook_interactions',
       'normalised_interpolated_facebook_interactions'],
      dtype='object')

In [5]:
len(hours_df.loc[hours_df['day'] == 5]['link'].unique())

7291

In [6]:
len(hours_df.loc[hours_df['day'] == 6]['link'].unique())

7291

In [7]:
hours_df.shape

(48547, 19)

In [14]:
# Import curve fitting package from scipy
from scipy.optimize import curve_fit
import plotly.graph_objects as go
import math
import numpy as np

def exponential(x, a, b):
    return (-a * (np.exp(-b * x)) + a)

def get_exp_coeff_df(hours_df):

    unique_articles = hours_df['link'].unique()
    exp_coeff_df = pd.DataFrame()

    for article in unique_articles:

        temp = hours_df.loc[hours_df['link'] == article]
        x = temp['day']
        y1 = temp['interpolated_facebook_interactions']
        y2 = temp['/1000_normalised_interpolated_facebook_interactions']
        y3 = temp['/100_normalised_interpolated_facebook_interactions']
        y4 = temp['/10_normalised_interpolated_facebook_interactions']
        
        popt1, pcov1 = curve_fit(exponential, x, y1, maxfev=10000)
        popt2, pcov2 = curve_fit(exponential, x, y2, maxfev=10000)
        popt3, pcov3 = curve_fit(exponential, x, y3, maxfev=10000)
        popt4, pcov4 = curve_fit(exponential, x, y4, maxfev=10000)
        
        temp['interpolated_exp_coeff_a'] = [popt1[0]]*temp.shape[0]
        temp['interpolated_exp_coeff_b'] = [popt1[1]]*temp.shape[0]
        temp['fitted_interpolated_facebook_interactions'] = -temp['interpolated_exp_coeff_a'] * (np.exp(-temp['interpolated_exp_coeff_b'] * temp['day'])) + temp['interpolated_exp_coeff_a']

        temp['/1000_interpolated_exp_coeff_a'] = [popt2[0]]*temp.shape[0]
        temp['/1000_interpolated_exp_coeff_b'] = [popt2[1]]*temp.shape[0]
        temp['fitted_interpolated_facebook_interactions'] = -temp['/1000_interpolated_exp_coeff_a'] * (np.exp(-temp['/1000_interpolated_exp_coeff_b'] * temp['day'])) + temp['/1000_interpolated_exp_coeff_a']

        temp['/100_interpolated_exp_coeff_a'] = [popt3[0]]*temp.shape[0]
        temp['/100_interpolated_exp_coeff_b'] = [popt3[1]]*temp.shape[0]
        temp['fitted_interpolated_facebook_interactions'] = -temp['/100_interpolated_exp_coeff_a'] * (np.exp(-temp['/100_interpolated_exp_coeff_b'] * temp['day'])) + temp['/100_interpolated_exp_coeff_a']

        temp['/10_interpolated_exp_coeff_a'] = [popt4[0]]*temp.shape[0]
        temp['/10_interpolated_exp_coeff_b'] = [popt4[1]]*temp.shape[0]
        temp['fitted_interpolated_facebook_interactions'] = -temp['/10_interpolated_exp_coeff_a'] * (np.exp(-temp['/10_interpolated_exp_coeff_b'] * temp['day'])) + temp['/10_interpolated_exp_coeff_a']


        exp_coeff_df = exp_coeff_df.append(temp)

    return exp_coeff_df

In [15]:
exp_coeff_df = get_exp_coeff_df(hours_df)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['exp_coeff_a'] = [popt[0]]*temp.shape[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['exp_coeff_b'] = [popt[1]]*temp.shape[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['fitted_interpolated_facebook_interactions'

In [20]:
exp_coeff_df.to_csv("may_sep_exp_coeff.csv")

In [17]:
# Import curve fitting package from scipy
from scipy.optimize import curve_fit
import plotly.graph_objects as go

def logarithm(x, a, b, c):
    return a * np.log(b * x) + c

In [18]:
def get_log_coeff_df(hours_df):

    unique_articles = hours_df['link'].unique()
    log_coeff_df = pd.DataFrame()

    for article in unique_articles:

        temp = hours_df.loc[hours_df['link'] == article]
        x = temp['day']
        y = temp['normalised_interpolated_facebook_interactions']
        popt, pcov = curve_fit(logarithm, x, y)
        temp['log_coeff_a'] = [popt[0]]*temp.shape[0]
        temp['log_coeff_b'] = [popt[1]]*temp.shape[0]
        temp['log_coeff_c'] = [popt[2]]*temp.shape[0]
        temp['fitted_interpolated_facebook_interactions'] = temp['log_coeff_a'] * np.log(temp['log_coeff_b'] * temp['day']) + temp['log_coeff_c']

        log_coeff_df = log_coeff_df.append(temp)

    return log_coeff_df

In [19]:
log_coeff_df = get_log_coeff_df(hours_df)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['fitted_interpolated_facebook_interactions'] = temp['log_coeff_a'] * np.log(temp['log_coeff_b'] * temp['day']) + temp['log_coeff_c']
  log_coeff_df = log_coeff_df.append(temp)
  result = getattr(ufunc, method)(*inputs, **kwargs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['log_coeff_a'] = [popt[0]]*temp.shape[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pand

In [21]:
log_coeff_df.to_csv("may_sep_log_coeff.csv")