In [2]:
import numpy as np
import scipy as sp

In [3]:
# Load the tab-separated traffic log into a 2-D float array; fields that
# cannot be parsed as numbers come back as NaN.
# genfromtxt is a NumPy function, so call it through NumPy directly rather
# than through the deprecated top-level SciPy alias.
data = np.genfromtxt("./ch01/data/web_traffic.tsv", delimiter="\t")

In [4]:
# Show the loaded array (the printed form elides the middle rows with "...")
print(data)

[[  1.00000000e+00   2.27200000e+03]
 [  2.00000000e+00              nan]
 [  3.00000000e+00   1.38600000e+03]
 ..., 
 [  7.41000000e+02   5.39200000e+03]
 [  7.42000000e+02   5.90600000e+03]
 [  7.43000000e+02   4.88100000e+03]]


In [5]:
# Dimensions of the loaded array as (rows, columns)
print(data.shape)

(743, 2)


In [6]:
# Pull the two columns out into separate vectors:
# x is column 0, y is column 1 of the loaded array.
x, y = data[:, 0], data[:, 1]
print(x[0], y[0])  # values of the first sample (row 0)
print(x[1], y[1])  # values of the second sample (row 1)
print(data[0, :])  # the first row of the raw array

(1.0, 2272.0)
(2.0, nan)
[  1.00000000e+00   2.27200000e+03]


In [7]:
# Count the invalid (NaN) entries in y.
# isnan gives a boolean vector; summing it counts the True values.
np.sum(np.isnan(y))

8

In [8]:
# Clean the vectors: build one boolean mask marking the entries of y that are
# not NaN and use it to index both vectors, so x and y stay aligned.
# The mask is computed once, before either vector is overwritten.
valid = ~np.isnan(y)
x = x[valid]
y = y[valid]
np.sum(np.isnan(y))  # No more invalid!

0

In [37]:
import matplotlib.pyplot as plt  # plotting interface used by the cells below

# Scatter plot of the (x, y) samples with markers of size 10
plt.scatter(x, y, s=10)
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.title("Web traffic over the last month")

# Put one labelled tick every 7*24 x-units (one week, if x counts hours)
week_numbers = range(10)
tick_positions = [w * 7 * 24 for w in week_numbers]
tick_labels = ['week %i' % w for w in week_numbers]
plt.xticks(tick_positions, tick_labels)
plt.autoscale(tight=True)

# Draw a solid, light-gray grid behind the points
plt.grid(True, linestyle='-', color='0.75')
plt.show()
print("Done")

Done


In [15]:
#Model Error: squared distance of model prediction to real data
def error(f, x, y):
    """Return the sum of squared differences between ``f(x)`` and ``y``.

    Parameters
    ----------
    f : callable
        Model function; it is called once as ``f(x)``.
    x : array-like
        Inputs handed to the model.
    y : array-like
        Observed values the model output is compared against.

    Returns
    -------
    The sum over all elements of ``(f(x) - y) ** 2``.
    """
    # asarray lets plain Python sequences be used as well as NumPy arrays;
    # arrays pass through unchanged.
    predicted = np.asarray(f(x))
    observed = np.asarray(y)
    return np.sum((predicted - observed) ** 2)

# Fit a degree-1 polynomial (a straight line) to the data.
# full=True makes polyfit return fit diagnostics next to the coefficients;
# of those only `residuals` is used below.
fp1, residuals, rank, sv, rcond = np.polyfit(x, y, 1, full=True)

# Meaning our model is f(x) = 2.59619213 * x + 989.02487106
print("Model Parameters: %s" % fp1)
print(residuals)

# Create a callable model from the fitted parameters and report its error
f1 = np.poly1d(fp1)
print(error(f1, x, y))

Model Parameters: [   2.59619213  989.02487106]
[  3.17389767e+08]
317389767.34


In [31]:
# Plot the new model against the data
fx = np.linspace(0, x[-1], 1000)  # generate X-values for plotting
plt.plot(fx, f1(fx), linewidth=4)
# Label the line with the order of the plotted model
plt.legend(["d=%i" % f1.order], loc="upper left")

<matplotlib.legend.Legend at 0x10ce47d10>

In [33]:
# Time for some polynomial modeling: fit a degree-2 polynomial
f2p = np.polyfit(x, y, 2)
print(f2p)

# Wrap the coefficients in a callable model and report its error
f2 = np.poly1d(f2p)
print(error(f2, x, y))

[  1.05322215e-02  -5.26545650e+00   1.97476082e+03]
179983507.878


In [36]:
# Plot the degree-2 polynomial model
fx = np.linspace(0, x[-1], 1000)  # generate X-values for plotting
plt.plot(fx, f2(fx), linewidth=4)
# The legend must describe the curve drawn here, so take the order from f2
# (not from the earlier linear model f1).
plt.legend(["d=%i" % f2.order], loc="upper left")

<matplotlib.legend.Legend at 0x10a9e55d0>

In [38]:
# Split the data at week 3.5 and fit a separate straight line to each part.
# The value is used as a slice index, so it has to be an int: 3.5*7*24 is the
# float 588.0, and NumPy refuses float slice indices.
inflection = int(3.5 * 7 * 24)  # inflection point in hours
# NOTE: the slices are positional. NaN samples were removed from x and y
# earlier, so position `inflection` only approximately matches that hour.
xa = x[:inflection]
ya = y[:inflection]
xb = x[inflection:]
yb = y[inflection:]

# One degree-1 model per segment
fa = np.poly1d(np.polyfit(xa, ya, 1))
fb = np.poly1d(np.polyfit(xb, yb, 1))

# Each model is scored on its own segment only
fa_error = error(fa, xa, ya)
fb_error = error(fb, xb, yb)

print("Error inflection=%f" % (fa_error + fb_error))


Error inflection=132950348.197616
