Skip to content

Commit b8b97e3

Browse files
committed
change as_matrix to values
1 parent 94c4328 commit b8b97e3

File tree

16 files changed

+34
-25
lines changed

16 files changed

+34
-25
lines changed

Diff for: ab_testing/client.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@
1616
df = pd.read_csv('advertisement_clicks.csv')
1717
a = df[df['advertisement_id'] == 'A']
1818
b = df[df['advertisement_id'] == 'B']
19-
a = a['action'].as_matrix()
20-
b = b['action'].as_matrix()
19+
a = a['action'].values
20+
b = b['action'].values
2121

2222
print("a.mean:", a.mean())
2323
print("b.mean:", b.mean())

Diff for: ann_logistic_extra/process.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ def get_data():
2121
# df.head()
2222

2323
# easier to work with numpy array
24-
data = df.as_matrix()
24+
data = df.values
2525

2626
# shuffle it
2727
np.random.shuffle(data)

Diff for: cnn_class2/class_activation_maps.py

+2 -5
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,10 @@
66
# Note: you may need to update your version of future
77
# sudo pip install -U future
88

9-
from keras.layers import Input, Lambda, Dense, Flatten
109
from keras.models import Model
1110
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
12-
# from keras.applications.inception_v3 import InceptionV3, preprocess_input
1311
from keras.preprocessing import image
14-
from keras.preprocessing.image import ImageDataGenerator
1512

16-
from sklearn.metrics import confusion_matrix
1713
import numpy as np
1814
import scipy as sp
1915
import matplotlib.pyplot as plt
@@ -24,7 +20,7 @@
2420

2521

2622

27-
# useful for getting number of files
23+
# get the image files
2824
image_files = glob('../large_files/256_ObjectCategories/*/*.jp*g')
2925
image_files += glob('../large_files/101_ObjectCategories/*/*.jp*g')
3026

@@ -72,6 +68,7 @@
7268
cam = fmaps.dot(w)
7369

7470
# upsample to 224 x 224
71+
# 7 x 32 = 224
7572
cam = sp.ndimage.zoom(cam, (32, 32), order=1)
7673

7774
plt.subplot(1,2,1)

Diff for: cnn_class2/fashion.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def y2indicator(Y):
2626
# get the data
2727
# https://www.kaggle.com/zalando-research/fashionmnist
2828
data = pd.read_csv('../large_files/fashionmnist/fashion-mnist_train.csv')
29-
data = data.as_matrix()
29+
data = data.values
3030
np.random.shuffle(data)
3131

3232
X = data[:, 1:].reshape(-1, 28, 28, 1) / 255.0

Diff for: cnn_class2/fashion2.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def y2indicator(Y):
2626
# get the data
2727
# https://www.kaggle.com/zalando-research/fashionmnist
2828
data = pd.read_csv('../large_files/fashionmnist/fashion-mnist_train.csv')
29-
data = data.as_matrix()
29+
data = data.values
3030
np.random.shuffle(data)
3131

3232
X = data[:, 1:].reshape(-1, 28, 28, 1) / 255.0

Diff for: hmm_class/hmmd.py

+4
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
import numpy as np
1212
import matplotlib.pyplot as plt
13+
from datetime import datetime
1314

1415

1516
def random_normalized(d1, d2):
@@ -22,6 +23,7 @@ def __init__(self, M):
2223
self.M = M # number of hidden states
2324

2425
def fit(self, X, max_iter=30):
26+
t0 = datetime.now()
2527
np.random.seed(123)
2628
# train the HMM model using the Baum-Welch algorithm
2729
# a specific instance of the expectation-maximization algorithm
@@ -136,6 +138,8 @@ def fit(self, X, max_iter=30):
136138
print("B:", self.B)
137139
print("pi:", self.pi)
138140

141+
print("Fit duration:", (datetime.now() - t0))
142+
139143
plt.plot(costs)
140144
plt.show()
141145

Diff for: linear_regression_class/systolic.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
import pandas as pd
2121

2222
df = pd.read_excel('mlr02.xls')
23-
X = df.as_matrix()
23+
X = df.values
2424

2525
# using age to predict systolic blood pressure
2626
plt.scatter(X[:,1], X[:,0])

Diff for: nlp_class/nb.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
# it will work for other types of "counts", like tf-idf, so it should
1919
# also work for our "word proportions"
2020

21-
data = pd.read_csv('spambase.data').as_matrix() # use pandas for convenience
21+
data = pd.read_csv('spambase.data').values # use pandas for convenience
2222
np.random.shuffle(data) # shuffle each row in-place, but preserve the row
2323

2424
X = data[:,:48]

Diff for: nlp_class/spam2.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
1515
from sklearn.model_selection import train_test_split
1616
from sklearn.naive_bayes import MultinomialNB
17+
from sklearn.svm import SVC
1718
from wordcloud import WordCloud
1819

1920

@@ -32,7 +33,7 @@
3233

3334
# create binary labels
3435
df['b_labels'] = df['labels'].map({'ham': 0, 'spam': 1})
35-
Y = df['b_labels'].as_matrix()
36+
Y = df['b_labels'].values
3637

3738
# try multiple ways of calculating features
3839
# tfidf = TfidfVectorizer(decode_error='ignore')
@@ -49,7 +50,7 @@
4950
model.fit(Xtrain, Ytrain)
5051
print("train score:", model.score(Xtrain, Ytrain))
5152
print("test score:", model.score(Xtest, Ytest))
52-
53+
exit()
5354

5455

5556
# visualize the data

Diff for: nlp_class3/bilstm_mnist.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def get_mnist(limit=None):
2828

2929
print("Reading in and transforming data...")
3030
df = pd.read_csv('../large_files/train.csv')
31-
data = df.as_matrix()
31+
data = df.values
3232
np.random.shuffle(data)
3333
X = data[:, 1:].reshape(-1, 28, 28) / 255.0 # data is from 0..255
3434
Y = data[:, 0]

Diff for: supervised_class/bayes.py

+7
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99

1010

1111
import numpy as np
12+
import matplotlib.pyplot as plt
1213
from util import get_data
1314
from datetime import datetime
1415
from scipy.stats import norm
@@ -60,3 +61,9 @@ def predict(self, X):
6061
t0 = datetime.now()
6162
print("Test accuracy:", model.score(Xtest, Ytest))
6263
print("Time to compute test accuracy:", (datetime.now() - t0), "Test size:", len(Ytest))
64+
65+
# plot the mean of each class
66+
for c, g in iteritems(model.gaussians):
67+
plt.imshow(g['mean'].reshape(28, 28))
68+
plt.title(c)
69+
plt.show()

Diff for: supervised_class/util.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
def get_data(limit=None):
1313
print("Reading in and transforming data...")
1414
df = pd.read_csv('../large_files/train.csv')
15-
data = df.as_matrix()
15+
data = df.values
1616
np.random.shuffle(data)
1717
X = data[:, 1:] / 255.0 # data is from 0..255
1818
Y = data[:, 0]

Diff for: supervised_class2/rf_classification.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def transform(self, df):
5555
X = np.zeros((N, self.D))
5656
i = 0
5757
for col, scaler in iteritems(self.scalers):
58-
X[:,i] = scaler.transform(df[col].as_matrix().reshape(-1, 1)).flatten()
58+
X[:,i] = scaler.transform(df[col].values.reshape(-1, 1)).flatten()
5959
i += 1
6060

6161
for col, encoder in iteritems(self.labelEncoders):
@@ -98,7 +98,7 @@ def get_data():
9898
transformer = DataTransformer()
9999

100100
X = transformer.fit_transform(df)
101-
Y = df[0].as_matrix()
101+
Y = df[0].values
102102
return X, Y
103103

104104

Diff for: supervised_class2/rf_regression.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def fit(self, df):
4444
self.scalers = {}
4545
for col in NUMERICAL_COLS:
4646
scaler = StandardScaler()
47-
scaler.fit(df[col].as_matrix().reshape(-1, 1))
47+
scaler.fit(df[col].values.reshape(-1, 1))
4848
self.scalers[col] = scaler
4949

5050
def transform(self, df):
@@ -53,7 +53,7 @@ def transform(self, df):
5353
X = np.zeros((N, D))
5454
i = 0
5555
for col, scaler in iteritems(self.scalers):
56-
X[:,i] = scaler.transform(df[col].as_matrix().reshape(-1, 1)).flatten()
56+
X[:,i] = scaler.transform(df[col].values.reshape(-1, 1)).flatten()
5757
i += 1
5858
for col in NO_TRANSFORM:
5959
X[:,i] = df[col]
@@ -96,9 +96,9 @@ def get_data():
9696
df_test = df.loc[test_idx]
9797

9898
Xtrain = transformer.fit_transform(df_train)
99-
Ytrain = np.log(df_train['medv'].as_matrix())
99+
Ytrain = np.log(df_train['medv'].values)
100100
Xtest = transformer.transform(df_test)
101-
Ytest = np.log(df_test['medv'].as_matrix())
101+
Ytest = np.log(df_test['medv'].values)
102102
return Xtrain, Ytrain, Xtest, Ytest
103103

104104

Diff for: unsupervised_class/kmeans_mnist.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,13 @@
1616
import numpy as np
1717
import pandas as pd
1818
import matplotlib.pyplot as plt
19-
from .kmeans import plot_k_means, get_simple_data
19+
from kmeans import plot_k_means, get_simple_data
2020
from datetime import datetime
2121

2222
def get_data(limit=None):
2323
print("Reading in and transforming data...")
2424
df = pd.read_csv('../large_files/train.csv')
25-
data = df.as_matrix()
25+
data = df.values
2626
np.random.shuffle(data)
2727
X = data[:, 1:] / 255.0 # data is from 0..255
2828
Y = data[:, 0]

Diff for: unsupervised_class2/util.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ def getKaggleMNIST():
2323
# column 0 is labels
2424
# column 1-785 is data, with values 0 .. 255
2525
# total size of CSV: (42000, 1, 28, 28)
26-
train = pd.read_csv('../large_files/train.csv').as_matrix().astype(np.float32)
26+
train = pd.read_csv('../large_files/train.csv').values.astype(np.float32)
2727
train = shuffle(train)
2828

2929
Xtrain = train[:-1000,1:] / 255

0 commit comments

Comments
 (0)