# Numpy, Matplotlib and Sklearn Tutorial

We often use NumPy to handle high-dimensional arrays.

Let's try some basic NumPy operations:

In [None]:
import numpy as np

# A small 2x3 integer matrix and a summary of its basic attributes.
a = np.array([[1, 2, 3],
              [2, 3, 4]])
print(a.ndim, a.shape, a.size, a.dtype, type(a))

# Common constructors: all zeros, all ones, standard-normal samples,
# and an explicit 3x2 literal.
b = np.zeros((3, 4))
c = np.ones((3, 4))
d = np.random.randn(2, 3)
e = np.array([[1, 2],
              [2, 3],
              [3, 4]])

# Arithmetic operators act element by element on equally shaped arrays.
f = 2 * b - 3 * c
g = c * 2 * f

# Matrix product of a (2x3) with e (3x2), giving a 2x2 result.
h = a.dot(e)

# Reductions: mean over every element, and the maximum of each row.
i = np.mean(d)
j = np.max(d, axis=1)

# First two entries of the last row of a.
k = a[-1, :2]

# Print any of a..k to inspect the values in detail.

Output: 2 (2, 3) 6 int32 <class 'numpy.ndarray'>

In [None]:
import matplotlib.pyplot as plt

# Sample points from 2 up to (but not including) 10 in steps of 0.2.
x = np.arange(2, 10, 0.2)

# Draw three series on the same axes, one (x, y, format) triple per line:
# a solid red line, a dashed green line and blue dots.
plt.plot(
    x, 0.5 * x**1.5, 'r-',
    x, 5 * np.log(x), 'g--',
    x, x, 'b.',
)
plt.show()

The output is a figure:
![image.png](attachment:image.png)

In [None]:
def f(x):
    """Return sin(pi * x); works element-wise when x is a NumPy array."""
    scaled = x * np.pi
    return np.sin(scaled)

# Two sampling grids over [0, 5): a coarse one (step 0.1) and a fine one
# (step 0.01).
x1 = np.arange(0, 5, 0.1)
x2 = np.arange(0, 5, 0.01)

# Upper panel of a 2-row, 1-column layout: green circles at the coarse
# samples of f, plus f shifted right by one unit on the fine grid.
plt.subplot(2, 1, 1)
plt.plot(x1, f(x1), 'go', x2, f(x2 - 1))

# Lower panel: f on the fine grid as a dashed red line.
plt.subplot(2, 1, 2)
plt.plot(x2, f(x2), 'r--')
plt.show()

The output is a figure:
![image.png](attachment:image.png)

In [None]:
# Build 32*32 = 1024 evenly spaced values starting at 0 with step 1/1024,
# then lay them out row by row as a 32x32 array. Each element is treated as
# the intensity of the corresponding pixel of a 32x32 image.
img = np.arange(0, 1, 1 / (32 * 32)).reshape(32, 32)

# Render the array as a grayscale image.
plt.imshow(img, cmap='gray')
plt.show()

The output is a figure:
![image.png](attachment:image.png)

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Download and read MNIST (cached under ./ after the first call).
# fetch_mldata was deprecated in scikit-learn 0.20 and scheduled for removal
# in 0.22; the deprecation message points to fetch_openml, used here.
mnist = fetch_openml('mnist_784', version=1, data_home='./')

# 'mnist.data' holds 70k rows of 784 values, each row being the pixels of a
# 28x28=784 image; 'mnist.target' holds the class of the corresponding image.
# Depending on the scikit-learn version these may come back as pandas objects
# with string labels, so convert to plain NumPy arrays with integer labels.
images = np.asarray(mnist.data)
targets = np.asarray(mnist.target).astype(np.int64)

# Scale the pixel values from [0, 255] to [0, 1] for further processing.
X = images / 255.
Y = targets

# The exercises below train on X_train/Y_train and evaluate on X_test/Y_test.
# Keep every 10th sample to make training fast and hold out 1000 samples for
# testing; random_state makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X[::10], Y[::10], test_size=1000, random_state=0
)

# Show the first image of the dataset.
img1 = X[0].reshape(28, 28)
plt.imshow(img1, cmap='gray')
plt.show()

# Show the same image after two simple transformations:
# inverted intensities ...
img2 = 1 - img1
plt.imshow(img2, cmap='gray')
plt.show()

# ... and rows swapped with columns.
img3 = img1.transpose()
plt.imshow(img3, cmap='gray')
plt.show()



![image.png](attachment:image.png)
![image.png](attachment:image.png)
![image.png](attachment:image.png)

#### Q1:
Please use logistic regression (default parameters) from sklearn to classify the data above, and print the training accuracy and the test accuracy.

In [None]:
# Q1: classify the digits with logistic regression.
# Expects X_train, Y_train, X_test and Y_test to already exist in the session.
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

if __name__ == '__main__':
    # Choose the model.
    # NOTE(review): the exercise asks for default parameters, but solver and
    # max_iter are set explicitly here.
    cls = LogisticRegression(solver='lbfgs', max_iter=10000)

    # Train on the training split.
    cls.fit(X_train, Y_train)

    # Predict labels for both splits.
    prediction_train = cls.predict(X_train)
    prediction_test = cls.predict(X_test)

    # accuracy_score is documented as (y_true, y_pred): ground truth first.
    train_accuracy = metrics.accuracy_score(Y_train, prediction_train)
    test_accuracy = metrics.accuracy_score(Y_test, prediction_test)

    print('Training accuracy: %0.2f%%' % (train_accuracy*100))
    print('Testing accuracy: %0.2f%%' % (test_accuracy*100))

Output:
C:\Program Files\Python37\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function fetch_mldata is deprecated; fetch_mldata was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
  warnings.warn(msg, category=DeprecationWarning)
C:\Program Files\Python37\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function mldata_filename is deprecated; mldata_filename was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
  warnings.warn(msg, category=DeprecationWarning)
C:\Program Files\Python37\lib\site-packages\sklearn\linear_model\logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.
  "this warning.", FutureWarning)
Training accuracy: 95.75%
Testing accuracy: 89.50%
[Finished in 29.1s]

#### Q2:
Please use naive Bayes (Bernoulli, default parameters) from sklearn to classify the data above, and print the training accuracy and the test accuracy.

In [None]:
# Q2: classify the digits with Bernoulli naive Bayes (default parameters).
# Expects X_train, Y_train, X_test, Y_test and the sklearn `metrics` module
# to already exist in the session.
from sklearn.naive_bayes import BernoulliNB

if __name__ == '__main__':
    # Choose the model.
    cls = BernoulliNB()

    # Train on the training split.
    cls.fit(X_train, Y_train)

    # Predict labels for both splits.
    prediction_train = cls.predict(X_train)
    prediction_test = cls.predict(X_test)

    # accuracy_score is documented as (y_true, y_pred): ground truth first.
    train_accuracy = metrics.accuracy_score(Y_train, prediction_train)
    test_accuracy = metrics.accuracy_score(Y_test, prediction_test)

    print('Training accuracy: %0.2f%%' % (train_accuracy*100))
    print('Testing accuracy: %0.2f%%' % (test_accuracy*100))


Output:
C:\Program Files\Python37\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function fetch_mldata is deprecated; fetch_mldata was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
  warnings.warn(msg, category=DeprecationWarning)
C:\Program Files\Python37\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function mldata_filename is deprecated; mldata_filename was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
  warnings.warn(msg, category=DeprecationWarning)
Training accuracy: 83.93%
Testing accuracy: 82.00%    


#### Q3:
Please use a support vector machine (default parameters) from sklearn to classify the data above, and print the training accuracy and the test accuracy.

In [None]:
# Q3: classify the digits with a linear support vector machine
# (default parameters).
# Expects X_train, Y_train, X_test, Y_test and the sklearn `metrics` module
# to already exist in the session.
from sklearn.svm import LinearSVC

if __name__ == '__main__':
    # Choose the model.
    # NOTE(review): the recorded run of this cell emitted a ConvergenceWarning
    # ("Liblinear failed to converge"); the defaults are kept because the
    # exercise asks for them.
    cls = LinearSVC()

    # Train on the training split.
    cls.fit(X_train, Y_train)

    # Predict labels for both splits.
    prediction_train = cls.predict(X_train)
    prediction_test = cls.predict(X_test)

    # accuracy_score is documented as (y_true, y_pred): ground truth first.
    train_accuracy = metrics.accuracy_score(Y_train, prediction_train)
    test_accuracy = metrics.accuracy_score(Y_test, prediction_test)

    print('Training accuracy: %0.2f%%' % (train_accuracy*100))
    print('Testing accuracy: %0.2f%%' % (test_accuracy*100))


C:\Program Files\Python37\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function fetch_mldata is deprecated; fetch_mldata was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
  warnings.warn(msg, category=DeprecationWarning)
C:\Program Files\Python37\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function mldata_filename is deprecated; mldata_filename was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
  warnings.warn(msg, category=DeprecationWarning)
C:\Program Files\Python37\lib\site-packages\sklearn\svm\base.py:929: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  "the number of iterations.", ConvergenceWarning)
Training accuracy: 98.05%
Testing accuracy: 87.40%


#### Q4:
Please adjust the parameters of SVM to increase the testing accuracy, and print the training accuracy and test accuracy.

In [None]:
# Q4: SVM with a different group of parameters to raise the test accuracy.
# Expects X_train, Y_train, X_test and Y_test to already exist in the session.

from sklearn.svm import SVC
from sklearn import metrics

if __name__ == '__main__':
    # Choose the model: RBF kernel, gamma='scale', balanced class weights.
    cls = SVC(kernel='rbf', class_weight='balanced', gamma='scale')

    # Train on the training split.
    cls.fit(X_train, Y_train)

    # Predict labels for both splits.
    prediction_train = cls.predict(X_train)
    prediction_test = cls.predict(X_test)

    # accuracy_score is documented as (y_true, y_pred): ground truth first.
    train_accuracy = metrics.accuracy_score(Y_train, prediction_train)
    test_accuracy = metrics.accuracy_score(Y_test, prediction_test)

    print('Training accuracy: %0.2f%%' % (train_accuracy*100))
    print('Testing accuracy: %0.2f%%' % (test_accuracy*100))

C:\Program Files\Python37\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function fetch_mldata is deprecated; fetch_mldata was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
  warnings.warn(msg, category=DeprecationWarning)
C:\Program Files\Python37\lib\site-packages\sklearn\utils\deprecation.py:85: DeprecationWarning: Function mldata_filename is deprecated; mldata_filename was deprecated in version 0.20 and will be removed in version 0.22. Please use fetch_openml.
  warnings.warn(msg, category=DeprecationWarning)
Training accuracy: 98.30%
Testing accuracy: 95.60%