In [1]:
from tensorflow.keras import layers, models, Model, Sequential

In [7]:
class Inception(layers.Layer):
    def __init__(self, ch1x1, ch3x3red, ch3x3, ch5x5red, ch5x5, pool_proj, **kwargs):
        super(Inception, self).__init__(**kwargs)
        self.branch1 = layers.Conv2D(ch1x1, kernel_size=1, activation="relu")

        self.branch2 = Sequential([
            layers.Conv2D(ch3x3red, kernel_size=1, activation="relu"),
            layers.Conv2D(ch3x3, kernel_size=3, padding="SAME", activation="relu")])      # output_size= input_size

        self.branch3 = Sequential([
            layers.Conv2D(ch5x5red, kernel_size=1, activation="relu"),
            layers.Conv2D(ch5x5, kernel_size=5, padding="SAME", activation="relu")])      # output_size= input_size

        self.branch4 = Sequential([
            layers.MaxPool2D(pool_size=3, strides=1, padding="SAME"),  # caution: default strides==pool_size
            layers.Conv2D(pool_proj, kernel_size=1, activation="relu")])                  # output_size= input_size

    def call(self, inputs, **kwargs):
        branch1 = self.branch1(inputs)
        branch2 = self.branch2(inputs)
        branch3 = self.branch3(inputs)
        branch4 = self.branch4(inputs)
        outputs = layers.concatenate([branch1, branch2, branch3, branch4])
        return outputs
Inception(192, 96, 208, 16, 48, 64, name="inception_4a")

<__main__.Inception at 0x23d6921fd90>

In [8]:
def GoogLeNet(im_height=224, im_width=224, class_num=1000, aux_logits=False):
    # tensorflow中的tensor通道排序是NHWC
    input_image = layers.Input(shape=(im_height, im_width, 3), dtype="float32")
    # (None, 224, 224, 3)
    x = layers.Conv2D(64, kernel_size=7, strides=2, padding="SAME", activation="relu", name="conv2d_1")(input_image)
    # (None, 112, 112, 64)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_1")(x)
    # (None, 56, 56, 64)
    x = layers.Conv2D(64, kernel_size=1, activation="relu", name="conv2d_2")(x)
    # (None, 56, 56, 64)
    x = layers.Conv2D(192, kernel_size=3, padding="SAME", activation="relu", name="conv2d_3")(x)
    # (None, 56, 56, 192)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_2")(x)

    # (None, 28, 28, 192)
    x = Inception(64, 96, 128, 16, 32, 32, name="inception_3a")(x)
    # (None, 28, 28, 256)
    x = Inception(128, 128, 192, 32, 96, 64, name="inception_3b")(x)

    # (None, 28, 28, 480)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_3")(x)
    # (None, 14, 14, 480)
    x = Inception(192, 96, 208, 16, 48, 64, name="inception_4a")(x)
    if aux_logits:
        aux1 = InceptionAux(class_num, name="aux_1")(x)

    # (None, 14, 14, 512)
    x = Inception(160, 112, 224, 24, 64, 64, name="inception_4b")(x)
    # (None, 14, 14, 512)
    x = Inception(128, 128, 256, 24, 64, 64, name="inception_4c")(x)
    # (None, 14, 14, 512)
    x = Inception(112, 144, 288, 32, 64, 64, name="inception_4d")(x)
    if aux_logits:
        aux2 = InceptionAux(class_num, name="aux_2")(x)

    # (None, 14, 14, 528)
    x = Inception(256, 160, 320, 32, 128, 128, name="inception_4e")(x)
    # (None, 14, 14, 532)
    x = layers.MaxPool2D(pool_size=3, strides=2, padding="SAME", name="maxpool_4")(x)

    # (None, 7, 7, 832)
    x = Inception(256, 160, 320, 32, 128, 128, name="inception_5a")(x)
    # (None, 7, 7, 832)
    x = Inception(384, 192, 384, 48, 128, 128, name="inception_5b")(x)
    # (None, 7, 7, 1024)
    x = layers.AvgPool2D(pool_size=7, strides=1, name="avgpool_1")(x)

    # (None, 1, 1, 1024)
    x = layers.Flatten(name="output_flatten")(x)
    # (None, 1024)
    x = layers.Dropout(rate=0.4, name="output_dropout")(x)
    x = layers.Dense(class_num, name="output_dense")(x)
    # (None, class_num)
    aux3 = layers.Softmax(name="aux_3")(x)

    if aux_logits:
        model = models.Model(inputs=input_image, outputs=[aux1, aux2, aux3])
    else:
        model = models.Model(inputs=input_image, outputs=aux3)
    return model

network=GoogLeNet()
network.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 64)      9472      
_________________________________________________________________
maxpool_1 (MaxPooling2D)     (None, 56, 56, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 56, 56, 64)        4160      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 56, 56, 192)       110784    
_________________________________________________________________
maxpool_2 (MaxPooling2D)     (None, 28, 28, 192)       0         
_________________________________________________________________
inception_3a (Inception)     (None, 28, 28, 256)      

In [9]:
class InceptionAux(layers.Layer):
    def __init__(self, num_classes, **kwargs):
        super(InceptionAux, self).__init__(**kwargs)
        self.averagePool = layers.AvgPool2D(pool_size=5, strides=3)
        self.conv = layers.Conv2D(128, kernel_size=1, activation="relu")

        self.fc1 = layers.Dense(1024, activation="relu")
        self.fc2 = layers.Dense(num_classes)
        self.softmax = layers.Softmax()

    def call(self, inputs, **kwargs):
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = self.averagePool(inputs)
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.conv(x)
        # N x 128 x 4 x 4
        x = layers.Flatten()(x)
        x = layers.Dropout(rate=0.5)(x)
        # N x 2048
        x = self.fc1(x)
        x = layers.Dropout(rate=0.5)(x)
        # N x 1024
        x = self.fc2(x)
        # N x num_classes
        x = self.softmax(x)

        return x
InceptionAux(32)

<__main__.InceptionAux at 0x23d71076a90>

In [18]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import tensorflow as tf
import json
import os
import numpy as np

In [20]:
def main():
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    train_dir = os.path.join(image_path, "train")
    validation_dir = os.path.join(image_path, "val")
    assert os.path.exists(train_dir), "cannot find {}".format(train_dir)
    assert os.path.exists(validation_dir), "cannot find {}".format(validation_dir)

    # create direction for saving weights
    if not os.path.exists("save_weights"):
        os.makedirs("save_weights")

    im_height = 224
    im_width = 224
    batch_size = 32
    epochs = 30
    
    def pre_function(img):
        # img = im.open('test.jpg')
        # img = np.array(img).astype(np.float32)
        img = img / 255.
        img = (img - 0.5) * 2.0

        return img

    # data generator with data augmentation
    train_image_generator = ImageDataGenerator(preprocessing_function=pre_function,
                                               horizontal_flip=True)
    validation_image_generator = ImageDataGenerator(preprocessing_function=pre_function)

    train_data_gen = train_image_generator.flow_from_directory(directory=train_dir,
                                                               batch_size=batch_size,
                                                               shuffle=True,
                                                               target_size=(im_height, im_width),
                                                               class_mode='categorical')
    total_train = train_data_gen.n

    # get class dict
    class_indices = train_data_gen.class_indices

    # transform value and key of dict
    inverse_dict = dict((val, key) for key, val in class_indices.items())
    # write dict into json file
    json_str = json.dumps(inverse_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    val_data_gen = validation_image_generator.flow_from_directory(directory=validation_dir,
                                                                  batch_size=batch_size,
                                                                  shuffle=False,
                                                                  target_size=(im_height, im_width),
                                                                  class_mode='categorical')
    total_val = val_data_gen.n
    print("using {} images for training, {} images for validation.".format(total_train,
                                                                           total_val))

    model = GoogLeNet(im_height=im_height, im_width=im_width, class_num=5, aux_logits=True)
    # model.build((batch_size, 224, 224, 3))  # when using subclass model
    model.summary()

    # using keras low level api for training
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')
main()

AssertionError: cannot find D:\jupyter\data_set\flower_data\train

In [19]:
def pre_deal(data):
    pre_data=[]
    for i in data:
        one_data=[]
        #均值
        da_mean=i.mean()
        one_data.append(da_mean)
        #方差
        da_var=i.var()
        one_data.append(da_var)
        #标准差
        da_std=i.std()
        one_data.append(da_std)
        #均方根
        da_msv=da_mean ** 2 + da_var
        da_rms = da_msv ** 0.5
        one_data.append(da_rms)
        #波形指标
        x_ = abs(i).mean()
        xp = max(i)
        da_waveform = da_rms / x_
        one_data.append(da_waveform)
        #峰值指标
        da_peak = xp / da_rms
        one_data.append(da_peak)
        #脉冲指标
        da_impluse = xp / x_
        one_data.append(da_impluse)
        #裕度指标
        da_clearance = xp / (sum([math.sqrt(i) for i in abs(i)]) / len(i)) ** 2
        one_data.append(da_clearance)
        #偏斜度指标
        da_alpha = pd.Series(i).skew()
        da_skew = da_alpha / da_std ** 3
        one_data.append(da_skew)
        #峭度指标
        da_beta = pd.Series(i).kurt()
        da_kurt = da_beta / da_std ** 4
        one_data.append(da_kurt)
        
        pre_data.append(one_data)
    return np.array(pre_data)

pre_deal(np.array([2,5,6,47,89,8,5,5,4,6,7,5,2,3,4,5,0,9,6,9]))

TypeError: 'numpy.int32' object is not iterable

In [21]:
from sklearn import svm
from sklearn.decomposition import PCA #降维（）
from sklearn.model_selection import train_test_split

#PCA 主成分分析
pca=PCA(n_components=0.9,whiten=True)#n_components: 保留的主成分;whiten 归一化
# X_pca 可以代表原来的数据，但是经过矩阵运算，数据值已经变化（脱敏数据），属性也就没有实际的物理意义
X_pca_train=pca.fit_transform(X)

X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2)

clf2=SVC(C=1.0,kernel='rbf')
clf2.fit(X_train,Y_train)
clf2.score(X_test,Y_test)

NameError: name 'X' is not defined

In [2]:
from bs4 import BeautifulSoup #数据获取，网页解析
import re  #正则表达式，文字匹配
import urllib.request,urllib.error  #制定url，获取网页数据
import xlwt #进行excle操作
import sqlite3 #进行数据库操作

In [5]:
def main():
    baseurl='https://movie.douban.com/top250?start='
    #1.爬取网页
    datalist=getdata(baseurl)
    
    
    #3.保存数据
    savepath='./douban/top250.xls'
    savedata(savepath)
    return

# 电影连接
findLink=re.compile(r'<a href="(.*?)">')
# 图片连接
findImgSrc=re.compile(r'<img.*src="(.*?)"',re.S)
# 电影名字
findTitle=re.compile(r'<span class="title">(.*)</span>')
# 评分
findRating=re.compile(r'<span class="rating_num" property="v:average">(.*)</span>')
# 品论人数
findnum=re.compile(r'<span>(\d*)人评价</span>')
# 评价
findInq=re.compile(r'<span class="inq">(.*)</span>')
# 相关内容
findDb=re.compile(r'<p class="">(.*)</p>',re.S)

#获取网页
def getdata(baseurl):
    datalist=[]
    for i in range(0,10):

        url=baseurl+str(i*25)
        html=askurl(url)
    
        #2.逐一解析数据
        soup=BeautifulSoup(html,'lxml')
        for item in soup.find_all('div',class_='item'):
            data=[]
 
            item=str(item)
            
            # 获取影片的链接
            link=re.findall(findLink,item)[0] #给一个查找字符串规则
            data.append(link)
            # 获取图片
            imgsrc=re.findall(findImgSrc,item)[0]
            data.append(imgsrc)
            # 电影名字
            name=re.findall(findTitle,item)
            if (len(name)==2):
                ctitle=name[0]
                data.append(ctitle)
                otitle=name[1].replace("/","")
                data.append(otitle)
            else:
                data.append(name[0])
                data.append(' ')#留空
            # 评分
            rate=re.findall(findRating,item)[0]
            data.append(rate)
            # 品论人数
            people_num=re.findall(findnum,item)[0]
            data.append(people_num)
            # 评价
            Inq=re.findall(findInq,item)
            if len(Inq)!=0:
                Inq=Inq[0].replace('。','')
                data.append(Inq)
            else:
                data.append(' ')
            # 相关内容
            Db=re.findall(findDb,item)[0]
            Db=re.sub('<br(\s+)?/>(\s+)?',' ',Db)#替换<br(\s+)?/>(\s+)?
            Db=re.sub('/',' ',Db)
            data.append(Db.strip())#.strip(),去掉Db内的空格
            
            
            datalist.append(data)
    
    return datalist


In [4]:
def askurl(url):
    #模拟浏览器头部信息，向豆瓣服务器发送信息
    header={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
        AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    }# 用户代理，表示告诉豆瓣服务器，我们是什么类型的机型，浏览器
    
    request=urllib.request.Request(url,headers=header)
    html=''
    try:
        response=urllib.request.urlopen(request)
        html=response.read().decode('utf-8')

    except urllib.error.URLError as e:
        # 返回对象是否具有给定名称的属性
        if hasattr(e,'code'):
            print(e.code)
        if hasattr(e,'reason'):
            print(e.reason)

    return html



# 保存数据
def savedata(savepath):
    return 

In [6]:
datalist=getdata('https://movie.douban.com/top250?start=')
datalist[0]

['https://movie.douban.com/subject/1292052/',
 'https://img2.doubanio.com/view/photo/s_ratio_poster/public/p480747492.jpg',
 '肖申克的救赎',
 '\xa0\xa0The Shawshank Redemption',
 '9.7',
 '2231662',
 '希望让人自由',
 '导演: 弗兰克·德拉邦特 Frank Darabont\xa0\xa0\xa0主演: 蒂姆·罗宾斯 Tim Robbins  ... 1994\xa0 \xa0美国\xa0 \xa0犯罪 剧情\n                        < p>\n<div class="star">\n<span class="rating5-t">< span>\n<span class="rating_num" property="v:average">9.7< span>\n<span content="10.0" property="v:best">< span>\n<span>2231662人评价< span>\n< div>\n<p class="quote">\n<span class="inq">希望让人自由。< span>']