In [None]:
import argparse

import shutil

import sys

import tempfile
import pandas as pd
import csv
import numpy as np
import tensorflow as tf



# Name of the column the model is trained to predict.
LABEL = "target"

# Column names assigned (positionally) to the CSV files read by input_fn.
CSV_COLUMNS = ["msno", "song_id", LABEL]

# Every CSV column except the label is fed to the model as a feature.
FEATURES = [name for name in CSV_COLUMNS if name != LABEL]

# Each id feature is hashed into 1000 buckets.
msno = tf.feature_column.categorical_column_with_hash_bucket(
    key="msno", hash_bucket_size=1000)
song_id = tf.feature_column.categorical_column_with_hash_bucket(
    key="song_id", hash_bucket_size=1000)

# Inputs of the DNN: one 8-dimensional embedding per hashed id column.
deep_columns = [
    tf.feature_column.embedding_column(column, dimension=8)
    for column in (msno, song_id)
]


def _wide_columns(cross_bucket_size=10000):
    """Return ``(base_columns, crossed_columns)`` for the linear part of the model.

    The base columns are the hashed ``msno`` and ``song_id`` columns defined at
    module level; the single crossed column models their interaction.
    """
    base_columns = [msno, song_id]
    crossed_columns = [
        # The cross is keyed by raw feature names: crossed_column does not
        # accept hashed categorical columns as keys.
        tf.feature_column.crossed_column(
            ["msno", "song_id"], hash_bucket_size=cross_bucket_size),
    ]
    return base_columns, crossed_columns


def build_estimator(model_dir, model_type):
    """Build an estimator.

    Args:
        model_dir: Directory where the estimator stores its checkpoints.
        model_type: ``"wide"`` builds a ``LinearClassifier``, ``"deep"`` a
            ``DNNClassifier``; any other value builds a
            ``DNNLinearCombinedClassifier``.

    Returns:
        The constructed (untrained) ``tf.estimator`` classifier.
    """
    print('model_dir:',model_dir)
    print('model_type:',model_type)

    if model_type == "deep":
        print('building deep model')
        return tf.estimator.DNNClassifier(
            model_dir=model_dir,
            feature_columns=deep_columns,
            hidden_units=[100, 50])

    # Both remaining model types need the wide columns. They were previously
    # referenced without ever being defined, which raised NameError.
    base_columns, crossed_columns = _wide_columns()

    if model_type == "wide":
        return tf.estimator.LinearClassifier(
            model_dir=model_dir,
            feature_columns=base_columns + crossed_columns)

    return tf.estimator.DNNLinearCombinedClassifier(
        model_dir=model_dir,
        linear_feature_columns=crossed_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=[100, 50])

def input_fn(data_file, is_train, num_epochs, shuffle):
    """Input builder function.

    Reads ``data_file`` as CSV, skipping its first line and naming the columns
    from ``CSV_COLUMNS``, replaces missing values with the string ``'unknown'``
    and wraps the result in a ``pandas_input_fn``.

    NOTE(review): ``names=`` assigns column names by position, not by the
    file's header. The test file inspected later in this notebook has 6
    columns, so confirm that the first three names really line up with the
    intended columns.

    Args:
        data_file: Path of the CSV file to read.
        is_train: When true the input queue is fed by 3 threads, otherwise 1.
        num_epochs: Number of passes over the data.
        shuffle: Whether the rows are shuffled.

    Returns:
        The input function produced by ``tf.estimator.inputs.pandas_input_fn``.
    """
    # Use a context manager so the file handle is closed as soon as pandas has
    # consumed it (it was previously left open).
    with tf.gfile.Open(data_file) as csv_file:
        df_data = pd.read_csv(
            csv_file,
            names=CSV_COLUMNS,
            skipinitialspace=True,
            engine="python",
            skiprows=1)

    print(df_data.shape)

    # Missing values are replaced, not dropped, so the row count is preserved.
    # This applies to every column, including the label.
    df_data = df_data.fillna(value='unknown')

    # Training and prediction differ only in the number of reader threads.
    return tf.estimator.inputs.pandas_input_fn(
        x=pd.DataFrame({k: df_data[k].values for k in FEATURES}),
        y=pd.Series(df_data[LABEL].values),
        batch_size=100,
        num_epochs=num_epochs,
        shuffle=shuffle,
        num_threads=3 if is_train else 1)

def train_and_eval(model_dir, model_type, train_steps, train_data, test_data):
    """Train the model, predict on the test data and write ``submission.csv``.

    Args:
        model_dir: Directory for the estimator's checkpoints. When empty, a
            temporary directory is created and removed again afterwards.
        model_type: Model type passed on to ``build_estimator``.
        train_steps: Number of training steps.
        train_data: Path of the training CSV.
        test_data: Path of the CSV to predict on.

    Side effects:
        Writes ``submission.csv`` in the current directory with one row per
        prediction: a running index and the class with the highest predicted
        probability.
    """
    print('tempfile')
    # Only a directory created here is cleaned up; a caller-supplied model_dir
    # is an output location and must survive the run.
    created_temp_dir = not model_dir
    if created_temp_dir:
        model_dir = tempfile.mkdtemp()

    try:
        print('build_estimator')
        m = build_estimator(model_dir, model_type)

        print('train start')
        m.train(
            input_fn=input_fn(train_data, True, num_epochs=1, shuffle=True),
            steps=train_steps)

        print('predict start')
        results = m.predict(
            input_fn=input_fn(test_data, False, num_epochs=1, shuffle=False))

        # predict() is lazy: the predictions are produced while the rows are
        # written, so this must happen before the model directory is removed.
        with open("submission.csv",'w',newline='') as f:
            wr = csv.writer(f,dialect='excel')
            wr.writerow(["id","target"])
            for i, r in enumerate(results):
                wr.writerow([str(i), str(np.argmax(r['probabilities']))])
    finally:
        # Clean up the temporary directory even if training or prediction fails.
        if created_temp_dir:
            shutil.rmtree(model_dir,ignore_errors=True)
    print('end!')


# Parsed command-line flags; assigned in the ``__main__`` block before main() runs.
FLAGS = None


def main(_):
    """Entry point handed to ``tf.app.run``; forwards the parsed flags to ``train_and_eval``."""
    train_and_eval(
        model_dir=FLAGS.model_dir,
        model_type=FLAGS.model_type,
        train_steps=FLAGS.train_steps,
        train_data=FLAGS.train_data,
        test_data=FLAGS.test_data)





if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")

    # (flag, type, default, help text) for each command-line option, in the
    # order they are registered with the parser.
    flag_specs = [
        ("--model_dir", str, "",
         "Base directory for output models."),
        ("--model_type", str, "deep",
         "Valid model types: {'wide', 'deep', 'wide_n_deep'}."),
        ("--train_steps", int, 20,
         "Number of training steps."),
        ("--train_data", str, "dataset/train.csv/train.csv",
         "Path to the training data."),
        ("--test_data", str, "dataset/test.csv/test.csv",
         "Path to the test data."),
    ]
    for flag_name, flag_type, flag_default, flag_help in flag_specs:
        parser.add_argument(
            flag_name, type=flag_type, default=flag_default, help=flag_help)

    # Arguments the parser does not recognise are passed through to tf.app.run.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

tempfile
build_estimator
model_dir: C:\Users\40712\AppData\Local\Temp\tmpoy7rxpam
model_type: deep
building deep model
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\40712\\AppData\\Local\\Temp\\tmpoy7rxpam', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000016B022BF5F8>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
train start
(7377418, 3)
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\40712\AppData\Local\Temp\tmpoy7rxpam\model.ckpt.
INFO:tensorflow:loss = 71.0419, step = 1
INFO:tensorflow:Saving checkpoints for 20 into C:

In [19]:
# NOTE(review): this cell fails with the NameError recorded below. `m` is only
# ever bound as a local variable inside train_and_eval in the first cell, so no
# estimator named `m` exists at notebook scope.
# NOTE(review): the first cell calls predict with `input_fn=...` and the deep
# model's feature columns cover both "msno" and "song_id"; passing a bare list
# of strings presumably would not work even with `m` defined - confirm.
x = ['V8ruy7SGk7tDm3zA51DPpn6qutt+vmKMBKa21dp54uM=','WmHKgKMlp1lQMecNdNvDMkvIycZYHnFwDT72I5sIssc=']
y = m.predict(x)
print(y)

NameError: name 'm' is not defined

In [45]:
# Demo of csv.writer: a header row followed by three data rows in test.csv.
header_row = ["index","a_name","b_name"]
data_rows = [[0,1,3],[1,2,3],[2,3,4]]
with open("test.csv","w",newline='') as csvfile:
    writer = csv.writer(csvfile,dialect='excel')
    # writerows emits the column names first, then every data row.
    writer.writerows([header_row] + data_rows)


In [13]:
import pandas as pd

# Sanity check: compare the shape of the test set with the shape of the
# predictions written to submission.csv.
# NOTE(review): the recorded output shows 9 test rows but only 8 predictions;
# one row appears to be missing and should be investigated.
test = pd.read_csv('dataset/test.csv/test1.csv')
s = pd.read_csv('submission.csv')
print(test.shape, s.shape, sep='\n')

(9, 6)
(8, 2)
