In [1]:
import argparse

# argparse  解析命令参数和选项

## 整体框架
> 创建一个解析对象： parser = argparse.ArgumentParser(description="your script description") 

> 向该对象中添加你要关注的命令行参数和选项： parser.add_argument()

> 进行解析：args = parser.parse_args()

## add_argument
add_argument(name or flags…[, action][, nargs][, const][, default][, type][, choices][, required][, help][, metavar][, dest])   
> name or flags：指定参数的形式，一般写两个（-短参数 --长参数）
>> "-"为短参数，"--"为长参数

> nargs ：指定这个参数后面的value有多少个
>> `N`：确切个数 | `?`：0或1个参数 | `*`：0或多个参数 | `+`：至少一个参数

> type：指定参数类型

> choices：设置参数值的范围

> required：是否必选

> metavar ：参数的名字，在显示帮助信息时才用到

> help：设置帮助信息

> dest：解析出来的对应属性

> action：指定属性对应的处理逻辑，默认为store，append可以添加多个参数

> default：如果命令行没有出现这个选项，那么使用default指定的默认值

In [None]:
# Demo: one or more positional integers plus two optional flags.
parser = argparse.ArgumentParser(description='Process some integers.')

parser.add_argument('integers', metavar='N', type=int, nargs='+',
                    help='an integer for the accumulator')
# -sum / --sum switches the accumulator from the default max() to sum().
parser.add_argument('-sum', '--sum', dest='accumulate', action='store_const', const=sum,
                    default=max, help='sum the integers (default: find the max)')
# -square / --square is optional, so args.square is None when it is omitted.
parser.add_argument("-square", "--square", help="display a square of a given number", type=int)

args = parser.parse_args()

accumulated = args.accumulate(args.integers)
if args.square is None:
    # Squaring None would raise TypeError; print only the accumulated value.
    print(accumulated)
else:
    print(accumulated, args.square ** 2)

In [None]:
python prog.py 4 5 6 -square 7
#输出6  49

python prog.py -sum 4 5 6 -square 7
#输出15  49

In [None]:
parser = argparse.ArgumentParser(description='Search some files')

# Zero or more positional file names, collected into a list.
parser.add_argument(dest='filenames', metavar='filename', nargs='*')
# May be given several times; every occurrence is appended to `patterns`.
parser.add_argument('-p', '--pat', metavar='pattern', required=True, dest='patterns',
                    action='append', help='text pattern to search for')
# Boolean flag: True when -v is present, False otherwise.
parser.add_argument('-v', dest='verbose', action='store_true', help='verbose mode')
# Takes a single value and stores it as a string.
parser.add_argument('-o', dest='outfile', action='store', help='output file')
# Takes a single value that must be one of the listed choices.
# A tuple (not a set) keeps the order shown in --help stable between runs.
parser.add_argument('--speed', dest='speed', action='store', choices=('slow', 'fast'),
                    default='slow', help='search speed')

args = parser.parse_args()

In [None]:
python3 search.py -v -p spam --pat=eggs foo.txt bar.txt

#输出
filenames = ['foo.txt', 'bar.txt']
patterns  = ['spam', 'eggs']
verbose   = True
outfile   = None
speed     = slow

## 示例： SVM

In [None]:
import pandas as pd
import re
from sklearn import svm
from sklearn.model_selection import train_test_split
from collections import defaultdict
import sys
import os
import argparse
import pickle 
import matplotlib.pyplot as plt

# Command-line interface of the SVM script: optional arguments first, then a
# separate group for the two arguments that must always be supplied.
ap = argparse.ArgumentParser()
ap.add_argument("-k","--kernel",  help="kernel used for training SVM, choose any one from 'linear', 'poly', 'rbf', 'sigmoid'; if no choice made, all 4 kernels will be used")
ap.add_argument("-o",'--out_prefix', help='ouput file prefix')
ap.add_argument('-a','--accuracy_estimation', action='store_true', help='-a  perform accuracy estimation with known modified status from --predict file')
ap.add_argument('-M','--model' , help="pre-trained model that can ben used for prediction; if this is not available SVM model will be trained and dumped; there can be multiple models, which should be in the same order as kernels applied")
ap.add_argument("-t","--train",  help="file name of feature table used for training")
ap.add_argument('-mc','--modification_status_column',help = "column number from (input file1, i.e, traing file) that contains modification status information")

# Listed under their own heading in --help; both are declared with required=True.
requiredGrp = ap.add_argument_group('required arguments')
requiredGrp.add_argument("-p","--predict", required=True, help="file name of feature table used for making predictions or testing accuracy.\nwhen this file is the same the one used for training, half of the data will be chosen for training.")
requiredGrp.add_argument('-cl','--columns',required=True,help = "comma seperated column number(s) that contain features used for training and prediciton")
args = vars(ap.parse_args())       ### vars() turns the parsed Namespace into a plain dict, so options are read as args['name']

def evaluate_on_test_data (test, predicitons):
    """Return the percentage of predictions that match the true labels.

    Args:
        test: sequence of true labels.
        predicitons: sequence of predicted labels, aligned with ``test``
            (the misspelled name is kept so existing keyword callers still work).

    Returns:
        Accuracy as a percentage between 0 and 100; 0.0 when ``test`` is empty.
    """
    total = len(test)
    if total == 0:
        # Avoid ZeroDivisionError on an empty test set.
        return 0.0
    # Compare the arguments themselves: the previous body read the module-level
    # ``y_test`` / ``predictions`` globals and ignored what was passed in.
    correct_classifications = sum(
        1 for i in range(total) if predicitons[i] == test[i]
    )
    return correct_classifications * 100 / total

def plot_ROC (y_test, probas,fig_out,kn, parameter):
    """Add the ROC curve of one kernel to the current figure and save it.

    Args:
        y_test: true labels of the test samples.
        probas: 2-D array of class probabilities; column 1 is used as the
            score (presumably the positive class -- confirm against the
            model's ``classes_`` order).
        fig_out: path the figure is saved to.
        kn: kernel name, shown in the legend.
        parameter: text appended to the plot title.

    The figure is not cleared first, so successive calls draw onto the same
    axes.
    """
    # These two were never imported at file level, so every call raised
    # NameError; scikit-learn itself is already a dependency of this script.
    from sklearn.metrics import auc, roc_curve

    fpr, tpr, _ = roc_curve(y_test,probas[:,1])
    roc_auc = auc(fpr,tpr)
    plt.plot (fpr, tpr, label = 'ROC Curve for ' + 'kernel '+kn +' (Area under ROC = %0.2f)'% roc_auc)
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC of Modification Prediction based On ' + parameter)
    plt.legend(loc="lower right")
    plt.savefig(fig_out)

# Parse --columns: a comma-separated list of 1-based column numbers in which
# an item may also be an inclusive range written as "a-b" or "a:b".
cols = []
cols_in = args['columns']
for c in cols_in.split(','):
    if re.search (r'[-:]',c):
        c1,c2 = re.split (r'[-:]',c)
        cols += list (range(int(c1), int(c2)+1))
    else:
        cols.append(int(c))
# De-duplicate first and sort afterwards: sorting and then converting to a
# set, as before, does not guarantee the final order.
cols = sorted (set(cols))
# Convert to the 0-based positions used by .iloc / .columns[...].
cols = [x-1 for x in cols]
mod_col = int (args['modification_status_column']) - 1 if args['modification_status_column'] else None

# training set
m_u_var_df = pd.DataFrame()
# Explicit dtype: a bare pd.Series() emits a "default dtype for empty Series"
# warning on pandas 1.x.
X = pd.Series(dtype=object)
Y = pd.Series(dtype=object)
indices = []
if args['train']:
    if mod_col is None:
        # Without the label column the .iloc lookup below fails with an
        # obscure indexing error; report the real cause instead.
        ap.error('-mc/--modification_status_column is required when -t/--train is given')
    m_u_var = args['train']
    m_u_var_df = pd.read_csv(m_u_var)
    names = list (m_u_var_df.columns[cols])
    ## prepare X and Y for training
    # Rows with a missing feature value are dropped, then rows are renumbered
    # from 0 (the old index is discarded rather than kept as a column).
    df_tmp = m_u_var_df.dropna (subset=names)
    df_tmp = df_tmp.reset_index(drop=True)
    X = df_tmp.iloc[:,cols]
    indices = df_tmp.index.tolist()
    Y = df_tmp.iloc[:,mod_col]

## prediction set
# Defaults used when no prediction table is loaded.
predict_tmp = pd.DataFrame()
old_header = ''
if 'predict' in args:
    predict_df = pd.read_csv(args['predict'])
    names = list(predict_df.columns[cols])
    # Drop rows with a missing feature value, then renumber rows from 0.
    predict_tmp = predict_df.dropna(subset=names).reset_index(drop=True)
    # Comma-joined column names of the prediction table.
    old_header = ",".join(predict_tmp.columns.values)

### report the selected columns and build the common output-file prefix
# m_u_var is reset to this constant, so the fallback prefix always begins
# with "SVM_out" regardless of the training file name.
m_u_var="SVM_out"
feature_suffix = '.'.join(names) + '.SVM'
if args['out_prefix']:
    prefix_stem = args['out_prefix']
else:
    # File name without directory and without anything after the first dot.
    train_stem = m_u_var.split('/')[-1].split('.')[0]
    predict_stem = args['predict'].split('/')[-1].split('.')[0]
    prefix_stem = train_stem + '.' + predict_stem
out_prefix = prefix_stem + '.' + feature_suffix
print("Colunms-used: ", cols_in, "output: ", out_prefix)

##########
# Choose the rows to fit on (when no model is supplied) and the rows to predict.
if args['model']:
    # A pre-trained model is supplied: nothing to fit, predict on every row.
    X_test = predict_tmp.iloc[:,cols]
    indices_test = X_test.index.tolist()
else:
    if args['predict'] == args['train']:
        # Same file in both roles: hold out a reproducible half for testing.
        X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(X,Y.values.ravel(), indices, test_size=0.5, random_state= 100)
    else:
        # Separate files: fit on every training row.  The former
        # train_test_split(..., test_size=0) call is rejected with a
        # ValueError by current scikit-learn releases, so the data is used
        # directly (unshuffled).
        X_train, y_train, indices_train = X, Y.values.ravel(), indices
        X_test = predict_tmp.iloc[:,cols]
        indices_test = X_test.index.tolist()

##### prepare for accuracy estimation
if args['accuracy_estimation']:
    # Only the "fit and predict on the same file" path gets y_test from the
    # split above; every other path (including --model with identical files,
    # which previously left y_test undefined) reads it from the prediction table.
    if args['model'] or args['predict'] != args['train']:
        y_test = predict_tmp.iloc[:,mod_col]
        y_test = y_test.values

### SVM train and prediction
kernels = ('linear', 'poly', 'rbf', 'sigmoid')  if args['kernel'] is None else (args['kernel'],)
accuracies = {}
probabilities = defaultdict (list)


def _write_predictions (fitted_model, predictions, out_path):
    """Write one CSV row per test sample to ``out_path``.

    Each row holds the sample's original columns, the predicted label, the
    decision-function value(s) and the two class probabilities.
    """
    # Rows are echoed from the training table when it doubles as the
    # prediction table, otherwise from the prediction table.
    source_df = df_tmp if args['predict'] == args['train'] else predict_tmp
    # The context manager closes (and flushes) the file; it was left open before.
    with open (out_path,'w') as outh:
        print (old_header + ',prediction'+',dist,ProbM,ProbU',file=outh)
        for t in range (len(indices_test)):
            idx = indices_test[t]
            original_line = ",".join(map(str, source_df.iloc[idx].values))
            dist = map(str,fitted_model.decision_function([X_test.iloc[t]]))
            probM, probU = map (str,fitted_model.predict_proba ([X_test.iloc[t]])[0])
            # str(): labels are not guaranteed to be strings (e.g. 0/1 classes).
            print (original_line + ',' + str(predictions[t]) +',' + ",".join (dist) + ',' +  probM + ',' + probU, file = outh)


if args['model']:
    for m in args['model'].strip().split(','):
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only pass model files from a trusted source.
        with open (m,'rb') as model_fh:
            loaded_model = pickle.load (model_fh)
        predictions = loaded_model.predict (X_test)
        _write_predictions (loaded_model, predictions, out_prefix+'.'+os.path.basename (m) + '.csv')
        if args['accuracy_estimation']:
            # NOTE(review): accuracies are keyed by kernel name, so with several
            # models each one overwrites the previous entries -- confirm intent.
            for kn in kernels:
                accuracies[kn] = evaluate_on_test_data(y_test, predictions)
else:
    for kn in kernels:
        model = svm.SVC(kernel=kn, probability=True)
        model_fit = model.fit (X_train, y_train)
        # Persist the fitted model so it can be reused later through --model.
        out_model = out_prefix + '.' + kn + '.model.dump'
        with open (out_model,'wb') as model_fh:
            pickle.dump (model, model_fh)
        predictions = model_fit.predict (X_test)
        _write_predictions (model, predictions, out_prefix+'.kernel.' + kn + '.csv')
        if args['accuracy_estimation']:
            accuracies[kn] = evaluate_on_test_data(y_test, predictions)
        
if args['accuracy_estimation']:
    # Rank kernels by accuracy; the last entry is the best one.
    acc_sort = sorted (accuracies.items(), key = lambda kv:kv[1])
    best_kn  = acc_sort[-1][0]
    best_acc = acc_sort[-1][1]
    best_prediciton = out_prefix + '.best-kernel.' + best_kn + '.accuracy'
    # Write the whole report through one handle that is closed afterwards.
    # Before, each line was printed to a freshly opened file object that was
    # never closed, so lines could be lost or written out of order.
    with open (best_prediciton,'w') as report:
        print ("Best accuracy {} %  obtained with kernel = {}".format(best_acc,best_kn), file = report)
        del accuracies[best_kn]
        for k,v in accuracies.items ():
            print (" {} % accuracy obtained with kernel = {}".format(v,k), file = report)
    ############# remove prediction results based on kernels that are less accurate ######
    for kn in kernels:
        if kn != best_kn:
            if os.path.isfile (out_prefix +'.kernel.'+kn + '.csv'):
                os.remove(out_prefix +'.kernel.'+kn + '.csv')

# docopt 
在代码的最开头用 """ """ 文档注释的形式写出符合要求的文档，docopt 就会据此自动生成对应的 parser（解析器）

## 注释文档的格式
> Usage：和一个空行之间的文本都会被识别为一个命令组合
> Usage: 后的第一个单词将会被识别为这个程序的名字（例如下面示例中的 tickets）

> Options：可选项 

## 示例：12306_ticket 

In [None]:
"""
Usage:
    tickets [-gdtkz] <from> <to> <date>

Options:
    -h,--help   显示帮助菜单
    -g          高铁
    -d          动车
    -t          特快
    -k          快速
    -z          直达

Example:
    tickets 北京 上海 2016-10-10
    tickets -dg 成都 南京 2016-10-10
"""
"""
https://kyfw.12306.cn/otn/leftTicket/queryO?leftTicketDTO.train_date=2018-04-05&leftTicketDTO.from_station=OMH&leftTicketDTO.to_station=NKH&purpose_codes=ADULT
四个参数：出发日期，起始站，终点站，票类型
"""

from docopt import docopt
import requests
from prettytable import PrettyTable
from colorama import init, Fore
import re

init(autoreset=True)   # autoreset: a colour change applies to the current output only, then the default is restored

station_url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9050'
# Maps each run of Chinese characters to the upper-case code that follows the
# "|" (presumably station name -> station code).  The pattern is a raw string:
# "\|" in a normal string literal is an invalid escape sequence and triggers
# a warning on recent Python versions; the re module itself understands \uXXXX.
stations = dict(re.findall(r'([\u4e00-\u9fa5]+)\|([A-Z]+)', requests.get(station_url).text))

class TrainsCollection:
    """Filter raw train records by train type and print them as a table."""

    # Column titles, in the same order as the row built in ``trains``.
    header = '车次 车站 时间 历时 一等 二等 高级软卧 软卧 硬卧 硬座 无座'.split()

    def __init__(self, available_trains,available_place, options):
        """Store the query result and the requested train-type filter.

        :param available_trains: list of raw train records, each one a
                                 '|'-separated string
        :param available_place: mapping used to turn the station codes found
                                in a record into display names
        :param options: string made of the enabled option flags
                        (e.g. '-g-d'); empty means "show every train"
        """
        self.available_trains = available_trains
        self.available_place = available_place
        self.options = options

    @property
    def trains(self):
        """Yield one table row (a list of 11 cells) per train that passes the filter."""
        for raw_train in self.available_trains:
            raw_train_list = raw_train.split('|')
            train_no = raw_train_list[3]
            initial = train_no[0].lower()   # first character of the train number = train type
            duration = raw_train_list[10]
            # No filter at all, or the train type letter appears among the flags.
            if not self.options or initial in self.options:
                # Field positions follow the original author's mapping; an
                # empty seat field is displayed as '--'.
                train = [
                    train_no,                                                    # train number
                    '\n'.join([Fore.LIGHTGREEN_EX + self.available_place[raw_train_list[6]],
                               Fore.LIGHTRED_EX + self.available_place[raw_train_list[7]]]),  # stations
                    '\n'.join([Fore.LIGHTGREEN_EX + raw_train_list[8],
                               Fore.LIGHTRED_EX + raw_train_list[9]]),         # times
                    duration,                                                    # duration
                    raw_train_list[-6] or '--',                                  # first class
                    raw_train_list[-7] or '--',                                  # second class
                    raw_train_list[-16] or '--',                                 # deluxe soft sleeper
                    raw_train_list[-14] or '--',                                 # soft sleeper
                    raw_train_list[-9] or '--',                                  # hard sleeper
                    raw_train_list[-8] or '--',                                  # hard seat
                    raw_train_list[-11] or '--',                                 # no seat
                ]
                yield train

    def pretty_print(self):
        """Print every matching train as a PrettyTable."""
        pt = PrettyTable()
        # Use the public attribute: _set_field_names() is a private helper
        # that is not available in newer prettytable releases.
        pt.field_names = self.header
        for train in self.trains:
            pt.add_row(train)
        print(pt)


def cli():
    """Parse the command line, query the remaining tickets and print them.

    Exits with an error message when a station name is not in ``stations``.
    """
    arguments = docopt(__doc__)
    from_station = stations.get(arguments['<from>'])
    to_station = stations.get(arguments['<to>'])
    date = arguments['<date>']

    # An unknown station name used to be sent to the server as the text "None".
    for role, typed_name, code in (('from', arguments['<from>'], from_station),
                                   ('to', arguments['<to>'], to_station)):
        if code is None:
            raise SystemExit('Unknown {} station: {}'.format(role, typed_name))

    url = ('https://kyfw.12306.cn/otn/leftTicket/queryO?'
           'leftTicketDTO.train_date={}&'
           'leftTicketDTO.from_station={}&leftTicketDTO.to_station={}&purpose_codes=ADULT').format(
                date, from_station, to_station
           )
    r = requests.get(url)
    # Decode the response body once instead of once per field.
    data = r.json()['data']
    available_trains = data['result']
    available_place = data['map']
    # Concatenate the names of all flags that were switched on, e.g. '-g-d'.
    options = ''.join([
        key for key, value in arguments.items() if value is True
    ])
    TrainsCollection(available_trains,available_place, options).pretty_print()


if __name__ == '__main__':
    cli()

# getpass 安全的密码输入

In [None]:
import getpass

# Prompt for a password; getpass does not echo the typed characters.
passwd=getpass.getpass("请输入密码：")

# configparser 配置文件操作
由一个或多个命名的节组成，每个节可以包含带有名称和值的单个选项

;或＃开头的行被视为注释

## 读取配置文件

In [1]:
from configparser import ConfigParser

parser = ConfigParser()
# read() returns the list of files it parsed successfully (shown as the cell output).
parser.read('C:/Users/86188/PythonPackages/data/config.ini')

['C:/Users/86188/PythonPackages/data/config.ini']

In [2]:
# get(section, option) returns the raw value as a string.
parser.get('bug_tracker', 'url')

'http://localhost:8080/bugs/'

### read() 方法也接受文件名列表
依次扫描每个名称，如果文件存在，则将其打开并读取

In [None]:
from configparser import ConfigParser
import glob

# File names to try; read() quietly skips the ones it cannot open.
candidates = [
    'does_not_exist.ini',
    'also-does-not-exist.ini',
    'simple.ini',
    'multisection.ini',
]

parser = ConfigParser()
found = parser.read(candidates)
# Whatever was requested but not reported back by read().
missing = set(candidates).difference(found)

for label, paths in (('Found config files:', found),
                     ('Missing files     :', missing)):
    print(label, sorted(paths))

## 访问配置设置
sections()、options()和items()

In [3]:
from configparser import ConfigParser

parser = ConfigParser()
parser.read('C:/Users/86188/PythonPackages/data/config.ini')

# Walk every section, listing its option names and then each name/value pair.
for section_name in parser.sections():
    option_names = parser.options(section_name)
    print('Section:', section_name)
    print('  Options:', option_names)
    for pair in parser.items(section_name):
        print('  {} = {}'.format(*pair))
    print()

Section: bug_tracker
  Options: ['url', 'username', 'password']
  url = http://localhost:8080/bugs/
  username = dhellmann
  password = SECRET

Section: wiki
  Options: ['url', 'username', 'password']
  url = http://localhost:8080/wiki/
  username = dhellmann
  password = SECRET



### 测试值是否存在
has_section()、has_option()

In [4]:
from configparser import ConfigParser

parser = ConfigParser()
parser.read('C:/Users/86188/PythonPackages/data/config.ini')

# Report, for each candidate name, whether such a section was loaded.
for candidate in ('wiki', 'bug_tracker', 'dvcs'):
    is_present = parser.has_section(candidate)
    print('{:<12}: {}'.format(candidate, is_present))

wiki        : True
bug_tracker : True
dvcs        : False


## 值类型
get：字符串

getint：整数

getfloat：浮点数

getboolean：布尔值

In [5]:
from configparser import ConfigParser

parser = ConfigParser()
parser.read('C:/Users/86188/PythonPackages/data/types.ini')

# One entry per value type: heading, section name, typed getter, row format.
type_demos = (
    ('Integers:', 'ints', parser.getint,
     '  {:<12} : {!r:<7} -> {}'),
    ('.Floats:', 'floats', parser.getfloat,
     '  {:<12} : {!r:<7} -> {:0.2f}'),
    ('.Booleans:', 'booleans', parser.getboolean,
     '  {:<12} : {!r:<7} -> {}'),
)

for heading, section, typed_get, row_format in type_demos:
    print(heading)
    for name in parser.options(section):
        # Show the raw string next to the converted value.
        string_value = parser.get(section, name)
        value = typed_get(section, name)
        print(row_format.format(name, string_value, value))

Integers:
  positive     : '1'     -> 1
  negative     : '-5'    -> -5
.Floats:
  positive     : '0.2'   -> 0.20
  negative     : '-3.14' -> -3.14
.Booleans:
  number_true  : '1'     -> True
  number_false : '0'     -> False
  yn_true      : 'yes'   -> True
  yn_false     : 'no'    -> False
  tf_true      : 'true'  -> True
  tf_false     : 'false' -> False
  onoff_true   : 'on'    -> True
  onoff_false  : 'false' -> False


## 修改设置

### 添加
add_section：创建新节

set：添加或更改选项

In [7]:
from configparser import ConfigParser

parser = ConfigParser()

# Create the section, then fill it option by option.
parser.add_section('bug_tracker')
for option, setting in (('url', 'http://localhost:8080/bugs'),
                        ('username', 'dhellmann'),
                        ('password', 'secret')):
    parser.set('bug_tracker', option, setting)

# Dump everything that is now stored in the parser.
for section in parser.sections():
    print(section)
    for pair in parser.items(section):
        print('  {} = {!r}'.format(*pair))

bug_tracker
  url = 'http://localhost:8080/bugs'
  username = 'dhellmann'
  password = 'secret'


### 删除
remove_section

remove_option

In [8]:
from configparser import ConfigParser

parser = ConfigParser()
parser.read('C:/Users/86188/PythonPackages/data/config.ini')


def _dump_settings(title):
    """Print ``title`` followed by every section and its name/value pairs."""
    print(title)
    for section in parser.sections():
        print(section)
        for name, value in parser.items(section):
            print('  {} = {!r}'.format(name, value))


_dump_settings('Read values:.')

# Drop a single option, then a whole section.
parser.remove_option('bug_tracker', 'password')
parser.remove_section('wiki')

_dump_settings('.Modified values:.')

Read values:.
bug_tracker
  url = 'http://localhost:8080/bugs/'
  username = 'dhellmann'
  password = 'SECRET'
wiki
  url = 'http://localhost:8080/wiki/'
  username = 'dhellmann'
  password = 'SECRET'
.Modified values:.
bug_tracker
  url = 'http://localhost:8080/bugs/'
  username = 'dhellmann'


## 保存配置文件
write

In [16]:
from configparser import ConfigParser
import sys

parser = ConfigParser()
parser.read('C:/Users/86188/PythonPackages/data/config.ini')

# Add a new section to the loaded settings and populate it.
parser.add_section('tracker')
for option, setting in (('url', 'http://localhost:8080/bugs'),
                        ('username', 'dhellmann'),
                        ('password', 'secret')):
    parser.set('tracker', option, setting)

# Save the combined settings to a second file.
with open("C:/Users/86188/PythonPackages/data/config2.ini","w") as f:
    parser.write(f)