In [7]:
from omicron import init_db
from jqdatasdk import finance, query
import pickle
# NOTE(review): only `init_db` is imported in this cell; `init_jq` is presumably
# defined or imported in an earlier cell — confirm, otherwise this call raises NameError.
init_jq()

In [8]:
def get_fund_list(kind=(402001, 402004)):
    """Fetch all FUND_MAIN_INFO rows whose underlying asset type is in *kind*.

    The query is paged: up to 100 pages of 3000 rows each are requested, and
    paging stops at the first empty page.

    Args:
        kind: iterable of ``underlying_asset_type_id`` values to select.
            Defaults to ``(402001, 402004)``.

    Returns:
        A DataFrame made of all fetched pages concatenated in order. Each page
        keeps its own row index, so index labels repeat across pages. When the
        query matches nothing, the (empty) first page is returned as-is so the
        column layout is still available to callers.
    """
    page_size = 3000
    max_pages = 100

    # The default is an immutable tuple; hand in_() a fresh list built from it.
    base_query = query(finance.FUND_MAIN_INFO).filter(
        finance.FUND_MAIN_INFO.underlying_asset_type_id.in_(list(kind))
    )

    pages = []
    page = None
    for page_no in range(max_pages):
        paged_query = base_query.limit(page_size).offset(page_no * page_size)
        page = finance.run_query(paged_query)
        if len(page) == 0:
            break
        pages.append(page)

    if not pages:
        # pd.concat() raises ValueError on an empty list; return the empty
        # page instead of crashing.
        return page

    return pd.concat(pages)

def get_fund_codes(funds):
    """Return the ``main_code`` column of *funds* as a plain Python list."""
    main_codes = funds['main_code']
    return main_codes.tolist()

def code2name(fund, code):
    """Return the ``name`` of the first row of *fund* whose ``main_code`` equals *code*.

    Raises IndexError when no row matches.
    """
    is_match = fund['main_code'] == code
    matching_names = fund.loc[is_match, "name"]
    return matching_names.iloc[0]

def preprocess(all_funds):
    """Group the rows of ``~/data/fund.tsv`` by fund name.

    Every non-blank line of the file must hold four tab-separated fields; the
    first three are read as ``code``, ``sum_value`` and ``day`` and the fourth
    is ignored. The header row (``code == "code"``) and rows whose code is not
    present in ``all_funds["main_code"]`` are skipped, as are blank lines.

    Args:
        all_funds: DataFrame with ``main_code`` and ``name`` columns.

    Returns:
        dict mapping fund name to a list of ``(day, sum_value)`` tuples. Both
        items are the raw strings from the file, and each list is sorted by
        ``day`` using string comparison.

    Raises:
        ValueError: if a non-blank line does not have exactly four fields.
    """
    # Build the code -> name lookup once instead of filtering the DataFrame
    # for every line. setdefault keeps the first name seen for a code.
    code_to_name = {}
    all_codes = all_funds["main_code"].tolist()
    all_names = all_funds["name"].tolist()
    for fund_code, fund_name in zip(all_codes, all_names):
        code_to_name.setdefault(fund_code, fund_name)

    records = {}
    path = os.path.expanduser("~/data/fund.tsv")
    with open(path, "r") as f:
        for line in f:
            if not line.strip():
                # e.g. a trailing empty line at the end of the file
                continue

            code, sum_value, day, _ = line.split("\t")
            if code == "code" or code not in code_to_name:
                continue

            records.setdefault(code_to_name[code], []).append((day, sum_value))

    for values in records.values():
        values.sort(key=lambda item: item[0])

    return records

    
def get_acc_value(code, n):
    """Return ``sum_value`` of the *n* latest FUND_NET_VALUE rows for *code*.

    Rows are ordered by ``day`` descending, so the first element of the
    returned array belongs to the most recent day.
    """
    table = finance.FUND_NET_VALUE
    latest_first = (
        query(table)
        .filter(table.code == code)
        .order_by(table.day.desc())
        .limit(n)
    )
    return finance.run_query(latest_first)['sum_value'].values

def batch_get_acc_value(codes, n):
    """Call ``get_acc_value(code, n)`` for every code and collect the results.

    Returns a dict mapping each code to the array returned for it.
    """
    return {fund_code: get_acc_value(fund_code, n) for fund_code in codes}

def fund_values_to_array(fund_records, min_len=30):
    """Turn a ``{code: values}`` mapping into parallel code / value arrays.

    A series is dropped when it has fewer than *min_len* entries or when any
    entry is NaN after conversion to float.

    Args:
        fund_records: mapping of code to a 1-D sequence of values (anything
            ``np.asarray(..., dtype=float)`` accepts).
        min_len: minimum number of entries a series needs to be kept.
            Defaults to 30, the previously hard-coded threshold.

    Returns:
        ``(codes, X)`` where ``codes`` is an array of the kept codes and ``X``
        is a float32 array with one row per kept series, in the same order.
        The kept series are expected to share one length so that they stack
        into a 2-D array.
    """
    codes = []
    rows = []

    for code, values in fund_records.items():
        if len(values) < min_len:
            continue
        # asarray (rather than values.astype) so plain lists work as well.
        if np.isnan(np.asarray(values, dtype=float)).any():
            continue
        codes.append(code)
        rows.append(values)

    return np.array(codes), np.array(rows, dtype="<f4")

def _rank_array(X, W=None, ascending=False):
    """Score the rows of the 2-D array X and return the scores in sorted order.

    Each cell is first replaced by its percentile rank within its own column
    (ranked from small to large). The ranks of every row are then combined
    into a single score as a weighted sum over the columns, using W.

    When W is omitted, column j (0-based) of an n-column X gets the weight
    (j + 1) / (n + 1).

    Returns a pandas Series of scores indexed by row position in X, sorted by
    score (descending unless ``ascending=True``).
    """
    weights = W
    if weights is None:
        n_cols = X.shape[1]
        weights = np.arange(1, n_cols + 1) / (n_cols + 1)

    pct_ranks = pd.DataFrame(X).rank(pct=True)
    scores = pct_ranks.dot(weights.T)

    return scores.sort_values(ascending=ascending)

def rank_fund(all_funds, net_values):
    """Return fund names ordered by the ranking of their value series.

    ``net_values`` is converted with ``fund_values_to_array``, the resulting
    rows are ordered by ``_rank_array`` (default weights, descending score),
    and every code is translated to its name through ``code2name``.
    """
    fund_codes, value_matrix = fund_values_to_array(net_values)

    ordering = _rank_array(value_matrix).index

    names = []
    for fund_code in fund_codes[ordering]:
        names.append(code2name(all_funds, fund_code))
    return names
        
def fetch_fund_values(funds):
    """Fetch the 30 latest values for every fund code found in *funds*.

    Returns the ``{code: values}`` dict produced by ``batch_get_acc_value``.
    """
    return batch_get_acc_value(get_fund_codes(funds), 30)
    
# Driver: download the fund list, fetch the 30 latest values of each fund,
# then rank the funds by those values.
funds = get_fund_list()
fund_values = fetch_fund_values(funds)
# Left as a bare expression; presumably so the notebook displays the ranked
# names as the cell output.
rank_fund(funds, fund_values)

NameError: name 'all_funds' is not defined