In [1]:
# Analyze sentence encodings to make sure nearest neighbors are meaningful

import tensorflow.compat.v1 as tf
import numpy as np
import sys
import os
import glob
import time

sys.path.append("..")
from bert import optimization
from bert import modeling
from bert import tokenization

# Checkpoint directories of the available BERT variants, keyed by a short nickname.
# NOTE(review): these are absolute paths on one machine; adjust before running elsewhere.
bert_paths = {
    "rbtl3": "/Users/zifei/Downloads/chinese-bert/chinese_rbtl3_L-3_H-1024_A-16",
    "rbt-ext": "/Users/zifei/Downloads/chinese-bert/chinese_roberta_wwm_ext_L-12_H-768_A-12",
}
# Variant used by the rest of the notebook.
bert_dir = bert_paths["rbtl3"]
# Tokenizer built from the selected checkpoint's vocab.txt.
tokenizer = tokenization.FullTokenizer(bert_dir + "/vocab.txt", do_lower_case=True)

# The model code further down uses TF1-style placeholders and tf.Session,
# so TF2 behavior is switched off up front.
tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


In [115]:
encodings_file = '/Volumes/WDMPBlue/Files/chinese-bert/poem-data//sent_encodings_all.tsv'

# Upper bound on the number of lines read from the encodings file. The default
# is large enough to take everything; lower it (e.g. 120000) if the full set
# does not fit in memory.
NUM_SENTS = 2000000

# sid -> embedding vector. Each line of the file is
# "<sentence id>\t<comma-separated floats>". No dynasty filtering happens here.
sid_to_emb = {}
with open(encodings_file) as enc_f:  # context manager closes the handle
    for lineno, line in enumerate(enc_f, 1):
        if lineno > NUM_SENTS:
            break

        sid, emb = line.rstrip().split("\t")
        emb = np.array([float(x) for x in emb.split(",")])
        sid_to_emb[int(sid)] = emb

len(sid_to_emb)

1251299

In [6]:
encodings_file = '/Volumes/WDMPBlue/Files/chinese-bert/poem-data//sent_encodings_all.tsv'
sents_file = "sents_all.tsv"

# Keep only sentences whose poem belongs to one of these dynasties.
DYNASTIES = {"唐"}

# NOTE: this cell reads pid_to_dynasty, which is built by the cell that loads
# poems_all.tsv further down in the notebook -- run that cell first.

# sid -> (pid, sentence text) for the selected dynasties.
# Each line of sents_file is "<sid>\t<pid>\t<sentence>".
sid_to_sent = {}
with open(sents_file, encoding="utf-8") as sents_f:
    for line in sents_f:
        sid, pid, sent = line.rstrip().split("\t")
        if pid_to_dynasty[int(pid)] not in DYNASTIES:
            continue
        sid_to_sent[int(sid)] = (pid, sent)

print("Num sents:", len(sid_to_sent))

# sid -> embedding vector, restricted to the sentences selected above.
# Not every selected sentence has a row in the encodings file (the saved
# output reports 91517 sentences but 90808 embeddings), so this dict can be
# smaller than sid_to_sent.
sid_to_emb = {}
with open(encodings_file) as enc_f:
    for lineno, line in enumerate(enc_f, 1):
        if lineno % 100000 == 1:
            print("Processing emb line", lineno)

        sid, emb = line.rstrip().split("\t")
        sid = int(sid)  # parse once; used for both the filter and the key
        if sid not in sid_to_sent:
            continue

        emb = np.array([float(x) for x in emb.split(",")])
        sid_to_emb[sid] = emb

print("Num embeddings:", len(sid_to_emb))

Num sents: 91517
Processing emb line 1
Processing emb line 100001
Processing emb line 200001
Processing emb line 300001
Processing emb line 400001
Processing emb line 500001
Processing emb line 600001
Processing emb line 700001
Processing emb line 800001
Processing emb line 900001
Processing emb line 1000001
Processing emb line 1100001
Processing emb line 1200001
Num embeddings: 90808


In [7]:
from sklearn.neighbors import NearestNeighbors

# Freeze one ordering of the sentence ids so that row i of X is the embedding
# of sentence idxes[i]. kneighbors() reports row positions, which the cells
# below translate back to sentence ids through idxes.
idxes = list(sid_to_emb.keys())
embeddings = [sid_to_emb[sid] for sid in idxes]
X = np.array(embeddings)
# Ball-tree index over X; every query returns its 10 nearest rows.
nbrs = NearestNeighbors(n_neighbors=10, algorithm='ball_tree').fit(X)

In [8]:
# Sanity check: largest sentence id in the index, and how many sentences were indexed.
max(idxes), len(idxes)

(412139, 90808)

In [None]:
# Load sid -> (pid, sentence text) for every sentence id up to the largest id
# present in the nearest-neighbour index (no dynasty filter in this cell).
# Ids in this range need not all have an embedding.
# Each line of sents_file is "<sid>\t<pid>\t<sentence>".
sid_to_sent = {}

max_idxes = max(idxes)
sents_file = "sents_all.tsv"
with open(sents_file, encoding="utf-8") as sents_f:
    for line in sents_f:
        sid, pid, sent = line.rstrip().split("\t")
        sid = int(sid)
        # Cut off on the parsed sentence id rather than on the line number:
        # the two only coincide when ids are 1-based and contiguous, and
        # otherwise the sentence with the largest indexed id could be dropped.
        if sid > max_idxes:
            continue
        sid_to_sent[sid] = (pid, sent)

len(sid_to_sent)

In [2]:
# Load poem metadata keyed by integer poem id (pid).
# Each line of poems_file is tab-separated; column 0 is the pid.
pid_to_poem = {}     # pid -> all remaining columns (everything after the pid)
pid_to_dynasty = {}  # pid -> column 1; the dynasty filter compares this against DYNASTIES

poems_file = "poems_all.tsv"
with open(poems_file, encoding="utf-8") as poems_f:  # closed on exit
    for line in poems_f:
        parts = line.rstrip().split("\t")
        poem_id = int(parts[0])  # parse the id once for both maps
        pid_to_poem[poem_id] = parts[1:]
        pid_to_dynasty[poem_id] = parts[1]

len(pid_to_poem)

128708

In [12]:
def format_poem(pid):
    """Return a one-line rendering of the poem with id ``pid``.

    ``pid`` may be an int or a numeric string; it is converted with ``int()``
    before the lookup in the module-level ``pid_to_poem``. The result is field 4
    of the poem record, then `` ——``, then field 2. In the saved output those
    are the poem text and a title carrying a "(period·author)" suffix.

    Raises:
        KeyError: if the poem id is not in ``pid_to_poem``.
    """
    fields = pid_to_poem[int(pid)]
    body = fields[4]
    heading = fields[2]
    return "{} ——{}".format(body, heading)

format_poem(20)

'孤峰高万丈，松下尽流云。天风一来去，松子落纷纷。 ——景星岩（当代·陈伟强）'

In [49]:
# Number of embedding rows in the nearest-neighbour matrix X.
len(X)

120000

In [None]:
import random
random.seed(1)

# Spot-check: for a few random sentences, list their nearest neighbours
# together with the poem each sentence comes from.
sample_size = 30
# random.sample() needs a sequence: passing a dict view is deprecated since
# Python 3.9 and raises TypeError from 3.11 on. list() keeps the dict's
# iteration order, so the seeded sample is the same as before.
sample = random.sample(list(sid_to_emb.keys()), sample_size)

Q = [sid_to_emb[sid] for sid in sample]
all_distances, all_indices = nbrs.kneighbors(Q)

for sid, distances, indices in zip(sample, all_distances, all_indices):
    sent = sid_to_sent[sid]
    print("{}/{}: {}  Orig: {}".format(sid, sent[0], sent[1], format_poem(sent[0])))
    for d, i in zip(distances, indices):
        # i is a row position in X; idxes maps it back to a sentence id.
        idx = idxes[i]
        sent = sid_to_sent[idx]
        print("- {} (idx = {}/{}, dist = {:.3f}). Orig: {}".format(sent[1], idx, sent[0], d, format_poem(sent[0])))
    print("")


In [22]:
# Dist threshold = 4

def print_similar(sample, dist_threshold=4.0):
    """Print each sampled sentence together with its close neighbours.

    For every sentence id in ``sample`` the module-level index ``nbrs`` is
    queried with that sentence's embedding. Neighbours at distance exactly 0.0
    are skipped, and listing stops at the first neighbour farther away than
    ``dist_threshold``. Sentences with no neighbour left after those two rules
    are not printed at all.

    Args:
        sample: sentence ids; each must be a key of ``sid_to_emb`` and
            ``sid_to_sent``.
        dist_threshold: largest distance at which a neighbour is still listed.

    Returns:
        None. Results are written to stdout.
    """
    queries = [sid_to_emb[query_sid] for query_sid in sample]
    dist_rows, pos_rows = nbrs.kneighbors(queries)

    for query_sid, dist_row, pos_row in zip(sample, dist_rows, pos_rows):
        # Nothing to show unless some neighbour is both non-identical and
        # not beyond the threshold.
        if not any(dist != 0.0 and not dist > dist_threshold for dist in dist_row):
            continue

        query_pid, query_text = sid_to_sent[query_sid]
        print("{}/{}: {}".format(query_sid, query_pid, query_text))
        for dist, pos in zip(dist_row, pos_row):
            if dist == 0.0:
                # Zero distance: the query itself or an identical embedding.
                continue
            if dist > dist_threshold:
                break
            # pos is a row position in X; idxes maps it back to a sentence id.
            neighbour_sid = idxes[pos]
            neighbour_pid, neighbour_text = sid_to_sent[neighbour_sid]
            print("- {} (idx = {}/{}, dist = {:.3f})".format(neighbour_text, neighbour_sid, neighbour_pid, dist))
        print("")


In [25]:
# Largest sentence id that currently has text loaded in sid_to_sent.
max(sid_to_sent.keys())

412139

In [56]:
random.seed(1)
sample_size = 50
# random.sample() needs a sequence: passing a dict view is deprecated since
# Python 3.9 and raises TypeError from 3.11 on. list() keeps the dict's
# iteration order, so the seeded sample is the same as before.
sample = random.sample(list(sid_to_emb.keys()), sample_size)
print_similar(sample)


17749/1918: 篱前袒腹数飞鸿。
- 颠连人俟大河清。 (idx = 875/98, dist = 3.260)
- 白蘋洲畔欲移家。 (idx = 98817/11095, dist = 3.413)
- 帆樯两岸阴。 (idx = 79630/8892, dist = 3.663)
- 黄鹄天边并翼难。 (idx = 32909/3626, dist = 3.666)
- 芙蓉蹙沓倚檐多。 (idx = 72727/8105, dist = 3.671)
- 猿狖声多入棹哀。 (idx = 62888/7067, dist = 3.677)
- 画舫移歌入渺瀰。 (idx = 63862/7171, dist = 3.680)
- 长安放衙初岸帻。 (idx = 44824/5018, dist = 3.722)
- 残霭日犹扶。 (idx = 22635/2483, dist = 3.768)

105976/11907: 才见黄花放几枝。
- 又见黄花满径开。 (idx = 110710/12452, dist = 3.282)

100892/11336: 秋光祖席送孤蓬。
- 僧舍春寒忆酒旗。 (idx = 31701/3481, dist = 3.947)
- 清燕遥传葛令砂。 (idx = 40987/4576, dist = 3.948)

8336/884: 此道素相期。
- 此日重分茅。 (idx = 31486/3455, dist = 3.912)

15576/1646: 无月无风无雨。
- 无时无处无风雨。 (idx = 12206/1276, dist = 3.920)

100520/11295: 驽骑胜鸣驺。
- 螮蝀欲飞虹。 (idx = 115602/12997, dist = 3.714)
- 俛仰唐虞际。 (idx = 40386/4506, dist = 3.865)

59376/6649: 餐霞几百秋。
- 风送新凉。 (idx = 559/64, dist = 3.826)
- 波晚浴明霞。 (idx = 5507/621, dist = 3.919)
- 天上红云动。 (idx = 96041/10769, dist = 3.967)

62382/7008: 此地几登历。
- 所志在严壑。 (idx =

In [26]:
# TANG only
import random
random.seed(1)
sample_size = 50
# random.sample() needs a sequence: passing a dict view is deprecated since
# Python 3.9 and raises TypeError from 3.11 on. list() keeps the dict's
# iteration order, so the seeded sample is the same as before.
sample = random.sample(list(sid_to_emb.keys()), sample_size)
print_similar(sample)


338372/37531: 来信应无已，
- 怀情方未已， (idx = 323079/35768, dist = 3.594)
- 至理无言了， (idx = 349385/38859, dist = 3.692)
- 所思终不来， (idx = 383478/43048, dist = 3.718)
- 握手言未毕， (idx = 407592/45913, dist = 3.811)
- 结言本同心， (idx = 409681/46156, dist = 3.852)
- 终当来其滨， (idx = 368772/41227, dist = 3.964)

395812/44521: 御楼初见赭黄衣。
- 溪笺惹御香。 (idx = 398385/44819, dist = 3.562)
- 徒见浦花繁。 (idx = 391791/44019, dist = 3.730)
- 醉倚王家玳瑁筵。 (idx = 361274/40358, dist = 3.802)
- 御苑听残莺。 (idx = 410654/46264, dist = 3.892)
- 愁杀陇头人。 (idx = 370787/41462, dist = 3.893)
- 秋庭惟见长莓苔。 (idx = 344217/38277, dist = 3.958)
- 黄花戍上雁长飞。 (idx = 355145/39580, dist = 3.962)
- 金銮徒候白榆风。 (idx = 384550/43174, dist = 3.964)
- 莺语禁林春。 (idx = 387680/43534, dist = 3.979)

354316/39477: 香名播宋朝。
- 西汉祖筵开。 (idx = 363217/40596, dist = 3.485)
- 雪中朝海神。 (idx = 410528/46245, dist = 3.511)
- 锡号纪鸿名。 (idx = 369149/41267, dist = 3.618)
- 世名谭子池。 (idx = 410961/46303, dist = 3.623)
- 李陵音信稀。 (idx = 340435/37792, dist = 3.719)
- 鹓鹭集朝伦。 (idx = 333651/37050, dist = 3.780)
-

In [9]:
# Try to encode an arbitrary sentence and search for poems
# NOTE(review): the config file name is hard-coded to the rbtl3 variant even
# though bert_dir is picked from bert_paths; confirm the name if bert_dir is
# switched to another entry.
bert_config = modeling.BertConfig.from_json_file(bert_dir + "/bert_config_rbtl3.json")

def create_placeholders(batch_size=None, seq_length=16):
    """Create the int32 input placeholders fed to the BERT graph.

    Args:
        batch_size: first dimension of each placeholder; ``None`` leaves it
            unspecified.
        seq_length: second dimension of each placeholder.

    Returns:
        dict with keys ``"input_ids"`` and ``"input_mask"``, each a
        ``tf.placeholder`` of dtype int32 and shape ``[batch_size, seq_length]``.
    """
    # Built in a fixed order (ids first, then mask), one fresh shape list each.
    return {
        name: tf.placeholder(tf.int32, [batch_size, seq_length])
        for name in ("input_ids", "input_mask")
    }

# Graph inputs, created with the defaults of create_placeholders()
# (unspecified batch size, sequence length 16).
features = create_placeholders()

# CREATE BERT MODEL

# Inference-mode encoder (is_training=False) wired to the placeholders.
# token_type_ids=None: no segment ids are fed.
# NOTE(review): presumably modeling.BertModel then falls back to all-zero segment ids -- confirm.
model = modeling.BertModel(
  config=bert_config,
  is_training=False,
  input_ids=features["input_ids"],
  input_mask=features["input_mask"],
  token_type_ids=None,
  use_one_hot_embeddings=False)

# Sentence-level vector; retrieve_by_sentence() evaluates this tensor to get the query embedding.
pooled_output = model.get_pooled_output()


init_checkpoint = bert_dir + '/bert_model.ckpt'
# Always False in this notebook, so the else-branch below is the one that runs.
use_tpu = False

tvars = tf.trainable_variables()
scaffold_fn = None
# assignment_map: which checkpoint entries initialize which graph variables.
# initialized_variable_names: used below only to tag the printed variable list.
(assignment_map,
 initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
     tvars, init_checkpoint)

# HACK for keras LayerNorm
# Rewrites the map in place: every key ending in "/gamma" is dropped, and every
# key ending in "/beta" is replaced by its enclosing scope (the key minus the
# trailing "beta", i.e. ending in "/") mapped to itself.
# NOTE(review): presumably this makes each LayerNorm gamma/beta pair restore by
# scope instead of by variable name -- confirm against the bert/modeling version in use.
# list(...) snapshots the keys because the dict is mutated inside the loop.
for k in list(assignment_map.keys()):
    if k.endswith("/gamma"):
        del assignment_map[k]
    if k.endswith("/beta"):
        assignment_map[k[:-4]] = k[:-4]
        del assignment_map[k]
        
if use_tpu:
  # TPU path: defer the checkpoint initialization into a scaffold function.
  def tpu_scaffold():
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    return tf.train.Scaffold()
  scaffold_fn = tpu_scaffold
else:
  tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

# List every trainable variable, tagging those reported as initialized from the checkpoint.
tf.logging.info("**** Trainable Variables ****")
for var in tvars:
  init_string = ""
  if var.name in initialized_variable_names:
    init_string = ", *INIT_FROM_CKPT*"
  print("  name = %s, shape = %s%s" % (var.name, var.shape, init_string))


# Session used by retrieve_by_sentence().
# NOTE(review): running the global initializer is presumably what applies the
# checkpoint values set up by init_from_checkpoint above -- confirm.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

def tokenize(sents_batch, seq_len=16):
    """Convert raw sentences into fixed-length BERT input ids and attention masks.

    Each sentence is tokenized with the module-level ``tokenizer``, wrapped as
    ``[CLS] ... [SEP]`` and right-padded with id 0 up to ``seq_len``. In the
    saved output id 0 decodes to ``[PAD]``.

    Over-long sentences are truncated *before* the special tokens are added, so
    the closing ``[SEP]`` is kept. The previous version sliced the finished id
    list and silently dropped ``[SEP]`` whenever a sentence produced more than
    ``seq_len - 2`` tokens.
    NOTE(review): if the stored corpus encodings were produced with the old
    truncation, very long queries will now be encoded slightly differently
    from the corpus -- confirm how the corpus was tokenized.

    Args:
        sents_batch: iterable of sentence strings.
        seq_len: total number of positions per sentence, including ``[CLS]``
            and ``[SEP]``.

    Returns:
        ``(batch_ids, batch_masks)``: two lists with one inner list of
        ``seq_len`` ints per sentence. A mask entry is 1 for a real token and
        0 for padding.
    """
    # Room left for word pieces once [CLS] and [SEP] are accounted for.
    max_pieces = max(seq_len - 2, 0)

    batch_ids = []
    batch_masks = []
    for s in sents_batch:
        pieces = tokenizer.tokenize(s)[:max_pieces]
        tokens = ["[CLS]"] + pieces + ["[SEP]"]
        # The slice only has an effect in the degenerate seq_len < 2 case.
        ids = tokenizer.convert_tokens_to_ids(tokens)[:seq_len]
        num_real = len(ids)
        num_pad = seq_len - num_real
        batch_ids.append(ids + [0] * num_pad)
        # Mask follows the real-token count instead of being inferred from id > 0.
        batch_masks.append([1] * num_real + [0] * num_pad)
    return batch_ids, batch_masks


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
INFO:tensorflow:**** Trainable Variables ****
  name = bert/embeddings/word_embeddings:0, shape = (21128, 1024), *INIT_FROM_CKPT*
  name = bert/embeddings/token_type_embeddings:0, shape = (2, 1024), *INIT_FROM_CKPT*
  name = bert/embeddings/position_embeddings:0, shape = (512, 1024), *INIT_FROM_CKPT*
  name = bert/embeddings/LayerNorm/gamma:0, shape = (1024,), *INIT_FROM_CKPT*
  name = bert/embeddings/LayerNorm/beta:0, shape = (1024,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (1024,), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/self/key/kernel:0, shape = (1024, 1024), *INIT_FROM_CKPT*
  name = bert/encoder/layer_0/attention/s

In [10]:
def retrieve_by_sentence(sentence, verbose=True):
    """Encode ``sentence`` with the BERT graph and print its nearest corpus sentences.

    The sentence is tokenized with ``tokenize`` and its token ids and tokens
    are echoed. ``pooled_output`` is then evaluated in the module-level session
    and the resulting vector is looked up in the ``nbrs`` index. One line is
    printed per neighbour.

    Args:
        sentence: query text.
        verbose: when true, also print the poem each neighbour comes from
            (via ``format_poem``).

    Returns:
        None. Results are written to stdout.
    """
    batch_ids, batch_masks = tokenize([sentence])
    token_ids = batch_ids[0]
    print(token_ids)
    print(tokenizer.convert_ids_to_tokens(token_ids))

    feed = {
        features["input_ids"]: batch_ids,
        features["input_mask"]: batch_masks,
    }
    # Single-sentence batch, so only row 0 of the result is of interest.
    query_emb = sess.run(pooled_output, feed_dict=feed)[0]

    dist_rows, pos_rows = nbrs.kneighbors([query_emb])
    for dist, pos in zip(dist_rows[0], pos_rows[0]):
        # pos is a row position in X; idxes maps it back to a sentence id.
        neighbour_sid = idxes[pos]
        neighbour_pid, neighbour_text = sid_to_sent[neighbour_sid]
        print("- {} (idx = {}/{}, dist = {:.3f})".format(neighbour_text, neighbour_sid, neighbour_pid, dist))
        if verbose:
            print("\t- {}".format(format_poem(neighbour_pid)))


In [30]:
# Free-text query; every poem in the saved output below is from the Tang dynasty.
retrieve_by_sentence("洛阳亲友如相问，")  # index restricted to Tang-dynasty sentences

[101, 3821, 7345, 779, 1351, 1963, 4685, 7309, 8024, 102, 0, 0, 0, 0, 0, 0]
['[CLS]', '洛', '阳', '亲', '友', '如', '相', '问', '，', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
- 邻舍女郎相借问， (idx = 388793/43668, dist = 4.444)
	- 暖日闲窗映碧纱，小池春水浸晴霞。数树海棠红欲尽，争忍，玉闺深掩过年华。独凭绣床方寸乱，肠断，泪珠穿破脸边花。邻舍女郎相借问，音信，教人羞道未还家。 ——定风波（唐·欧阳炯）
- 深谢名贤远相访， (idx = 362576/40513, dist = 4.496)
	- 罢修儒业罢修真，养拙藏愚春复春。到老不疏林里鹿，平生难见日边人。洞桃深处千林锦，岩雪铺时万草新。深谢名贤远相访，求闻难博凤为邻。 ——荅常学士（唐·李梦符）
- 思君一相访， (idx = 375629/42082, dist = 4.599)
	- 车马长安道，谁知大隐心。蛮僧留古镜，蜀客寄新琴。晒药竹斋暖，捣茶松院深。思君一相访，残雪似山阴。 ——寻戴处士（唐·许浑）
- 众知圣主搜贤相， (idx = 392801/44143, dist = 4.726)
	- 彩笔曾专造化权，道尊翻向宦途闲。端居有地唯栽药，静坐无时不忆山。德望旧悬霄汉外，政声新溢路歧间。众知圣主搜贤相，朝夕欲徵黄霸还。 ——随州献李侍御二首（唐·姚鹄）
- 南阳葛亮为友朋， (idx = 326901/36205, dist = 4.766)
	- 杜拾遗，名甫第二才甚奇。任生与君别，别来已多时，何尝一日不相思。杜拾遗，知不知，昨日有人诵得数篇黄绢词。吾怪异奇特借问，果然（一本无然字）称是杜二之所为。势攫虎豹，气腾蛟螭。沧海无风似鼓荡，华岳平地欲奔驰。曹刘俯仰惭大敌，沈谢逡巡称小儿。昔在帝城中，盛名君一个。诸人见所作，无不心胆破。郎官丛里作狂歌，丞相阁中常醉卧。前年皇帝归长安，承恩阔步青云端。积翠扈游花匼匝，披香寓直月团栾。英才特达承天眷，公卿无（一作谁）不相钦慕。只缘汲黯好直言，遂使安仁却为掾。如今避地锦城隅，幕下英僚每日相随（一作就）提玉壶。半醉起舞捋

In [112]:
# Free-text query; in the saved output the closest hit is a near-identical
# corpus line at distance 3.618.
retrieve_by_sentence("多情没法追。")

[101, 1914, 2658, 3766, 3791, 6841, 511, 102, 0, 0, 0, 0, 0, 0, 0, 0]
['[CLS]', '多', '情', '没', '法', '追', '。', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
- 多情无计追。 (idx = 114155/12829, dist = 3.618)
	- 谩说春归去，多情无计追。胜游须秉烛，余兴独临池。鸟怨花飞急，杯嫌月到迟。三眠杨柳足，正是浴蚕时。 ——春去（明·蔡羽）
- 奇情不易收。 (idx = 78089/8713, dist = 3.989)
	- 高深积气浮，水石怒相求。胜绝频宜顾，奇情不易收。苍凉难久立，浩荡复谁留。诗思江天涌，春云满益州。 ——过巫峡（明·许国佐）
- 力学未能忘。 (idx = 725/79, dist = 4.019)
	- 酒绿灯红夜，全家笑语忙。市声喧爆竹，庭籁掩笙簧。盘颂椒花瑞，杯传柏叶香。明朝贺元日，力学未能忘。 ——除夜（民国末当代初·吴未淳）
- 人心未可猜。 (idx = 46969/5262, dist = 4.160)
	- 绝顶南山上，斯楼亦壮哉！群峰排戟立，一水抱城来。地利虽云险，人心未可猜。若非得良牧，剑阁也罹灾。 ——登来熏楼（明·叶元玉）
- 临发复忘还。 (idx = 110552/12429, dist = 4.204)
	- 一道珠帘水，长悬苍翠间。冷风吹白日，急雨响空山。石濑林端齿，潭清镜里颜。采奇欣共赋，临发复忘还。 ——水帘飞瀑（明·薛侃）
- 私情可奈何。 (idx = 65715/7386, dist = 4.268)
	- 危崖通绝磴，枯木挂长萝。怀古夜郎道，劳神滴泪坡。阴晴咫尺异，冰雪涧沟多。岁暮仍行役，私情可奈何。 ——滴泪坡（明·王绅）
- 邀凉不下除。 (idx = 49682/5561, dist = 4.301)
	- 杖策寻丹壑，郊原正雨馀。老怀时自适，人事故相须。见月多佳句，邀凉不下除。白头长念我，还缉泮西庐。 ——次韵莫锦衣见月有怀（明·区越）
- 怕见问行藏。 (idx = 3787/438, dist = 4.315)
	- 枕上容予懒，春回昼渐

In [114]:
# Free-text query; in the saved output the closest hit is a near-identical
# corpus line at distance 2.937.
retrieve_by_sentence("仗剑离家方年少，")

[101, 801, 1187, 4895, 2157, 3175, 2399, 2208, 8024, 102, 0, 0, 0, 0, 0, 0]
['[CLS]', '仗', '剑', '离', '家', '方', '年', '少', '，', '[SEP]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]', '[PAD]']
- 仗剑出门年正少， (idx = 118213/13270, dist = 2.937)
	- 板扉泥径草深深。没人寻。有蛩吟。记得炙鸡絮酒、共谈心。仗剑出门年正少，家不顾，陆将沉。白头老母尚于今。伴荆簪。守寒砧。恨望长沙子弟、渡湘阴。料想令威仍化鹤，风月夜，返空林。 ——江城子 堵寅叔故居（明末清初·张夏）
- 百年事过风前烛， (idx = 112220/12599, dist = 3.235)
	- 欲向西风酹一樽，乾坤何处著英魂。百年事过风前烛，千古名传海上村。香火半龛谁地主，孙枝一叶是君恩。夕阳满地伤心泪，付与江流自吐吞。 ——谒侯城里有感二首 其二（明·谢铎）
- 抱瓮百年同过客， (idx = 6394/720, dist = 3.314)
	- 抱瓮百年同过客，稽天大浸入穷途。桃源恍隔人间世，蝶梦亲窥镜底吾。尚想中山千日醉，不辞业海一身孤。艰难怀药劳洴澼，敢虑瓠尊泛五湖。 ——坐叹叠归舟韵（近现代末当代初·张北海）
- 行迹中年半近妖， (idx = 16956/1830, dist = 3.589)
	- 行迹中年半近妖，黄金随手旧时豪。楚辞一卷和谐换，亦有风情似六朝。 ——自题小像（当代·姜军）
- 人家寒闭户， (idx = 52646/5874, dist = 3.654)
	- 主人閒馆肃，此日一阳生。树带葳蕤白，池含凛冽清。人家寒闭户，山县早关城。野衲复何事，烧炉向到明。 ——长至日题曾某馆壁（明·释今沼）
- 酒坐莫有少年客， (idx = 66278/7420, dist = 3.669)
	- 海天寒馆霜作威，指冠短发不受吹。老生未肯自称老，知我已非少年时。君少于予才二岁，四十封拜亦已迟。况俱失路乡里中，子犹豪健吾已衰。丈夫不能与世选，喙长三尺何处开。蹙缩眼眶髡须眉，我学蝇声控舌语，世人听之曰太奇。我欲作贼，贼不可为。我欲登仙，肉重难飞。我将为今世之文士，今世

In [None]:
# Sample in 40k

In [39]:
import random
random.seed(1)

# Spot-check: list the nearest neighbours of a few random sentences. The
# zero-distance first hit in the saved output is the query sentence itself.
sample_size = 10
# random.sample() needs a sequence: passing a dict view is deprecated since
# Python 3.9 and raises TypeError from 3.11 on. list() keeps the dict's
# iteration order, so the seeded sample is the same as before.
sample = random.sample(list(sid_to_emb.keys()), sample_size)

Q = [sid_to_emb[sid] for sid in sample]
all_distances, all_indices = nbrs.kneighbors(Q)

for sid, distances, indices in zip(sample, all_distances, all_indices):
    sent = sid_to_sent[sid]
    print("{}/{}: {}".format(sid, sent[0], sent[1]))
    for d, i in zip(distances, indices):
        # i is a row position in X; idxes maps it back to a sentence id.
        idx = idxes[i]
        sent = sid_to_sent[idx]
        print("- {} (idx = {}/{}, dist = {:.3f})".format(sent[1], idx, sent[0], d))
    print("")


8874/953: 旧盟断比红蕊，
- 旧盟断比红蕊， (idx = 8874/953, dist = 0.000)
- 绕舍乌成阵， (idx = 33313/3682, dist = 4.653)
- 露华向靳金茎下， (idx = 26958/2975, dist = 4.660)
- 两雏双白璧， (idx = 37961/4212, dist = 4.920)
- 履世能销几緉屐， (idx = 13210/1390, dist = 4.994)
- 绿径摇金菊， (idx = 11294/1166, dist = 5.015)
- 金波忽向杯中堕， (idx = 26968/2976, dist = 5.024)
- 两琴颉颃相荡摩， (idx = 16144/1715, dist = 5.066)
- 腐儒此夕南冠坐， (idx = 16038/1696, dist = 5.134)
- 旧札看残烛， (idx = 25829/2839, dist = 5.169)

37595/4172: 谁怜采凤阻千寻。
- 谁怜采凤阻千寻。 (idx = 37595/4172, dist = 0.000)
- 岂让革囊渡。 (idx = 7817/831, dist = 4.408)
- 檐雀啼风未忍闻。 (idx = 1087/132, dist = 4.561)
- 谁似暂幽沉。 (idx = 35185/3896, dist = 4.585)
- 青萍荡碧船自归。 (idx = 8911/954, dist = 4.617)
- 翠袖愁添竹露寒。 (idx = 8424/904, dist = 4.627)
- 暂逢幽赏莫踟蹰。 (idx = 30688/3362, dist = 4.677)
- 谁如朱祐功。 (idx = 21639/2379, dist = 4.680)
- 王孙春草竟如何。 (idx = 31963/3516, dist = 4.683)
- 岂堪攀折赠离人。 (idx = 24435/2671, dist = 4.700)

4168/464: 任重路迢递。
- 任重路迢递。 (idx = 4168/464, dist = 0.000)
- 修褉永和同。 (idx = 18427/2007, dist = 4.068)
- 一剑自

## Sample of similar sentences 

8874/953: 旧盟断比红蕊，
- 旧盟断比红蕊， (idx = 8874/953, dist = 0.000)
- 绕舍乌成阵， (idx = 33313/3682, dist = 4.653)
- 露华向靳金茎下， (idx = 26958/2975, dist = 4.660)
- 两雏双白璧， (idx = 37961/4212, dist = 4.920)
- 履世能销几緉屐， (idx = 13210/1390, dist = 4.994)
- 绿径摇金菊， (idx = 11294/1166, dist = 5.015)
- 金波忽向杯中堕， (idx = 26968/2976, dist = 5.024)
- 两琴颉颃相荡摩， (idx = 16144/1715, dist = 5.066)
- 腐儒此夕南冠坐， (idx = 16038/1696, dist = 5.134)
- 旧札看残烛， (idx = 25829/2839, dist = 5.169)

37595/4172: 谁怜采凤阻千寻。
- 谁怜采凤阻千寻。 (idx = 37595/4172, dist = 0.000)
- 岂让革囊渡。 (idx = 7817/831, dist = 4.408)
- 檐雀啼风未忍闻。 (idx = 1087/132, dist = 4.561)
- 谁似暂幽沉。 (idx = 35185/3896, dist = 4.585)
- 青萍荡碧船自归。 (idx = 8911/954, dist = 4.617)
- 翠袖愁添竹露寒。 (idx = 8424/904, dist = 4.627)
- 暂逢幽赏莫踟蹰。 (idx = 30688/3362, dist = 4.677)
- 谁如朱祐功。 (idx = 21639/2379, dist = 4.680)
- 王孙春草竟如何。 (idx = 31963/3516, dist = 4.683)
- 岂堪攀折赠离人。 (idx = 24435/2671, dist = 4.700)

4168/464: 任重路迢递。
- 任重路迢递。 (idx = 4168/464, dist = 0.000)
- 修褉永和同。 (idx = 18427/2007, dist = 4.068)
- 一剑自能任。 (idx = 39796/4444, dist = 4.112)
- 总负春归。 (idx = 11068/1148, dist = 4.301)
- 抗志罕所同。 (idx = 22445/2470, dist = 4.428)
- 应许仲由仁。 (idx = 16023/1694, dist = 4.437)
- 路途任两歧。 (idx = 8661/924, dist = 4.483)
- 路途任两歧。 (idx = 8334/884, dist = 4.483)
- 总波心易逝。 (idx = 9054/963, dist = 4.565)
- 鲁连安与论。 (idx = 2001/239, dist = 4.570)

16847/1812: 累息有危民。
- 累息有危民。 (idx = 16847/1812, dist = 0.000)
- 星多失地球。 (idx = 16402/1745, dist = 5.335)
- 有所丰收有所贫。 (idx = 37766/4188, dist = 5.413)
- 安危人定。 (idx = 3191/366, dist = 5.479)
- 人事有消亡。 (idx = 29401/3206, dist = 5.511)
- 都缘保障为斯民。 (idx = 32429/3560, dist = 5.533)
- 有约共沧洲。 (idx = 39165/4368, dist = 5.645)
- 消损。 (idx = 10936/1140, dist = 5.665)
- 经年良约负湔裙。 (idx = 7632/821, dist = 5.668)
- 新有脱贫村。 (idx = 12281/1285, dist = 5.689)

7788/830: 不妨唯诺。
- 不妨唯诺。 (idx = 7788/830, dist = 0.000)
- 服药未还童。 (idx = 18156/1980, dist = 5.144)
- 未妨同醉明月。 (idx = 15628/1649, dist = 5.227)
- 于兹莫不同。 (idx = 31809/3494, dist = 5.252)
- 能否换君愉。 (idx = 13897/1472, dist = 5.297)
- 堪问。 (idx = 9210/974, dist = 5.411)
- 旅况复何如。 (idx = 21769/2397, dist = 5.419)
- 不用再相陈。 (idx = 39356/4390, dist = 5.555)
- 轻若未能担。 (idx = 9844/1040, dist = 5.602)
- 经时谢应酬。 (idx = 38767/4312, dist = 5.686)

32722/3600: 冠制喜从周礼后，
- 冠制喜从周礼后， (idx = 32722/3600, dist = 0.000)
- 月下衣冠出， (idx = 26667/2938, dist = 3.876)
- 眼底白云来次第， (idx = 4633/510, dist = 4.142)
- 千里之行始足下， (idx = 22598/2479, dist = 4.195)
- 谈经依壁水， (idx = 24064/2624, dist = 4.257)
- 一片入中心， (idx = 15379/1622, dist = 4.260)
- 夙将三礼魁天下， (idx = 21632/2378, dist = 4.270)
- 明发成何计， (idx = 37323/4142, dist = 4.281)
- 灯负春前约， (idx = 32090/3532, dist = 4.283)
- 共对花前月下时， (idx = 18233/1990, dist = 4.322)

29688/3240: 翠岭连双驿，
- 翠岭连双驿， (idx = 29688/3240, dist = 0.000)
- 淄水穿原绿， (idx = 29420/3208, dist = 4.684)
- 江山晴入画， (idx = 11348/1173, dist = 4.700)
- 岛上田横客， (idx = 31730/3485, dist = 4.715)
- 山川绕红蓼， (idx = 39875/4454, dist = 4.744)
- 九州圆似掌， (idx = 16441/1750, dist = 4.750)
- 寒山到处客双屐， (idx = 30022/3284, dist = 4.756)
- 桃叶渡头三载客， (idx = 24610/2690, dist = 4.768)
- 别馆瀛洲丽， (idx = 33494/3706, dist = 4.794)
- 有鹤南飞新制曲， (idx = 11215/1156, dist = 4.807)

31191/3420: 花边小几听禽坐，
- 花边小几听禽坐， (idx = 31191/3420, dist = 0.000)
- 醉抱白头灯影下， (idx = 29064/3165, dist = 3.962)
- 送目雁边几忘世， (idx = 13202/1389, dist = 4.045)
- 携酒江边吹笛坐， (idx = 32820/3614, dist = 4.088)
- 弯弓月倚阑干上， (idx = 27192/3001, dist = 4.258)
- 手里支藤长过肩， (idx = 36624/4059, dist = 4.287)
- 离堆茗寮外， (idx = 17144/1854, dist = 4.305)
- 偶来空洞坐， (idx = 30867/3386, dist = 4.317)
- 轻寒搁住天边梦， (idx = 13024/1368, dist = 4.346)
- 掩卷山窗下， (idx = 25675/2819, dist = 4.362)

25073/2748: 猿啸惊还乱。
- 猿啸惊还乱。 (idx = 25073/2748, dist = 0.000)
- 薰风萦袖暗相随。 (idx = 1077/130, dist = 4.361)
- 怪蜃嘘云。 (idx = 4319/470, dist = 4.397)
- 竟夕倚秋声。 (idx = 36415/4032, dist = 4.494)
- 黉宇凄迷夜欲阑。 (idx = 9541/1010, dist = 4.560)
- 高春逼暮容。 (idx = 25868/2843, dist = 4.609)
- 两鬓禁愁短又苍。 (idx = 34530/3823, dist = 4.617)
- 心荒劫后沙。 (idx = 10284/1086, dist = 4.653)
- 涛怒太湖狂。 (idx = 12630/1327, dist = 4.729)
- 涛怒乱帆收。 (idx = 36667/4066, dist = 4.739)

13867/1469: 北照紫荆关。
- 北照紫荆关。 (idx = 13867/1469, dist = 0.000)
- 南客话寒天。 (idx = 26551/2922, dist = 3.385)
- 分水向彭城。 (idx = 38424/4269, dist = 3.634)
- 春星照剑明。 (idx = 39676/4428, dist = 3.756)
- 林下清溪冷。 (idx = 8467/909, dist = 3.758)
- 汤汤下海东。 (idx = 12339/1293, dist = 3.764)
- 五老集西郊。 (idx = 14390/1516, dist = 3.771)
- 黄叶客途赊。 (idx = 31142/3414, dist = 3.790)
- 同归竹林里。 (idx = 33060/3644, dist = 3.819)
- 霜月倚栏干。 (idx = 8358/888, dist = 3.828)
