In [1]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
import os

# 加载预训练的VGG16模型，不包括顶层（全连接层）
model = VGG16(weights='imagenet', include_top=False)

# 图像文件夹路径
left_image_folder = 'C:\\Users\\gaoyi\\Desktop\\CV\\assi\\finalproject\\COMP90086_2023_TLLdataset\\test\\left'
right_image_folder = 'C:\\Users\\gaoyi\\Desktop\\CV\\assi\\finalproject\\COMP90086_2023_TLLdataset\\test\\right'

# 初始化列表以保存提取的特征
left_features_list = []
right_features_list = []

# 遍历左图文件夹中的每张图像
for filename in os.listdir(left_image_folder):
    if filename.endswith('.jpg'):
        # 构建左图像路径
        left_image_path = os.path.join(left_image_folder, filename)

        # 加载并预处理左图像
        left_img = image.load_img(left_image_path)
        left_img = image.img_to_array(left_img)
        left_img = np.expand_dims(left_img, axis=0)
        left_img = preprocess_input(left_img)

        # 提取左图像特征
        left_features = model.predict(left_img)
        left_features = left_features.reshape((left_features.shape[0], -1))

        # 将左图像特征添加到列表中
        left_features_list.append(left_features)

# 遍历右图文件夹中的每张图像，采用相同的方法提取特征
for filename in os.listdir(right_image_folder):
    if filename.endswith('.jpg'):
        # 构建右图像路径
        right_image_path = os.path.join(right_image_folder, filename)

        # 加载并预处理右图像
        right_img = image.load_img(right_image_path)
        right_img = image.img_to_array(right_img)
        right_img = np.expand_dims(right_img, axis=0)
        right_img = preprocess_input(right_img)

        # 提取右图像特征
        right_features = model.predict(right_img)
        right_features = right_features.reshape((right_features.shape[0], -1))

        # 将右图像特征添加到列表中
        right_features_list.append(right_features)

# 将左图特征列表和右图特征列表转化为NumPy数组
left_features_array = np.vstack(left_features_list)
right_features_array = np.vstack(right_features_list)

# 打印左图和右图特征数组的形状
print("Left Features Shape:", left_features_array.shape)
print("Right Features Shape:", right_features_array.shape)



















































Left Features Shape: (2000, 21504)
Right Features Shape: (2000, 21504)


In [48]:
left_features_dict = {}
for i, filename in enumerate(os.listdir(left_image_folder)):
    if filename.endswith('.jpg'):
        left_image_name = os.path.splitext(filename)[0]
        left_feature_vector = left_features_array[i]
        left_features_dict[left_image_name] = left_feature_vector

# 创建右图特征字典
right_features_dict = {}
for i, filename in enumerate(os.listdir(right_image_folder)):
    if filename.endswith('.jpg'):
        right_image_name = os.path.splitext(filename)[0]
        right_feature_vector = right_features_array[i]
        right_features_dict[right_image_name] = right_feature_vector

In [2]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [39]:
left_features = left_features_array 
right_features = right_features_array 

In [4]:
similarity_matrix = cosine_similarity(left_features, right_features)

In [11]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# 假设 left_features 和 right_features 分别是包含特征的列表

# 初始化一个空的列表来存储结果数据
result_data = []

# 遍历每张左图
for i in range(len(left_features)):
    left_image_name = f'left_image_{i}'  # 左图的名字
    left_feature = left_features[i]
    
    # 计算左图与所有右图的余弦相似度
    similarities = cosine_similarity([left_feature], right_features)[0]
    
    # 找到相似度最高的20张右图的索引
    top_indices = np.argsort(similarities)[-20:]
    
    # 获取这些右图的相似度分数
    similarity_scores = similarities[top_indices]
    
    # 构建一行数据，包括左图名字和相似度分数
    data = [left_image_name] + list(similarity_scores)
    
    # 将数据添加到结果数据列表
    result_data.append(data)

# 设定列名
columns = ['left'] + [f'c{i}' for i in range(20)]

# 创建结果DataFrame
result = pd.DataFrame(result_data, columns=columns)

# 保存结果到CSV文件
result.to_csv('similarities.csv', index=False)



In [44]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 读取包含左图和20张右图信息的CSV文件
input_file = 'C:\\Users\\gaoyi\\Desktop\\CV\\assi\\finalproject\\COMP90086_2023_TLLdataset\\test_candidates.csv'
data = pd.read_csv(input_file)

# 假设左图的特征存储在 left_features 中，右图的特征存储在 right_features 中
left_features = left_features_array
right_features = right_features_array
# 初始化一个空的列表来存储结果数据
result_data = []

# 遍历每行数据，计算左图与20张右图的相似度
for index, row in data.iterrows():
    left_image_name = row['left']  # 左图的名字
    right_image_names = row.drop('left')  # 右图的名字
    
    # 获取左图的特征
    left_feature = left_features[index]
    
    # 获取对应的右图特征
    right_feature_indices = [int(name.split('_')[1]) for name in right_image_names]
    right_features_subset = [right_features[i] for i in right_feature_indices]
    
    # 计算左图与20张右图的余弦相似度
    similarities = cosine_similarity([left_feature], right_features_subset)[0]
    
    # 构建一行数据，包括左图名字和相似度分数
    data = [left_image_name] + list(similarities)
    
    # 将数据添加到结果数据列表
    result_data.append(data)

# 设定列名
columns = ['left'] + [f'c{i}' for i in range(20)]

# 创建结果DataFrame
result = pd.DataFrame(result_data, columns=columns)

# 保存结果到CSV文件
result.to_csv('output_similarity.csv', index=False)


AttributeError: 'numpy.ndarray' object has no attribute 'index'

In [34]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# 读取左图和右图特征数据


# 读取包含左图和右图名称的CSV文件
test_candidates = pd.read_csv("C:\\Users\\gaoyi\\Desktop\\CV\\assi\\finalproject\\COMP90086_2023_TLLdataset\\test_candidates.csv")

# 创建字典将左图和右图名称映射到它们在特征数组中的索引
left_image_name_to_index = {}
right_image_name_to_index = {}

# 填充左图名称到索引的字典，假设左图名称在左图特征中的索引是连续的
for i, left_image_name in enumerate(test_candidates["left"]):
    left_image_name_to_index[left_image_name] = i
    print(f"Left Image Name: {left_image_name}, Left Feature Index: {i}")

# 填充右图名称到索引的字典，假设右图名称在右图特征中的索引是连续的
for i, right_image_name in enumerate(test_candidates.columns[1:]):
    right_image_name_to_index[right_image_name] = i
    print(f"Added {right_image_name} to the dictionary with index {i}")


# 创建一个DataFrame来存储结果
result = pd.DataFrame(columns=["left"] + test_candidates.columns[1:].tolist())

# 遍历每一行，计算相似性并添加到结果DataFrame中
for index, row in test_candidates.iterrows():
    left_image_name = row["left"]
    left_feature_index = left_image_name_to_index[left_image_name]
    
    # 获取左图的特征
    left_feature = left_features[left_feature_index].reshape(1, -1)
    
    # 获取对应的右图名称
    right_image_names = row[1:].tolist()
    
    # 获取对应的右图特征的索引
    right_feature_indices = [right_image_name_to_index[name] for name in right_image_names]
    
    # 获取对应的右图特征
    right_features_subset = right_features[right_feature_indices]
    
    # 计算左图与20张右图的余弦相似度
    similarity_scores = cosine_similarity(left_feature, right_features_subset).flatten()
    
    # 替换右图名称为相似性分数
    row[1:] = similarity_scores

    # 添加到结果DataFrame
    result = result.append(row, ignore_index=True)

# 保存结果到CSV文件
result.to_csv("result.csv", index=False)


Left Image Name: abm, Left Feature Index: 0
Left Image Name: aci, Left Feature Index: 1
Left Image Name: acn, Left Feature Index: 2
Left Image Name: aco, Left Feature Index: 3
Left Image Name: acu, Left Feature Index: 4
Left Image Name: acw, Left Feature Index: 5
Left Image Name: adt, Left Feature Index: 6
Left Image Name: aei, Left Feature Index: 7
Left Image Name: aej, Left Feature Index: 8
Left Image Name: aem, Left Feature Index: 9
Left Image Name: aes, Left Feature Index: 10
Left Image Name: afd, Left Feature Index: 11
Left Image Name: afl, Left Feature Index: 12
Left Image Name: agb, Left Feature Index: 13
Left Image Name: agl, Left Feature Index: 14
Left Image Name: ahg, Left Feature Index: 15
Left Image Name: ahr, Left Feature Index: 16
Left Image Name: aik, Left Feature Index: 17
Left Image Name: aiv, Left Feature Index: 18
Left Image Name: ajf, Left Feature Index: 19
Left Image Name: ajk, Left Feature Index: 20
Left Image Name: aka, Left Feature Index: 21
Left Image Name: akg

KeyError: 'kyr'

In [50]:
from sklearn.metrics.pairwise import cosine_similarity

# 读取包含20列数据的CSV文件
test_filename = 'C:\\Users\\gaoyi\\Desktop\\CV\\assi\\finalproject\\COMP90086_2023_TLLdataset\\test_candidates.csv'
csv_data = pd.read_csv(test_filename)

# 左图特征列表和右图特征列表
# left_features = [...]  # 左图特征列表，每个元素是一个特征向量
# right_features = [...]  # 右图特征列表，每个元素是一个特征向量
left_features = left_features_list
right_features = right_features_list

results = []

for index, row in csv_data.iterrows():
    left_image_name = row['left']
    similarities = [left_image_name]

    if left_image_name in left_features_dict:
        left_feature = left_features_dict[left_image_name]

        for i in range(0, 20):
            right_image_column = f'c{i}'
            right_image_name = row[right_image_column]

            if right_image_name in right_features_dict:
                right_feature = right_features_dict[right_image_name]

                left_feature = left_feature.reshape(1, -1)
                right_feature = right_feature.reshape(1, -1)

                similarity_score = cosine_similarity(left_feature, right_feature)
                score = similarity_score[0][0]
                similarities.append(score)
            else:
                print(f"Right image {right_image_name} not found in features.")
    else:
        print(f"Left image {left_image_name} not found in features.")

    results.append(similarities)

output_csv_file = "similarity_scores2.csv"
output_df = pd.DataFrame(results, columns=['left'] + [f'c{i}' for i in range(0, 20)])
output_df.to_csv(output_csv_file, index=False)
