# Similarity Analysis of Recommendations

In [None]:
#!pip install spacy
#!python -m spacy download en_core_web_md
#!pip install sentence_transformers
#!pip install transformers


### Load data and restructure the data into a dictionary
We do this because it organizes this information efficiently; each year is a key, and its value is another dictionary containing both the recommendations and the results, making it straightforward to manipulate or analyze the data later.

In [None]:
import pandas as pd

# Read the recommendations workbook into a DataFrame
df = pd.read_excel('ACWV Recommendation (2).xlsx', engine='openpyxl')

# Map each year to a dictionary holding that year's recommendations and
# results as parallel lists (same position = same spreadsheet row).
# NOTE: the 'Result ' column name includes a trailing space.
recommendations_dict = {
    yr: {
        'Recommendation': rows_for_year['Recommendation'].tolist(),
        'Result': rows_for_year['Result '].tolist(),
    }
    for yr, rows_for_year in df.groupby('Year')
}

#print(recommendations_dict)


### Run Model

In [None]:
# Import necessary packages
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


# Step 1: Initialize the Sentence Transformer model
# We use a pre-trained model for this purpose.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Pairs whose cosine similarity is strictly greater than this value are
# reported as similar.
SIMILARITY_THRESHOLD = 0.80

# Column layout of the result table (kept explicit so the table has the
# expected columns even when no pair clears the threshold).
RESULT_COLUMNS = ['Year1', 'Rec1', 'Result1', 'Year2', 'Rec2', 'Result2', 'Similarity']

# Step 2: Prepare a container to store the calculated vectors
# We'll store the vectors by their corresponding years.
vectors_by_year = {}

# Step 3: Convert text recommendations to numerical vectors
# We loop through each year's recommendations.
for year, data in recommendations_dict.items():
    # Get the recommendations for that year
    recommendations = data['Recommendation']
    # Convert the text recommendations into vectors
    vectors_by_year[year] = model.encode(recommendations)

# Step 4: Collect similar recommendations as a list of row dictionaries
# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies the whole frame on every iteration.
similar_rows = []

# Step 5: Calculate similarity between recommendations from different years
# Loop through each year's vectors for comparison.
for year1, vecs1 in vectors_by_year.items():
    for year2, vecs2 in vectors_by_year.items():
        # Skip if the year pairs are the same or have been compared already
        if year1 >= year2:
            continue

        # Calculate cosine similarity between vectors of year1 and year2.
        # Row i corresponds to the i-th recommendation of year1 and column j
        # to the j-th recommendation of year2.
        similarities = cosine_similarity(vecs1, vecs2)

        year1_data = recommendations_dict[year1]
        year2_data = recommendations_dict[year2]

        # Walk the similarity matrix to find highly similar pairs; the indices
        # 'i' and 'j' give access to the matching recommendation and result
        # texts in 'recommendations_dict'.
        for i, sim_row in enumerate(similarities):
            for j, similarity in enumerate(sim_row):
                if similarity > SIMILARITY_THRESHOLD:
                    similar_rows.append({
                        'Year1': year1,
                        'Rec1': year1_data['Recommendation'][i],
                        'Result1': year1_data['Result'][i],
                        'Year2': year2,
                        'Rec2': year2_data['Recommendation'][j],
                        'Result2': year2_data['Result'][j],
                        'Similarity': similarity
                    })

# Step 6: Build the result DataFrame in a single pass
similar_recommendations_df2 = pd.DataFrame(similar_rows, columns=RESULT_COLUMNS)


In [None]:
# Pairs whose cosine similarity is strictly greater than this value are
# reported as similar.
SIMILARITY_THRESHOLD = 0.80

# Column layout of the result table (kept explicit so the table has the
# expected columns even when no pair clears the threshold).
RESULT_COLUMNS = ['Year1', 'Rec1', 'Result1', 'Year2', 'Rec2', 'Result2', 'Similarity']

# Initialize an empty list to hold the new rows
new_rows = []

# Initialize a dictionary to keep track of chains of similar recommendations.
# Keys are "<year>_<index>" strings, where <index> is the position of the
# recommendation within that year's list; each value is the list of years in
# which a similar recommendation was found (a year is repeated once per
# matching recommendation in that year).
similar_recommendations_chain = {}

# Loop through each year's vectors for comparison
for year1, vecs1 in vectors_by_year.items():
    for year2, vecs2 in vectors_by_year.items():
        # Skip identical years and pairs that were already compared
        if year1 >= year2:
            continue

        # Calculate cosine similarity between vectors of year1 and year2
        similarities = cosine_similarity(vecs1, vecs2)

        year1_data = recommendations_dict[year1]
        year2_data = recommendations_dict[year2]

        # Loop through the similarity matrix to find highly similar pairs
        for i, sim_row in enumerate(similarities):
            for j, similarity in enumerate(sim_row):
                if similarity > SIMILARITY_THRESHOLD:
                    # Create unique keys for the recommendations to keep track of them
                    key1 = f"{year1}_{i}"
                    key2 = f"{year2}_{j}"

                    # Record the link in both directions
                    similar_recommendations_chain.setdefault(key1, []).append(year2)
                    similar_recommendations_chain.setdefault(key2, []).append(year1)

                    # Store a row with the relevant information
                    new_rows.append({
                        'Year1': year1,
                        'Rec1': year1_data['Recommendation'][i],
                        'Result1': year1_data['Result'][i],
                        'Year2': year2,
                        'Rec2': year2_data['Recommendation'][j],
                        'Result2': year2_data['Result'][j],
                        'Similarity': similarity
                    })

# Convert the list of dictionaries to a DataFrame
new_rows_df = pd.DataFrame(new_rows, columns=RESULT_COLUMNS)

# Rebuild the result table from this cell's rows only. Concatenating onto the
# existing similar_recommendations_df2 would store every pair twice (the same
# pairs are found by the same loop) and would grow the table again on every
# re-run of this cell.
similar_recommendations_df2 = new_rows_df.copy()




In [None]:
# Chain keys have the form "<year>_<index>", where <index> is the position of
# the recommendation within that year's list. The value lists the years of all
# recommendations found similar to it (one entry per matching recommendation).
similar_recommendations_chain['2014_4']

[2010, 2012, 2012, 2016, 2018, 2020, 2020, 2020]

In [None]:
# Lift pandas' display limits so the table is rendered with every row and
# with the full, untruncated text of each cell
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None
similar_recommendations_df2

Unnamed: 0,Year1,Rec1,Result1,Year2,Rec2,Result2,Similarity
0,1996,"VA produce a 15-20 minute video to be used for local presentations addressing a variety\nof information regarding women veterans benefits, services and VA programs.",Concur,1998,"Develop and produce a video to address issues affecting women veterans, such as VA eligibility criteria, benefit and health care services and the contributions of women to the United States Military. Distribute this video for use in Transition Assistance Program Briefings, local media presentations and Pubic Service Announcements. (VA)",Nonconcur,0.811964
1,1996,VA should develop programs to meet the special needs of women veterans who are homeless.,Concur,1998,"Develop VA pilot programs to adequately assess and address the issues, concerns, needs and problems of women veterans who are homeless. Developing protocols or guidelines to assist VA health care providers in accommodating the needs of women veterans that are homeless in various shelter and housing situations. (VA)",Concur,0.829369
2,1996,VA survey field facilities to determine the amount of time each women veteran coordinator is allotted to fulfill the functions of the role.,Concur,1998,Monitor and appropriately allocate the amount of time Women Veterans’ Coordinators are authorized and provided to perform the duties related to this position. (VA),Concur,0.829134
3,1998,VA Continue to monitor and improve outreach programs and initiatives for women veterans with special emphasis on minority populations. (VA),Concur,2000,"Increase, improve and monitor outreach programs to women veterans with special emphasis on outreach to minority populations.",Concur,0.856368
4,1998,"Expand VA outreach activities to minority women veterans, including Native American women veterans living on and off the reservations to include:\n• ethnic media (print/radio/TV), churches and community based\n• organizations, minority women organizations and health fairs. (VA)",Concur in principle,2000,"Increase, improve and monitor outreach programs to women veterans with special emphasis on outreach to minority populations.",Concur,0.801621
5,1998,"Expand VA outreach activities to minority women veterans, including Native American women veterans living on and off the reservations to include:\n• ethnic media (print/radio/TV), churches and community based\n• organizations, minority women organizations and health fairs. (VA)",Concur in principle,2000,"Include use of ethnic media (print/radio/internet/TV), churches, community-based organizations, minority women’s organizations and health fairs in outreach efforts to minority women. Increased efforts to reach Native American veterans living on reservations. Identify, obtain and incorporate current field-based audiovisuals that focus on women veterans.",Concur,0.827739
6,1998,"Work with local tribal program officials to ensure Native American women veterans are afforded access to and receive VA benefits including assistance from VA’s Vocational Rehabilitation Specialists and are afforded access to programs administered through the Department of Labor Veterans Employment and Training Service (VETS) programs. (VA, DOL)",Concur in principle,2000,Work with local tribal program officials to ensure Native American women veterans have access to vocational rehabilitation services.,Concur,0.882702
7,1998,"All studies and surveys sponsored, funded or conducted by VA must include gender specific information. VA analysis should routinely report the results of these studies and the gender specific responses through circulation of the information within the veteran community service providers' networks. (VA)",Nonconcur,2000,"Include gender-specific information in all studies and surveys sponsored, funded or conducted by VA. The Committee recommends that this information, regardless of the sample size, be reported.",Concur,0.803634
8,1998,"Include information about issues affecting women veterans in all VA employee training and orientation. Address concerns protocols for treatment, trauma intervention, etc., with residents and visiting faculty at VA health care facilities and regional office staff.(VA)",Concur in principle,2000,"Include information regarding issues affecting women veterans in new employee orientation briefings and in appropriate training for other VA employees. When appropriate, address concerns, protocols for treatment of sexual trauma, interventions, etc., with affiliating students, residents and visiting faculty at VA health care facilities and regional offices.",Concur,0.910301
9,1998,"Ensure that women veterans are equitably represented in appointed positions at all levels of authority within the Department of Veterans Affairs. VA should actively recruit qualified women veterans that reflect the changing face of the veteran population, in positions within the Office of the Secretary and Under Secretaries, Assistant Secretaries, as well as VA working groups, task forces, advisory Committees and research consultants. (VA)",Concur,2000,"Recruit and hire qualified women veterans, with emphasis on identifying qualified minority women, for positions within VA, including senior executive service positions, political appointee positions and membership on special boards and committees.",Concur,0.855496
