# Phase 1: Loading and Exploring the Data
## Task 1: Load the User Data

In [7]:
data1={
    "users": [
        {"id": 1, "name": "Amit", "friends": [2, 3], "liked_pages": [101]},
        {"id": 2, "name": "Priya", "friends": [1, 4], "liked_pages": [102]},
        {"id": 3, "name": "Rahul", "friends": [1], "liked_pages": [101, 103]},
        {"id": 4, "name": "Sara", "friends": [2], "liked_pages": [104]}
    ],
    "pages": [
        {"id": 101, "name": "Python Developers"},
        {"id": 102, "name": "Data Science Enthusiasts"},
        {"id": 103, "name": "AI & ML Community"},
        {"id": 104, "name": "Web Dev Hub"}
    ]
}

In [8]:
import json

def load_data(filename):
    with open(filename, "r") as f: 
        data = json.load(f)
    return data

data = load_data("data.json")

FileNotFoundError: [Errno 2] No such file or directory: 'data.json'

## Task 2: write a function to display users and their connections

In [None]:
def disply_user(data):
    print("User and their connection:")
    for user in data['users']:
        print(f"ID:{user['id']}-{user['name']} is friends with {user['friends']} and liked pages are {user['liked_pages']}")
    print("\nPages Information:")
    for page in data['pages']:
        print(f"{page['id']} : {page['name']}")

disply_user(data)

# Phase 2: Cleaning and Structuring the Data
## Task 1: Identify Issues in the Data
With an example dataset where some records are incomplete or incorrect. Hereâ€™s an example:

In [None]:
data2={
    "users": [
        {"id": 1, "name": "Amit", "friends": [2, 3], "liked_pages": [101]},
        {"id": 2, "name": "Priya", "friends": [1, 4], "liked_pages": [102]},
        {"id": 3, "name": "", "friends": [1], "liked_pages": [101, 103]},
        {"id": 4, "name": "Sara", "friends": [2, 2], "liked_pages": [104]},
        {"id": 5, "name": "Amit", "friends": [], "liked_pages": []}
    ],
    "pages": [
        {"id": 101, "name": "Python Developers"},
        {"id": 102, "name": "Data Science Enthusiasts"},
        {"id": 103, "name": "AI & ML Community"},
        {"id": 104, "name": "Web Dev Hub"},
        {"id": 104, "name": "Web Development"}
    ]
}

# Problems:
1. User ID 3 has an empty name.
2. User ID 4 has a duplicate friend entry.
3. User ID 5 has no connections or liked pages (inactive user).
4. The pages list contains duplicate page IDs.

In [10]:
import json

def clean_data(data):
    # Remove users whith missing names
    data["users"]=[user for user in data["users"] if user["name"].strip()]

    #Remove duplicate friends
    for user in data["users"]:
        user["friends"]=list(set(user["friends"]))

    # Remove inactive users
    data["users"]=[user for user in data["users"] if user["friends"] or user["liked_pages"]]

     # Remove duplicate pages
    unique_pages = {}

    for page in data["pages"]:
        unique_pages[page["id"]]=page
    data["pages"]=list(unique_pages.values())
    
    return data

# Load, clean, and display the cleaned data
with open("data2.json","r") as f:
    data=json.load(f)

data=clean_data(data)

with open("cleaned_codebook_data.json","w") as f:
    json.dump(data,f,indent=4)

print("Data cleaned successfully!")
print(data, sep="/n")

FileNotFoundError: [Errno 2] No such file or directory: 'data2.json'

# Phase 3: People You May Know
Write a function to implements a friend recommendation system based on mutual connections in a social graph.

In [9]:
import json

# Function to load JSON data from a file

def load_data(filename):
    with open(filename,"r") as file:
        return json.load(file)

# Function to find pages a user might like based on common interests
def find_pages_you_might_like(user_id,data):
    user_pages={}
    for user in data['users']:
        user_pages[user["id"]] = set(user["liked_pages"])

    # If the user is not found, return an empty list
    if user_id not in user_pages:
        return []

    user_liked_pages = user_pages[user_id]
    page_suggestions={}

    for other_user, pages in user_pages.items():
        if other_user != user_id:
            shared_pages = user_liked_pages.intersection(pages)
            for page in pages:
                if page not in user_liked_pages:
                    page_suggestions[page] = page_suggestions.get(page, 0)+len(shared_pages)
    # Sort recommended pages based on the number of shared interactions
    sorted_page = sorted(page_suggestions.items(), key=lambda x:x[1],reverse=True)
    return [page_id for page_id, _ in sorted_page]

#load data
data=load_data("massive_data.json")
user_id=1
page_recommendations= find_pages_you_might_like(user_id,data)
print(f"Page you might like for user {user_id}: {page_recommendations}")

Page you might like for user 1: [103, 105, 107, 104, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127]


# Phase 3: Pages You Might Like
Write a function to recommends pages to users based on the interests of their social connections.


In [10]:
import json

def load_data(filename):
    with open(filename,"r") as f:
        return json.load(f)

def find_People_You_May_Know(data,user_id):
    user_friends = {}
    for user in data["users"]:
        user_friends[user['id']] = set(user['friends'])
    if user_id not in user_friends:
        return []

    direct_friends = user_friends[user_id]
    suggestions={}
    for friend in direct_friends:
        for mutual in user_friends[friend]:
            if mutual != user_id and mutual not in direct_friends:

                #count mutual friends
                suggestions[mutual]=suggestions.get(mutual,0)+1
    sorted_suggestions = sorted(suggestions.items(),key=lambda x: x[1],reverse=True)
    return [user_id for user_id, mutual_count in sorted_suggestions]
#load data
data=load_data("massive_data.json")
user_id=1
recommendations = find_People_You_May_Know(data,user_id)
print(f"Pepole you may know for user {user_id}:{recommendations}")

Pepole you may know for user 1:[7, 8, 9, 10, 11, 12]
