In [8]:
import pandas as pd

def _read_space_delimited(path, **read_csv_kwargs):
    """Read a headerless, single-space-delimited text file into a DataFrame.

    Extra keyword arguments (e.g. ``names``) are forwarded to ``pd.read_csv``.
    """
    return pd.read_csv(path, sep=' ', header=None, **read_csv_kwargs)


# One DataFrame per input file of ego network 12831. Only the edge list and
# the feature-name table receive explicit column names at load time; the
# remaining frames keep pandas' default integer column labels.
circles_df = _read_space_delimited('12831.circles')
edges_df = _read_space_delimited('12831.edges', names=['source', 'target'])
egofeat_df = _read_space_delimited('12831.egofeat')
feat_df = _read_space_delimited('12831.feat')
featnames_df = _read_space_delimited('12831.featnames', names=['index', 'feature_name'])

# Label the columns of feat_df (and egofeat_df) with the names in featnames_df.
#
# Four layouts are handled, based on how many columns feat_df has relative to
# the number of feature names:
#   * exactly one extra column  -> the leading column is treated as the node
#     id and the remaining columns get the feature names in order. Without
#     this case the node-id column would be labelled with the first feature
#     name, every feature would shift by one and the last one would be lost.
#     NOTE(review): assumes the .feat rows start with the node id, as in the
#     SNAP ego-network layout -- confirm against the dataset README.
#   * same number of columns    -> names are applied one-to-one.
#   * more than one extra       -> surplus trailing columns are trimmed.
#   * fewer columns than names  -> only the first names are used (applying
#     the full list would raise a length-mismatch ValueError).
feature_names = featnames_df['feature_name'].tolist()
num_feat_cols = feat_df.shape[1]
num_featnames_rows = len(feature_names)

if num_feat_cols == num_featnames_rows + 1:
    feat_df.columns = ['node_id'] + feature_names
elif num_feat_cols == num_featnames_rows:
    feat_df.columns = feature_names
elif num_feat_cols > num_featnames_rows:
    feat_df = feat_df.iloc[:, :num_featnames_rows]
    feat_df.columns = feature_names
    print("Warning: Trimmed extra columns in feat_df to match featnames.")
else:
    # Fewer data columns than names: keep only as many names as there are
    # columns so the assignment below cannot fail on a length mismatch.
    feature_names = feature_names[:num_feat_cols]
    feat_df.columns = feature_names
    print("Warning: feat_df has fewer columns than featnames; unused feature names were dropped.")

# The ego feature row gets the feature names only (no node-id label), so its
# columns line up with the feature columns of feat_df.
egofeat_df.columns = feature_names

# Put the feature columns to the right of the edge columns. Rows are matched
# purely by their integer row label.
# NOTE(review): this pairs edge i with feature row i; the two files are not
# shown to share a row order -- confirm that positional pairing is intended.
edge_feature_df = pd.concat([edges_df, feat_df], axis='columns')

# Append the ego user's feature row underneath. The appended row keeps its
# own row label from egofeat_df, and has no 'source'/'target' values.
combined_df = pd.concat([edge_feature_df, egofeat_df], axis='index')

# Attach the first column of the circles file as a new 'circles' column;
# values are aligned to combined_df by row label, unmatched rows get NaN.
# NOTE(review): like the features, circles are matched by row label only.
combined_df['circles'] = circles_df[0]

# Node ids whose edges are removed from the combined frame.
# NOTE(review): why these particular ids are excluded is not recorded here.
EXCLUDED_NODE_IDS = [398874773, 18498878, 14305022, 1186, 1678471]

# Drop every row whose 'source' or 'target' is one of the excluded ids.
# Rows with a missing 'source'/'target' never match and are therefore kept.
touches_excluded_node = (
    combined_df['source'].isin(EXCLUDED_NODE_IDS)
    | combined_df['target'].isin(EXCLUDED_NODE_IDS)
)
combined_df = combined_df[~touches_excluded_node]

# Write the filtered frame to disk as a JSON array with one object per row
# (the row index is not part of the 'records' layout).
combined_df.to_json(path_or_buf='combined_df.json', orient='records')

# Print the first five rows of the filtered frame as a quick sanity check.
preview_rows = combined_df.head(n=5)
print(preview_rows)

       source      target           #,  #...   #1  #1.  #11.  #1:   #2  #2011  \
3     22253.0     12741.0   35357461.0   0.0  0.0  0.0   0.0  0.0  0.0    0.0   
4  15540222.0  14809096.0  180505807.0   0.0  0.0  1.0   0.0  0.0  0.0    0.0   
5   7415132.0  14172562.0  369246180.0   0.0  1.0  0.0   0.0  0.0  0.0    0.0   
6  17129553.0  13839772.0   14202711.0   0.0  0.0  0.0   0.0  0.0  0.0    0.0   
7  15911247.0  14086492.0        761.0   0.0  0.0  0.0   0.0  0.0  0.0    0.0   

   ...  @zeeg  @zeldman  @zeldman:  @zephoria:  @zite  @zsims  @東京ドーム  @清水寺  \
3  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   0.0   
4  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   0.0   
5  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   0.0   
6  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   0.0   
7  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   0.0   

   @鹿苑寺  circles  
3   0.0      NaN  
