In [3]:
import pandas as pd

# Load each file into DataFrames
circles_df = pd.read_csv('12831.circles', sep=' ', header=None)
edges_df = pd.read_csv('12831.edges', sep=' ', header=None, names=['source', 'target'])
egofeat_df = pd.read_csv('12831.egofeat', sep=' ', header=None)
feat_df = pd.read_csv('12831.feat', sep=' ', header=None)
featnames_df = pd.read_csv('12831.featnames', sep=' ', header=None, names=['index', 'feature_name'])

# Check if number of columns in feat_df matches rows in featnames_df
num_feat_cols = feat_df.shape[1]
num_featnames_rows = featnames_df.shape[0]

if num_feat_cols == num_featnames_rows:
    feat_df.columns = featnames_df['feature_name']  # Assign feature names to feat_df columns
elif num_feat_cols > num_featnames_rows:
    # Trim extra columns in feat_df
    feat_df = feat_df.iloc[:, :num_featnames_rows]
    feat_df.columns = featnames_df['feature_name']
    print("Warning: Trimmed extra columns in feat_df to match featnames.")
else:
    # Add placeholder names if featnames are fewer than feat_df columns
    additional_names = [f"Unnamed_{i}" for i in range(num_featnames_rows, num_feat_cols)]
    feat_df.columns = list(featnames_df['feature_name']) + additional_names
    print("Warning: Added placeholder names for extra columns in feat_df.")

# Prepare egofeat_df as a single-row DataFrame, with feature names from featnames_df if applicable
egofeat_df.columns = feat_df.columns  # Align egofeat_df columns with feat_df after adjustment

# Combine the data
# 1. Concatenate edges and feat_df
combined_df = pd.concat([edges_df, feat_df], axis=1)

# 2. Add egofeat data as the ego user's feature row
combined_df = pd.concat([combined_df, egofeat_df], axis=0)

# 3. Add circles information as a new column
combined_df['circles'] = pd.Series(circles_df[0])

# Display the combined DataFrame
print(combined_df.head())


        source      target           #,  #...   #1  #1.  #11.  #1:   #2  \
0  398874773.0    652193.0   27985216.0   0.0  0.0  0.0   0.0  0.0  0.0   
1   18498878.0  14749606.0   14163141.0   0.0  0.0  0.0   0.0  0.0  0.0   
2   14305022.0   8479062.0   14142965.0   0.0  0.0  0.0   0.0  0.0  0.0   
3      22253.0     12741.0   35357461.0   0.0  0.0  0.0   0.0  0.0  0.0   
4   15540222.0  14809096.0  180505807.0   0.0  0.0  1.0   0.0  0.0  0.0   

   #2011  ...  @zeeg  @zeldman  @zeldman:  @zephoria:  @zite  @zsims  @東京ドーム  \
0    0.0  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   
1    0.0  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   
2    0.0  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   
3    0.0  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   
4    0.0  ...    0.0       0.0        0.0         0.0    0.0     0.0     0.0   

   @清水寺  @鹿苑寺  circles  
0   0.0   0.0  1\t6735  
1   0.0   0.0     

In [4]:
combined_df.to_json("combined_df.json", orient="records")