In [294]:
import pandas as pd
import re
import json

In [2]:
def clean_dataframe(df):
    # Remove rows where 'Caller' or 'Callee' is just 'Lcom' or 'Lorg'
    df = df[~df['Caller'].str.match(r'^Lcom$', case=False)]
    df = df[~df['Callee'].str.match(r'^Lcom$', case=False)]
    df = df[~df['Caller'].str.match(r'^Lorg$', case=False)]
    df = df[~df['Callee'].str.match(r'^Lorg$', case=False)]

    # Convert 'Caller' and 'Callee' to lowercase
    # df['Caller'] = df['Caller'].str.lower()
    # df['Callee'] = df['Callee'].str.lower()

    # Remove rows where 'Caller' or 'Callee' matches any of the exclude prefixes
    exclude_prefixes = ['Landroid', 'Lnet', 'Ljava', 'Lsun', 'Ljavax', 'Lkotlin', 'Landroidx']
    exclude_pattern = '|'.join(exclude_prefixes).lower()
    df = df[~df['Caller'].str.contains(exclude_pattern)]
    df = df[~df['Callee'].str.contains(exclude_pattern)]

    obfuscated_pattern = r'(?i)\bL[a-z0-9]{1,2}[0-9]+\b|(?i)\bL[a-z]/[a-z]/[a-z]\b'
    df = df[~df['Caller'].str.contains(obfuscated_pattern)]
    df = df[~df['Callee'].str.contains(obfuscated_pattern)]

    df['app_domain'] = df['package_name'].apply(extract_top_two_levels)
    df['is_frist_party'] = df.apply(is_frist_party, axis=1)
    df = df[df['is_frist_party'] == False]

    df.loc[((df['Caller_sdk'] == 'sonicnotify') | (df['Callee_sdk'] == 'sonicnotify')) & ((df['Caller_sdk'] == 'signal360') | (df['Callee_sdk'] == 'signal360')), 'Type'] = 'Intra-library'
    df = df[df['Type'] == 'Cross-library']
    return df

In [3]:
def get_matches(df):
    # Sanitize library names for valid regex group names
    sanitized_bt_pkg = {re.sub(r'\W+', '', lib): keywords for lib, keywords in bt_pkg.items()}

    # Combine all keywords into a single regex pattern
    combined_pattern = '|'.join(f"(?P<{lib}>{'|'.join(keywords)})" for lib, keywords in sanitized_bt_pkg.items())

    # Compile the regex pattern for case-insensitive matching
    pattern = re.compile(combined_pattern, re.IGNORECASE)

    # Extract matches from 'Caller' and 'Callee'
    caller_matches = df['Caller'].str.extract(pattern, expand=True)
    callee_matches = df['Callee'].str.extract(pattern, expand=True)

    # Find the library name from matches
    df['third_party_lib_caller'] = caller_matches.apply(lambda row: row.dropna().index[0] if not row.dropna().empty else None, axis=1)
    df['third_party_lib_callee'] = callee_matches.apply(lambda row: row.dropna().index[0] if not row.dropna().empty else None, axis=1)

    return df

In [4]:
def identify_xlib_interactions(df):
    sanitized_json_pkg = {sanitize_group_name(lib): keywords for lib, keywords in json_pkg.items()}

    # Combine all keywords into a single regex pattern
    combined_pattern = '|'.join(f"(?P<{lib}>{'|'.join(keywords)})" for lib, keywords in sanitized_json_pkg.items())

    # Compile the regex pattern for case-insensitive matching
    pattern = re.compile(combined_pattern, re.IGNORECASE)

    # Extract matches from 'Caller' and 'Callee'
    caller_matches = df['Caller'].str.extract(pattern, expand=True)
    callee_matches = df['Callee'].str.extract(pattern, expand=True)

    # Find the library name from matches
    df['classified_caller'] = caller_matches.apply(lambda row: row.dropna().index[0] if not row.dropna().empty else None, axis=1)
    df['classified_callee'] = callee_matches.apply(lambda row: row.dropna().index[0] if not row.dropna().empty else None, axis=1)

    # Identify known third-party libraries
    df['known_third_party_caller'] = df['third_party_lib_caller'].notna()
    df['known_third_party_callee'] = df['third_party_lib_callee'].notna()

    # Identify unknown third-party libraries
    df['unknown_lib_caller'] = df['classified_caller'].notna() & ~df['known_third_party_caller']
    df['unknown_lib_callee'] = df['classified_callee'].notna() & ~df['known_third_party_callee']
    
    return df

In [5]:
android9 = pd.read_csv('/home/aniketh/devel/src/ble-beacon/AndroCFG/android9_cross-lib.csv')
android12 = pd.read_csv('/home/aniketh/devel/src/ble-beacon/AndroCFG/android12_cross-lib.csv')
pii_leak = pd.read_csv('/home/aniketh/devel/src/ble-beacon/AndroCFG/pii_bt_leaks-cross-lib.csv')

In [6]:
bt_pkg = {
    'yinzcam': ['yinzcam', 'sobek'],
    'signal360': ['com/signal360/sdk', 'com/sonicnotify/sdk', 'rnsignal360'],
    'cueAudio': ['cueaudio'], 
    'Altbeacon': ['org/altbeacon', 'com/altbeacon'],
    'Radius Networks': ['radiusnetworks'],
    'Estimote': ['com/estimote'],
    'Gimbal': ['com/gimbal'],
    'Kontakt': ['kontakt'],
    'Cuebiq': ['com/cuebiq'],
    'ad4screen': ['ad4screen'],
    'reveal': ['com/stepleaderdigital'],
    'SignalFrame': ['wirelessregistry', 'observersdk'],
    'indooratlas': ['indooratlas'],
    'rover': ['rover'],
    'CoulusCoelib': ['coelib', 'couluslibrary'],
    'BeaconsInSpace': ['beaconsinspace'],
    'inmarket': ['inmarket'],
    'sense360': ['sense360'],
    'locuslabs': ['locuslabs'],
    'roximity': ['roximity'],
    'bluecats': ['bluecats'],
    'sensoro': ['sensoro'], 
    'swirl': ['swirl'],
    'placer': ['placer'],
    'Unacast Pure': ['pure'],
    'Point Inside': ['pointinside'],
    'Woosmap SDK': ['webgeoservices'],
    'MOCA': ['innoquant', 'moca'],
    'Proximi.io': ['io/proximi', 'proximiiolibrary'],
    'pulseid': ['pulseid'],
    'ubudu': ['ubudu'],
    'XMode': ['xmode', 'mysdk'],
    'Radar': ['io/radar'],
    'areametrics': ['areametrics'],
    'bluekai': ['bluekai'],
    'Colocater': ['crowdconnected', 'androidcolocator', 'colocator'],
    'Huq Sourcekit': ['huq', 'sourcekit'],
    'Demdex': ['adobe/mobile', 'demdex'],
    'Pilgrim by Foursquare': ['foursquare', 'pilgrim'],
    'Dynamic Yield': ['dynamicyield'],
    'Singlespot': ['sptproximitykit'],
    'Salesforce Marketing Cloud': ['salesforce'],
    'mParticle': ['com/mparticle'],
    'LeanPlum': ['leanplum'],
    'Zendrive': ['zendrive'],
    'Swrve': ['swrve'],
    'Exponea': ['infinario', 'exponea', 'sygic', 'aura'],
    'OpenLocate (Safegraph)': ['safegraph', 'openlocate'],
    'kochava': ['com/kochava'],
    'PredicIO': ['telescope', 'predic'],
    'bazaarvoice': ['bazaarvoice', 'bvandroidsdk'],
    'zapr': ['com/redbricklane/zapr'],
    'precisely': ['precisely']
}

In [7]:
json_data = open("/home/aniketh/devel/src/ble-beacon/beacon-finder/acr-finder/exodus_trackers").read()
trackers = json.loads(json_data)["trackers"]

json_pkg = {}
for tracker in trackers.values():
    if isinstance(tracker["code_signature"], str):
        code_signatures = tracker["code_signature"].replace(".", "/")
        # if code_signatures.startswith("/"):
        #     code_signatures = code_signatures.split("/")[1:]
        if "|" in code_signatures:
            code_signatures = code_signatures.split("|")
        else:
            code_signatures = [code_signatures]
    elif isinstance(tracker["code_signature"], list):
        code_signatures = [sig.replace(".", "/") for sig in tracker["code_signature"]]
    code_signatures = [sig.lstrip('/') for sig in code_signatures if sig.strip()]
    code_signatures = ["/".join(sig.split("/")[:2]) for sig in code_signatures]
    if code_signatures:
        # code_signatures = [sig.split('/')[0] for sig in code_signatures]
        json_pkg[tracker['name']] = code_signatures


In [8]:
openx_signatures = json_pkg.get('OpenX', None)
openx_signatures

['com/openx', 'com/openx']

In [9]:
json_pkg

{'Teemo': ['com/databerries', 'com/geolocstation'],
 'FidZup': ['com/fidzup'],
 'Audience Studio (Krux)': ['com/krux'],
 'Ad4Screen': ['com/ad4screen'],
 'Weborama': ['com/weborama'],
 'Smart': ['com/smartadserver'],
 'JW Player': ['com/longtailvideo'],
 'Loggly': ['com/github', 'com/github', 'com/visiware'],
 'OutBrain': ['com/outbrain'],
 'AppsFlyer': ['com/appsflyer'],
 'Ligatus': ['LigatusManager', 'LigatusViewClient', 'com/ligatus'],
 'Widespace': ['com/widespace'],
 'AppNexus': ['com/appnexus'],
 'Localytics': ['com/localytics', 'com/localytics', 'com/localytics'],
 'Braze (formerly Appboy)': ['com/appboy'],
 'mParticle': ['com/mparticle'],
 'S4M': ['com/sam4mobile', 'S4MAnalytic'],
 'Sizmek': ['sizmek/'],
 'Batch': ['com/batch'],
 'Sync2Ad': ['com/visiware'],
 'Flurry': ['com/flurry'],
 'HockeyApp': ['net/hockeyapp'],
 'Google CrashLytics': ['io/fabric',
  'com/crashlytics',
  'com/google',
  'com/google',
  'io/invertase'],
 'LeanPlum': ['com/leanplum'],
 'Tinder Analytics': ['

In [10]:
def extract_top_two_levels(package_name):
    parts = package_name.split('.')
    if len(parts) >= 2:
        return '.'.join(parts[:2])
    return package_name

# Function to check if an interaction is third-party
def is_frist_party(row):
    caller_domain = extract_top_two_levels(row['Caller'].replace('/', '.'))
    callee_domain = extract_top_two_levels(row['Callee'].replace('/', '.'))
    app_domain = row['app_domain']
    
    # return not (caller_domain == app_domain or callee_domain == app_domain or caller_domain == callee_domain)
    if app_domain != 'com.app':
        return (app_domain in caller_domain or app_domain in callee_domain or caller_domain in callee_domain or callee_domain in caller_domain)
    
def sanitize_group_name(name):
    sanitized = re.sub(r'\W+', '', name)
    if sanitized[0].isdigit():
        sanitized = f'lib{sanitized}'
    return sanitized

## Android 9

In [11]:
df = android9[android9['Type'] == 'Cross-library']

# Remove rows where 'Caller' or 'Callee' is just 'Lcom' or 'Lorg'
df = df[~df['Caller'].str.match(r'^Lcom$', case=False)]
df = df[~df['Callee'].str.match(r'^Lcom$', case=False)]
df = df[~df['Caller'].str.match(r'^Lorg$', case=False)]
df = df[~df['Callee'].str.match(r'^Lorg$', case=False)]

# Convert 'Caller' and 'Callee' to lowercase
# df['Caller'] = df['Caller'].str.lower()
# df['Callee'] = df['Callee'].str.lower()

# Remove rows where 'Caller' or 'Callee' matches any of the exclude prefixes
exclude_prefixes = ['Landroid', 'Lnet', 'Ljava', 'Lsun', 'Ljavax', 'Lkotlin', 'Landroidx']
exclude_pattern = '|'.join(exclude_prefixes).lower()
df = df[~df['Caller'].str.contains(exclude_pattern)]
df = df[~df['Callee'].str.contains(exclude_pattern)]

obfuscated_pattern = r'(?i)\bL[a-z0-9]{1,2}[0-9]+\b|(?i)\bL[a-z]/[a-z]/[a-z]\b'
df = df[~df['Caller'].str.contains(obfuscated_pattern)]
df = df[~df['Callee'].str.contains(obfuscated_pattern)]

In [12]:
df['app_domain'] = df['package_name'].apply(extract_top_two_levels)
df['is_frist_party'] = df.apply(is_frist_party, axis=1)
df = df[df['is_frist_party'] == False]

df.loc[((df['Caller_sdk'] == 'sonicnotify') | (df['Callee_sdk'] == 'sonicnotify')) & ((df['Caller_sdk'] == 'signal360') | (df['Callee_sdk'] == 'signal360')), 'Type'] = 'Intra-library'
df = df[df['Type'] == 'Cross-library']

In [13]:
df

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party
169,Lcom/reactapp,Lorg/altbeacon,Cross-library,reactapp,altbeacon,,28D197A0A1D38DB99E5DE81FC5F125AA11E3BCCAA41BF4...,io.pushpay.edenworshipcenter,useBluetoothBasic,io.pushpay,False
183,Lcom/smaato/sdk,LOooO0o/OooOOO/OooO00o,Cross-library,smaato,OooOOO,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False
185,LOooO0o/OooOOO/OooO00o,Lcom/smaato/sdk,Cross-library,OooOOO,smaato,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False
188,LOooO0o/OooOOO/OooO00o/o00000Oo/OooO0OO/OooO0o...,Lcom/smaato/sdk,Cross-library,OooOOO,smaato,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False
189,Lcom/smaato/sdk/richmedia/ad/BannerHtmlPlayerA...,LOooO0o/OooOOO/OooO00o,Cross-library,smaato,OooOOO,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False
...,...,...,...,...,...,...,...,...,...,...,...
1439291,Lcom/braintreepayments,Llib/android/paypal,Cross-library,braintreepayments,android,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useFineLocation,cota.plus,False
1439292,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useFineLocation,cota.plus,False
1439938,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useBluetoothBasic,cota.plus,False
1439947,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,accesswifistate,cota.plus,False


In [28]:
df[df['Caller'].str.contains('Lcoelib') | df['Callee'].str.contains('Lcoelib')]

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party
69652,Lcom/english_news_in_levels/MainActivity;->onD...,Lcoelib/c/couluslibrary,Cross-library,english_news_in_levels,c,,01AAAB2EBFA091BE48F659F6F0E2CF95E552AE88C4C07A...,com.newsinlevels,useCoarseLocation,com.newsinlevels,False
69658,Lcom/english_news_in_levels/MainActivity$a;->o...,Lcoelib/c/couluslibrary,Cross-library,english_news_in_levels,c,,01AAAB2EBFA091BE48F659F6F0E2CF95E552AE88C4C07A...,com.newsinlevels,useCoarseLocation,com.newsinlevels,False
69675,Lcom/english_news_in_levels/MainActivity;->onD...,Lcoelib/c/couluslibrary,Cross-library,english_news_in_levels,c,,01AAAB2EBFA091BE48F659F6F0E2CF95E552AE88C4C07A...,com.newsinlevels,useFineLocation,com.newsinlevels,False
69681,Lcom/english_news_in_levels/MainActivity$a;->o...,Lcoelib/c/couluslibrary,Cross-library,english_news_in_levels,c,,01AAAB2EBFA091BE48F659F6F0E2CF95E552AE88C4C07A...,com.newsinlevels,useFineLocation,com.newsinlevels,False
264921,Lluo/m,Lcoelib/c/couluslibrary,Cross-library,m,c,,F97CF99148F395EE6AF21F308D16F1DDCBEFB225A0C48A...,luo.speedometergps,useCoarseLocation,luo.speedometergps,False
264940,Lluo/m,Lcoelib/c/couluslibrary,Cross-library,m,c,,F97CF99148F395EE6AF21F308D16F1DDCBEFB225A0C48A...,luo.speedometergps,useFineLocation,luo.speedometergps,False
446123,Lcom/qiblacompass/MainActivity;->onDestroy(),Lcoelib/c/couluslibrary,Cross-library,qiblacompass,c,,E438B80085E04758A65EA98C5D0F3DEBC4DB1466B433E2...,com.prayertimes.ramadan.qiblacompass,useCoarseLocation,com.prayertimes,False
446169,Lcom/qiblacompass/MainActivity;->onCreate(),Lcoelib/c/couluslibrary,Cross-library,qiblacompass,c,,E438B80085E04758A65EA98C5D0F3DEBC4DB1466B433E2...,com.prayertimes.ramadan.qiblacompass,useCoarseLocation,com.prayertimes,False
446470,Lcom/qiblacompass/MainActivity;->onDestroy(),Lcoelib/c/couluslibrary,Cross-library,qiblacompass,c,,E438B80085E04758A65EA98C5D0F3DEBC4DB1466B433E2...,com.prayertimes.ramadan.qiblacompass,useFineLocation,com.prayertimes,False
446516,Lcom/qiblacompass/MainActivity;->onCreate(),Lcoelib/c/couluslibrary,Cross-library,qiblacompass,c,,E438B80085E04758A65EA98C5D0F3DEBC4DB1466B433E2...,com.prayertimes.ramadan.qiblacompass,useFineLocation,com.prayertimes,False


In [29]:
# Sanitize library names for valid regex group names
sanitized_bt_pkg = {re.sub(r'\W+', '', lib): keywords for lib, keywords in bt_pkg.items()}

# Combine all keywords into a single regex pattern
combined_pattern = '|'.join(f"(?P<{lib}>{'|'.join(keywords)})" for lib, keywords in sanitized_bt_pkg.items())

# Compile the regex pattern for case-insensitive matching
pattern = re.compile(combined_pattern, re.IGNORECASE)

# Extract matches from 'Caller' and 'Callee'
caller_matches = df['Caller'].str.extract(pattern, expand=True)
callee_matches = df['Callee'].str.extract(pattern, expand=True)

# Find the library name from matches
df['third_party_lib_caller'] = caller_matches.apply(lambda row: row.dropna().index[0] if not row.dropna().empty else None, axis=1)
df['third_party_lib_callee'] = callee_matches.apply(lambda row: row.dropna().index[0] if not row.dropna().empty else None, axis=1)


In [30]:
df = df[(df['third_party_lib_caller'].notna() & df['third_party_lib_callee'].notna()) & (df['third_party_lib_caller'] != df['third_party_lib_callee'])]

In [31]:
# Count interactions between third-party libraries
grouped_df = df.groupby('package_name').apply(lambda x: x.drop_duplicates(subset=['package_name', 'third_party_lib_caller', 'third_party_lib_callee'])).reset_index(drop=True)

# Count interactions between third-party libraries per app
interaction_counts = grouped_df.groupby(['package_name', 'third_party_lib_caller', 'third_party_lib_callee']).size().reset_index(name='count')

# Generate some basic statistics
total_interactions = df.shape[0]
unique_callers = df['third_party_lib_caller'].nunique()
unique_callees = df['third_party_lib_callee'].nunique()

stats = {
    'Total Interactions': total_interactions,
    'Unique Callers': unique_callers,
    'Unique Callees': unique_callees
}

print("Statistics:")
for key, value in stats.items():
    print(f"{key}: {value}")

Statistics:
Total Interactions: 279
Unique Callers: 11
Unique Callees: 4


In [32]:
grouped_df = df.groupby('package_name').apply(lambda x: x.drop_duplicates(subset=['third_party_lib_caller', 'third_party_lib_callee'])).reset_index(drop=True)

# Count interactions between third-party libraries per app
interaction_counts = grouped_df.groupby(['third_party_lib_caller', 'third_party_lib_callee']).size().reset_index(name='count')

# Sort by count to find the most common interactions
sorted_interactions = interaction_counts.sort_values(by='count', ascending=False)
sorted_interactions

Unnamed: 0,third_party_lib_caller,third_party_lib_callee,count
3,SalesforceMarketingCloud,Altbeacon,15
7,mParticle,Radar,5
9,roximity,Gimbal,5
1,Gimbal,Colocater,4
0,BeaconsInSpace,Altbeacon,1
2,MOCA,Altbeacon,1
4,Singlespot,Altbeacon,1
5,XMode,Altbeacon,1
6,bazaarvoice,Gimbal,1
8,roximity,Altbeacon,1


In [33]:
grouped_df = df.groupby('package_name').apply(lambda x: x.drop_duplicates(subset=['third_party_lib_caller', 'third_party_lib_callee'])).reset_index(drop=True)

# List unique interactions for each third-party library caller
caller_interactions = grouped_df.groupby('third_party_lib_caller')['third_party_lib_callee'].unique().reset_index()
caller_interactions['unique_interactions_count'] = caller_interactions['third_party_lib_callee'].apply(len)
caller_interactions

Unnamed: 0,third_party_lib_caller,third_party_lib_callee,unique_interactions_count
0,BeaconsInSpace,[Altbeacon],1
1,Gimbal,[Colocater],1
2,MOCA,[Altbeacon],1
3,SalesforceMarketingCloud,[Altbeacon],1
4,Singlespot,[Altbeacon],1
5,XMode,[Altbeacon],1
6,bazaarvoice,[Gimbal],1
7,mParticle,[Radar],1
8,roximity,"[Gimbal, Altbeacon]",2
9,ubudu,[Altbeacon],1


In [34]:
grouped_df

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee
0,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,0AC4591BE200C4B6D583094B1DEBF982FFAD614DDA7AFB...,bitnet.hu.rossmann,useBluetoothAdmin,bitnet.hu,False,SalesforceMarketingCloud,Altbeacon
1,Lcom/mparticle/kits/RadarKit;->setOptOut(),Lio/radar,Cross-library,mparticle,radar,,0E7FA98354BCADE983E63133EDBC851782533CFEAD6436...,ch.kaisin,useBluetoothAdmin,ch.kaisin,False,mParticle,Radar
2,Lcom/innoquant/moca,Lorg/altbeacon,Cross-library,innoquant,altbeacon,,DDEDEB572125E4755E8F05A0A8B8047286670CE5B22E71...,cl.sodimac,useBluetoothAdmin,cl.sodimac,False,MOCA,Altbeacon
3,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,0D790C9091554A83FE62D176B8BFFAC2933AFE94E076ED...,com.CredemMobile,useBluetoothAdmin,com.CredemMobile,False,SalesforceMarketingCloud,Altbeacon
4,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,42B3CCE340ADCD6A4A04753571877FCBF8C9729F4D9591...,com.abz.app,useBluetoothAdmin,com.abz,False,SalesforceMarketingCloud,Altbeacon
5,Lio/mysdk/beacons/utils/BcnUtils$asBeacon$1;->...,Lorg/altbeacon,Cross-library,mysdk,altbeacon,,3075EE7EB48B22E87A54F126F254BB991C38E43626DA51...,com.alienmanfc6.wheresmyandroid,useBluetoothBasic,com.alienmanfc6,False,XMode,Altbeacon
6,Lcom/beaconsinspace/android/beacon/detector/BI...,Lorg/altbeacon,Cross-library,beaconsinspace,altbeacon,,FB925BD8826637F11A2C196F10901F4C45B9579CEE5483...,com.arcsoft.perfect365,useBluetoothBasic,com.arcsoft,False,BeaconsInSpace,Altbeacon
7,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,E4F9BDEDC39006F5C5F2AC61C37B915556CE9DF7D2B315...,com.chamberlain.android.liftmaster.myq,useBluetoothAdmin,com.chamberlain,False,SalesforceMarketingCloud,Altbeacon
8,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,0975ACFC52153EE65F3E3A5D74938CE952BFB03CB96506...,com.craftsman.myq,useBluetoothAdmin,com.craftsman,False,SalesforceMarketingCloud,Altbeacon
9,Lcom/venue/emkitproximity/manager/ProximityCon...,Lcom/gimbal,Cross-library,venue,gimbal,,0BE2FE8ECD0B4E22EFCD508DE54FE7D919002038C3CA5A...,com.flyers.premiumseating,useCoarseLocation,com.flyers,False,roximity,Gimbal


In [405]:
df[df['Callee_sdk'] == 'openlocate']

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party


In [401]:
df[df['Caller'].str.contains('openlocate') | df['Callee'].str.contains('openlocate')]

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party


In [14]:
df = android9[android9['Type'] == 'Cross-library']
cleaned_df = clean_dataframe(df)
matches_df = get_matches(cleaned_df)
identified_df = identify_xlib_interactions(matches_df)

In [15]:
identified_df

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
169,Lcom/reactapp,Lorg/altbeacon,Cross-library,reactapp,altbeacon,,28D197A0A1D38DB99E5DE81FC5F125AA11E3BCCAA41BF4...,io.pushpay.edenworshipcenter,useBluetoothBasic,io.pushpay,False,,Altbeacon,,AltBeacon,False,True,False,False
183,Lcom/smaato/sdk,LOooO0o/OooOOO/OooO00o,Cross-library,smaato,OooOOO,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,Smaato,,False,False,True,False
185,LOooO0o/OooOOO/OooO00o,Lcom/smaato/sdk,Cross-library,OooOOO,smaato,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,,Smaato,False,False,False,True
188,LOooO0o/OooOOO/OooO00o/o00000Oo/OooO0OO/OooO0o...,Lcom/smaato/sdk,Cross-library,OooOOO,smaato,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,,Smaato,False,False,False,True
189,Lcom/smaato/sdk/richmedia/ad/BannerHtmlPlayerA...,LOooO0o/OooOOO/OooO00o,Cross-library,smaato,OooOOO,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,Smaato,,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1439291,Lcom/braintreepayments,Llib/android/paypal,Cross-library,braintreepayments,android,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useFineLocation,cota.plus,False,,,,,False,False,False,False
1439292,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useFineLocation,cota.plus,False,,,,,False,False,False,False
1439938,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useBluetoothBasic,cota.plus,False,,,,,False,False,False,False
1439947,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,accesswifistate,cota.plus,False,,,,,False,False,False,False


In [17]:
identified_df[((identified_df['known_third_party_callee'] & identified_df['known_third_party_caller']) | (identified_df['known_third_party_caller'] & identified_df['known_third_party_callee']))& 
(identified_df['third_party_lib_caller'] != identified_df['third_party_lib_callee'])]

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
42816,Lcom/gimbal/internal/communication/services/Co...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
42829,Lcom/gimbal/internal/push/PushHandlerService;-...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
42834,Lcom/gimbal,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
42836,Lcom/gimbal/internal/push/FcmListenerService;-...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
42838,Lcom/gimbal/internal,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1357248,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,0AC4591BE200C4B6D583094B1DEBF982FFAD614DDA7AFB...,bitnet.hu.rossmann,useBluetoothBasic,bitnet.hu,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False
1365891,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,5B630C86373191B604963653D11A1A6627642C602B4DED...,com.ocbcnisp.onemobileapp,useBluetoothAdmin,com.ocbcnisp,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False
1365942,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,5B630C86373191B604963653D11A1A6627642C602B4DED...,com.ocbcnisp.onemobileapp,useBluetoothBasic,com.ocbcnisp,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False
1426080,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,013245240CD1CCD50355789CEE6BC6261C40EE91DC4860...,dk.meny,useBluetoothAdmin,dk.meny,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False


In [18]:
beacon_to_beacon = identified_df[((identified_df['known_third_party_callee'] & identified_df['known_third_party_caller']) | (identified_df['known_third_party_caller'] & identified_df['known_third_party_callee']))& 
(identified_df['third_party_lib_caller'] != identified_df['third_party_lib_callee'])]
beacon_to_beacon9 = beacon_to_beacon.groupby(['third_party_lib_caller', 'third_party_lib_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
beacon_to_beacon9

Unnamed: 0,third_party_lib_caller,third_party_lib_callee,dot_file_name,unique_package_count
0,BeaconsInSpace,Altbeacon,useBluetoothBasic,1
1,Gimbal,Colocater,accesswifistate,4
2,Gimbal,Colocater,useBluetoothBasic,4
3,Gimbal,Colocater,useCoarseLocation,4
4,Gimbal,Colocater,useFineLocation,4
5,MOCA,Altbeacon,useBluetoothAdmin,1
6,MOCA,Altbeacon,useBluetoothBasic,1
7,SalesforceMarketingCloud,Altbeacon,useBluetoothAdmin,15
8,SalesforceMarketingCloud,Altbeacon,useBluetoothBasic,15
9,Singlespot,Altbeacon,useBluetoothAdmin,1


In [19]:
tpl_to_beacon = identified_df[((identified_df['known_third_party_callee'] & identified_df['unknown_lib_caller']) & (identified_df['third_party_lib_callee'] != identified_df['classified_caller']))]
tpl_to_beacon9 = tpl_to_beacon.groupby(['classified_caller', 'third_party_lib_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
tpl_to_beacon9

Unnamed: 0,classified_caller,third_party_lib_callee,dot_file_name,unique_package_count
0,ExactTarget,Altbeacon,useBluetoothBasic,3
1,Geniee,Altbeacon,useBluetoothAdmin,3
2,Geniee,Altbeacon,useBluetoothBasic,3
3,NativeX,kochava,accesswifistate,5
4,OpenX,kochava,accesswifistate,7
5,Plexure,Altbeacon,useBluetoothBasic,2
6,ShallWeAD,Gimbal,accesswifistate,2
7,ShallWeAD,Gimbal,useBluetoothBasic,2
8,ShallWeAD,Gimbal,useCoarseLocation,2
9,ShallWeAD,Gimbal,useFineLocation,2


In [20]:
beacon_to_tpl = identified_df[(identified_df['known_third_party_caller'] & identified_df['unknown_lib_callee']) & (identified_df['third_party_lib_caller'] != identified_df['classified_callee'])]
beacon_to_tpl9 = beacon_to_tpl.groupby(['third_party_lib_caller', 'classified_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
beacon_to_tpl9

Unnamed: 0,third_party_lib_caller,classified_callee,dot_file_name,unique_package_count
0,CoulusCoelib,Blesh,changewifistate,1
1,CoulusCoelib,Blesh,useCoarseLocation,1
2,CoulusCoelib,Blesh,useFineLocation,1
3,LeanPlum,CleverTap,useBluetoothBasic,1
4,PredicIO,NewRelic,useCoarseLocation,1
5,PredicIO,NewRelic,useFineLocation,1
6,XMode,Placed,accesswifistate,1
7,XMode,Placed,useCoarseLocation,2
8,XMode,Placed,useFineLocation,2
9,inmarket,TwitterMoPub,useCoarseLocation,1


In [21]:
tpl_to_tpl = identified_df[(identified_df['unknown_lib_caller'] & identified_df['unknown_lib_callee'])  & (identified_df['classified_caller'] != identified_df['classified_callee'])]
tpl_to_tpl9 = tpl_to_tpl.groupby(['classified_caller', 'classified_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
tpl_to_tpl9

Unnamed: 0,classified_caller,classified_callee,dot_file_name,unique_package_count
0,AdColony,Unity3dAds,useCoarseLocation,5
1,AdColony,Unity3dAds,useFineLocation,5
2,AddAptTr,AmazonAdvertisement,accesswifistate,6
3,AddAptTr,Inmobi,accesswifistate,2
4,AddAptTr,Inmobi,useCoarseLocation,2
...,...,...,...,...
137,Vdopia,Vungle,useFineLocation,1
138,ironSource,Inmobi,useCoarseLocation,26
139,ironSource,Inmobi,useFineLocation,26
140,ironSource,Vungle,useCoarseLocation,7


## Android 12

In [22]:
df = android12[android12['Type'] == 'Cross-library']
cleaned_df = clean_dataframe(df)
matches_df = get_matches(cleaned_df)
identified_df = identify_xlib_interactions(matches_df)

In [23]:
beacon_to_beacon = identified_df[((identified_df['known_third_party_callee'] & identified_df['known_third_party_caller']) | (identified_df['known_third_party_caller'] & identified_df['known_third_party_callee']))& 
(identified_df['third_party_lib_caller'] != identified_df['third_party_lib_callee'])]
beacon_to_beacon12 = beacon_to_beacon.groupby(['third_party_lib_caller', 'third_party_lib_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
beacon_to_beacon12

Unnamed: 0,third_party_lib_caller,third_party_lib_callee,dot_file_name,unique_package_count
0,BeaconsInSpace,Altbeacon,useBluetoothConnect,1
1,Colocater,Altbeacon,useBluetoothAdvertise,30
2,Gimbal,Colocater,accesswifistate,4
3,Gimbal,Colocater,useCoarseLocation,4
4,Gimbal,Colocater,useFineLocation,4
5,MOCA,Altbeacon,useBluetoothScan,1
6,RadiusNetworks,Altbeacon,useBluetoothAdvertise,150
7,SalesforceMarketingCloud,Altbeacon,useBluetoothScan,15
8,Singlespot,Altbeacon,useBluetoothScan,1
9,XMode,Altbeacon,useBluetoothConnect,1


In [24]:
tpl_to_beacon = identified_df[((identified_df['known_third_party_callee'] & identified_df['unknown_lib_caller']) & (identified_df['third_party_lib_callee'] != identified_df['classified_caller']))]
tpl_to_beacon12 = tpl_to_beacon.groupby(['classified_caller', 'third_party_lib_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
tpl_to_beacon12

Unnamed: 0,classified_caller,third_party_lib_callee,dot_file_name,unique_package_count
0,Geniee,Altbeacon,useBluetoothScan,3
1,NativeX,kochava,accesswifistate,5
2,OpenX,kochava,accesswifistate,7
3,ShallWeAD,Gimbal,accesswifistate,2
4,ShallWeAD,Gimbal,useCoarseLocation,2
5,ShallWeAD,Gimbal,useFineLocation,2
6,Urbanairship,Gimbal,accesswifistate,75
7,Urbanairship,Gimbal,useCoarseLocation,75
8,Urbanairship,Gimbal,useFineLocation,75
9,Verve,Gimbal,useCoarseLocation,6


In [25]:
beacon_to_tpl = identified_df[(identified_df['known_third_party_caller'] & identified_df['unknown_lib_callee']) & (identified_df['third_party_lib_caller'] != identified_df['classified_callee'])]
beacon_to_tpl12 = beacon_to_tpl.groupby(['third_party_lib_caller', 'classified_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
beacon_to_tpl12

Unnamed: 0,third_party_lib_caller,classified_callee,dot_file_name,unique_package_count
0,CoulusCoelib,Blesh,changewifistate,1
1,CoulusCoelib,Blesh,useCoarseLocation,1
2,CoulusCoelib,Blesh,useFineLocation,1
3,PredicIO,NewRelic,useCoarseLocation,1
4,PredicIO,NewRelic,useFineLocation,1
5,XMode,Placed,accesswifistate,1
6,XMode,Placed,useCoarseLocation,2
7,XMode,Placed,useFineLocation,2
8,inmarket,TwitterMoPub,useCoarseLocation,1
9,inmarket,TwitterMoPub,useFineLocation,1


In [28]:
tpl_to_tpl = identified_df[(identified_df['unknown_lib_caller'] & identified_df['unknown_lib_callee'])  & (identified_df['classified_caller'] != identified_df['classified_callee'])]
tpl_to_tpl12 = tpl_to_tpl.groupby(['classified_caller', 'classified_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
tpl_to_tpl12

Unnamed: 0,classified_caller,classified_callee,dot_file_name,unique_package_count
0,AdColony,Unity3dAds,useCoarseLocation,5
1,AdColony,Unity3dAds,useFineLocation,5
2,AddAptTr,AmazonAdvertisement,accesswifistate,6
3,AddAptTr,Inmobi,accesswifistate,2
4,AddAptTr,Inmobi,useCoarseLocation,2
...,...,...,...,...
136,Vdopia,Vungle,useFineLocation,1
137,ironSource,Inmobi,useCoarseLocation,26
138,ironSource,Inmobi,useFineLocation,26
139,ironSource,Vungle,useCoarseLocation,7


In [27]:


# Find interactions where known third-party libraries call unknown libraries and vice versa
known_to_unknown = df[df['known_third_party_caller'] & df['unknown_lib_callee']]
unknown_to_known = df[df['unknown_lib_caller'] & df['known_third_party_callee']]


KeyError: 'known_third_party_caller'

In [100]:
identified_df[identified_df['known_third_party_callee'] & identified_df['unknown_lib_caller']]

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
21181,Lcom/urbanairship,Lcom/gimbal,Cross-library,urbanairship,gimbal,,2BFA6696E29916710D426FA0501294EB935F6B19034F05...,com.verveacu.verveacu,useCoarseLocation,com.verveacu,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False
21193,Lcom/urbanairship/gimbal/GimbalAdapter;->enabl...,Lcom/gimbal,Cross-library,urbanairship,gimbal,,2BFA6696E29916710D426FA0501294EB935F6B19034F05...,com.verveacu.verveacu,useCoarseLocation,com.verveacu,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False
21195,Lcom/urbanairship/gimbal/GimbalAdapter;->stop(),Lcom/gimbal,Cross-library,urbanairship,gimbal,,2BFA6696E29916710D426FA0501294EB935F6B19034F05...,com.verveacu.verveacu,useCoarseLocation,com.verveacu,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False
21236,Lcom/urbanairship,Lcom/gimbal,Cross-library,urbanairship,gimbal,,2BFA6696E29916710D426FA0501294EB935F6B19034F05...,com.verveacu.verveacu,useFineLocation,com.verveacu,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False
21248,Lcom/urbanairship/gimbal/GimbalAdapter;->enabl...,Lcom/gimbal,Cross-library,urbanairship,gimbal,,2BFA6696E29916710D426FA0501294EB935F6B19034F05...,com.verveacu.verveacu,useFineLocation,com.verveacu,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1342735,Lcom/urbanairship,Lcom/gimbal,Cross-library,urbanairship,gimbal,,5B0B25BE66B3916FEABB202B8A94211370F3496DDFBE9D...,com.footlocker.europe.uk,useCoarseLocation,com.footlocker,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False
1342740,Lcom/urbanairship/gimbal/AirshipReadyReceiver;...,Lcom/gimbal,Cross-library,urbanairship,gimbal,,5B0B25BE66B3916FEABB202B8A94211370F3496DDFBE9D...,com.footlocker.europe.uk,useCoarseLocation,com.footlocker,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False
1342807,Lcom/urbanairship,Lcom/gimbal,Cross-library,urbanairship,gimbal,,5B0B25BE66B3916FEABB202B8A94211370F3496DDFBE9D...,com.footlocker.europe.uk,useFineLocation,com.footlocker,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False
1342812,Lcom/urbanairship/gimbal/AirshipReadyReceiver;...,Lcom/gimbal,Cross-library,urbanairship,gimbal,,5B0B25BE66B3916FEABB202B8A94211370F3496DDFBE9D...,com.footlocker.europe.uk,useFineLocation,com.footlocker,False,,Gimbal,Urbanairship,Gimbal,False,True,True,False


In [94]:
identified_df[(identified_df['third_party_lib_caller'].notna() & identified_df['third_party_lib_callee'].notna()) & (identified_df['third_party_lib_caller'] != identified_df['third_party_lib_callee'])]

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
11030,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0DCA113F30DC7C249C4C1D477FBA064AE91DF1D8E0381C...,com.droid.mobile.idlehourgolfandcountryclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
11034,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0DCA113F30DC7C249C4C1D477FBA064AE91DF1D8E0381C...,com.droid.mobile.idlehourgolfandcountryclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
18406,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0032DE9C5E4C421519CDE9DCC1FE5E4AD8EC3D62031864...,com.droid.mobile.tiroasegno,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
18407,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0032DE9C5E4C421519CDE9DCC1FE5E4AD8EC3D62031864...,com.droid.mobile.tiroasegno,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
23570,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0729DBB4DB24E61CFFA0897FA161E041C77F08EF9DB96A...,com.droid.mobile.torontoclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1328890,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0890E942F154593A84478B6E7F53AB19CA074189F7A674...,com.droid.mobile.sanvicenteclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
1328891,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0890E942F154593A84478B6E7F53AB19CA074189F7A674...,com.droid.mobile.sanvicenteclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
1332375,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0303FE0E35FB92A459B0976FA1DB7E01EE1BFFC5CA81E3...,com.droid.mobile.travispointecountryclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
1332379,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0303FE0E35FB92A459B0976FA1DB7E01EE1BFFC5CA81E3...,com.droid.mobile.travispointecountryclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False


In [101]:
identified_df

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
193,Lcom/smaato/sdk,LOooO0o/OooOOO/OooO00o,Cross-library,smaato,OooOOO,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,Smaato,,False,False,True,False
195,LOooO0o/OooOOO/OooO00o,Lcom/smaato/sdk,Cross-library,OooOOO,smaato,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,,Smaato,False,False,False,True
201,LOooO0o/OooOOO/OooO00o/o0Oo0oo/OooO0OO;->inter...,Lcom/smaato/sdk,Cross-library,OooOOO,smaato,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,,Smaato,False,False,False,True
202,LOooO0o/OooOOOo/OooO00o/OoooO00/OooO0o;->run(),Lcom/verizon/ads,Cross-library,OooOOOo,verizon,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,,VerizonAds,False,False,False,True
204,LOooO0o/OooOOO/OooO00o/o00000oo/OooO0o0/OooO0o...,Lcom/smaato/sdk,Cross-library,OooOOO,smaato,,6EE044C69352CD07CD34EC591D84F8F985FFF8989D8C75...,com.adjust.volume.booster.go,useCoarseLocation,com.adjust,False,,,,Smaato,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1347912,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useCoarseLocation,cota.plus,False,,,,,False,False,False,False
1348554,Lcom/braintreepayments,Llib/android/paypal,Cross-library,braintreepayments,android,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useFineLocation,cota.plus,False,,,,,False,False,False,False
1348555,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,useFineLocation,cota.plus,False,,,,,False,False,False,False
1349198,Lcom/braintreepayments,Lcom/cardinalcommerce,Cross-library,braintreepayments,cardinalcommerce,,002F85F2078DD126675416BD5985ED3C45E0CFCB307458...,cota.plus,accesswifistate,cota.plus,False,,,,,False,False,False,False


In [81]:
df[(df['Caller'].str.contains('colocator', case=False) | df['Callee'].str.contains('colocator', case=False))]

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
40254,Lcom/gimbal/internal/communication/services/Co...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
40260,Lcom/gimbal/internal/push/PushHandlerService;-...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
40267,Lcom/gimbal,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
40269,Lcom/gimbal/internal/push/FcmListenerService;-...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
40271,Lcom/gimbal/internal,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1255241,Lcom/gimbal/proximity/core/bluetooth/PendingIn...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,016680555DA677244E43961272223E9FBCD8B43886982F...,com.governorsballmusicfestival,accesswifistate,com.governorsballmusicfestival,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
1255243,Lcom/gimbal/internal/service/GimbalService;->o...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,016680555DA677244E43961272223E9FBCD8B43886982F...,com.governorsballmusicfestival,accesswifistate,com.governorsballmusicfestival,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
1255244,Lcom/gimbal/internal/push/PushHandlerService;-...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,016680555DA677244E43961272223E9FBCD8B43886982F...,com.governorsballmusicfestival,accesswifistate,com.governorsballmusicfestival,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
1255253,Lnet/crowdconnected/androidcolocator,Lorg/altbeacon,Cross-library,crowdconnected,altbeacon,,016680555DA677244E43961272223E9FBCD8B43886982F...,com.governorsballmusicfestival,useBluetoothAdvertise,com.governorsballmusicfestival,False,Colocater,Altbeacon,Colocator,AltBeacon,True,True,False,False


In [82]:
df[(df['Caller'].str.contains('colocator', case=False) | df['Callee'].str.contains('colocator', case=False))]

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
40254,Lcom/gimbal/internal/communication/services/Co...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
40260,Lcom/gimbal/internal/push/PushHandlerService;-...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
40267,Lcom/gimbal,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
40269,Lcom/gimbal/internal/push/FcmListenerService;-...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
40271,Lcom/gimbal/internal,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0BB1C733736DB71971A199D9157DFF10E91FBAE24157F7...,fm.golive.festivalinterceltiquelorient3bc398d9,useCoarseLocation,fm.golive,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1255241,Lcom/gimbal/proximity/core/bluetooth/PendingIn...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,016680555DA677244E43961272223E9FBCD8B43886982F...,com.governorsballmusicfestival,accesswifistate,com.governorsballmusicfestival,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
1255243,Lcom/gimbal/internal/service/GimbalService;->o...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,016680555DA677244E43961272223E9FBCD8B43886982F...,com.governorsballmusicfestival,accesswifistate,com.governorsballmusicfestival,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
1255244,Lcom/gimbal/internal/push/PushHandlerService;-...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,016680555DA677244E43961272223E9FBCD8B43886982F...,com.governorsballmusicfestival,accesswifistate,com.governorsballmusicfestival,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False
1255253,Lnet/crowdconnected/androidcolocator,Lorg/altbeacon,Cross-library,crowdconnected,altbeacon,,016680555DA677244E43961272223E9FBCD8B43886982F...,com.governorsballmusicfestival,useBluetoothAdvertise,com.governorsballmusicfestival,False,Colocater,Altbeacon,Colocator,AltBeacon,True,True,False,False


In [83]:
df[(df['Caller'].str.contains('coelib') | df['Callee'].str.contains('coelib'))]

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
64844,Lcom/english_news_in_levels/MainActivity;->onD...,Lcoelib/c/couluslibrary,Cross-library,english_news_in_levels,c,,01AAAB2EBFA091BE48F659F6F0E2CF95E552AE88C4C07A...,com.newsinlevels,useCoarseLocation,com.newsinlevels,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
64855,Lcom/english_news_in_levels/MainActivity$a;->o...,Lcoelib/c/couluslibrary,Cross-library,english_news_in_levels,c,,01AAAB2EBFA091BE48F659F6F0E2CF95E552AE88C4C07A...,com.newsinlevels,useCoarseLocation,com.newsinlevels,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
64867,Lcom/english_news_in_levels/MainActivity;->onD...,Lcoelib/c/couluslibrary,Cross-library,english_news_in_levels,c,,01AAAB2EBFA091BE48F659F6F0E2CF95E552AE88C4C07A...,com.newsinlevels,useFineLocation,com.newsinlevels,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
64878,Lcom/english_news_in_levels/MainActivity$a;->o...,Lcoelib/c/couluslibrary,Cross-library,english_news_in_levels,c,,01AAAB2EBFA091BE48F659F6F0E2CF95E552AE88C4C07A...,com.newsinlevels,useFineLocation,com.newsinlevels,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
249430,Lluo/m,Lcoelib/c/couluslibrary,Cross-library,m,c,,F97CF99148F395EE6AF21F308D16F1DDCBEFB225A0C48A...,luo.speedometergps,useCoarseLocation,luo.speedometergps,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
249449,Lluo/m,Lcoelib/c/couluslibrary,Cross-library,m,c,,F97CF99148F395EE6AF21F308D16F1DDCBEFB225A0C48A...,luo.speedometergps,useFineLocation,luo.speedometergps,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
419271,Lcom/qiblacompass/MainActivity;->onDestroy(),Lcoelib/c/couluslibrary,Cross-library,qiblacompass,c,,E438B80085E04758A65EA98C5D0F3DEBC4DB1466B433E2...,com.prayertimes.ramadan.qiblacompass,useCoarseLocation,com.prayertimes,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
419294,Lcom/qiblacompass/MainActivity;->onCreate(),Lcoelib/c/couluslibrary,Cross-library,qiblacompass,c,,E438B80085E04758A65EA98C5D0F3DEBC4DB1466B433E2...,com.prayertimes.ramadan.qiblacompass,useCoarseLocation,com.prayertimes,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
419672,Lcom/qiblacompass/MainActivity;->onDestroy(),Lcoelib/c/couluslibrary,Cross-library,qiblacompass,c,,E438B80085E04758A65EA98C5D0F3DEBC4DB1466B433E2...,com.prayertimes.ramadan.qiblacompass,useFineLocation,com.prayertimes,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False
419695,Lcom/qiblacompass/MainActivity;->onCreate(),Lcoelib/c/couluslibrary,Cross-library,qiblacompass,c,,E438B80085E04758A65EA98C5D0F3DEBC4DB1466B433E2...,com.prayertimes.ramadan.qiblacompass,useFineLocation,com.prayertimes,False,,CoulusCoelib,,CoulusCoelib,False,True,False,False


In [84]:
df = df[(df['third_party_lib_caller'].notna() & df['third_party_lib_callee'].notna()) & (df['third_party_lib_caller'] != df['third_party_lib_callee'])]

In [85]:
df

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
11030,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0DCA113F30DC7C249C4C1D477FBA064AE91DF1D8E0381C...,com.droid.mobile.idlehourgolfandcountryclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
11034,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0DCA113F30DC7C249C4C1D477FBA064AE91DF1D8E0381C...,com.droid.mobile.idlehourgolfandcountryclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
18406,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0032DE9C5E4C421519CDE9DCC1FE5E4AD8EC3D62031864...,com.droid.mobile.tiroasegno,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
18407,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0032DE9C5E4C421519CDE9DCC1FE5E4AD8EC3D62031864...,com.droid.mobile.tiroasegno,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
23570,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0729DBB4DB24E61CFFA0897FA161E041C77F08EF9DB96A...,com.droid.mobile.torontoclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1328890,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0890E942F154593A84478B6E7F53AB19CA074189F7A674...,com.droid.mobile.sanvicenteclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
1328891,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0890E942F154593A84478B6E7F53AB19CA074189F7A674...,com.droid.mobile.sanvicenteclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
1332375,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0303FE0E35FB92A459B0976FA1DB7E01EE1BFFC5CA81E3...,com.droid.mobile.travispointecountryclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False
1332379,Lcom/radiusnetworks/transmission/Advertisement...,Lorg/altbeacon,Cross-library,radiusnetworks,altbeacon,,0303FE0E35FB92A459B0976FA1DB7E01EE1BFFC5CA81E3...,com.droid.mobile.travispointecountryclub,useBluetoothAdvertise,com.droid,False,RadiusNetworks,Altbeacon,RadiusNetworks,AltBeacon,True,True,False,False


In [86]:
grouped_df = df.groupby('package_name').apply(lambda x: x.drop_duplicates(subset=['third_party_lib_caller', 'third_party_lib_callee'])).reset_index(drop=True)

# Count unique interactions for each third-party library caller
caller_interactions = grouped_df.groupby('third_party_lib_caller')['third_party_lib_callee'].nunique().reset_index(name='unique_callees')

# Count unique interactions for each third-party library callee
callee_interactions = grouped_df.groupby('third_party_lib_callee')['third_party_lib_caller'].nunique().reset_index(name='unique_callers')

# Combine the counts
interaction_stats = pd.merge(caller_interactions, callee_interactions, left_on='third_party_lib_caller', right_on='third_party_lib_callee', how='outer').fillna(0)
interaction_stats['total_unique_interactions'] = interaction_stats['unique_callees'] + interaction_stats['unique_callers']
interaction_stats = interaction_stats[['third_party_lib_caller', 'unique_callees', 'unique_callers', 'total_unique_interactions']]
interaction_stats

Unnamed: 0,third_party_lib_caller,unique_callees,unique_callers,total_unique_interactions
0,BeaconsInSpace,1.0,0.0,1.0
1,Colocater,1.0,1.0,2.0
2,Gimbal,1.0,3.0,4.0
3,MOCA,1.0,0.0,1.0
4,RadiusNetworks,1.0,0.0,1.0
5,SalesforceMarketingCloud,1.0,0.0,1.0
6,Singlespot,1.0,0.0,1.0
7,XMode,1.0,0.0,1.0
8,bazaarvoice,1.0,0.0,1.0
9,mParticle,1.0,0.0,1.0


In [87]:
grouped_df

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
0,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,0AC4591BE200C4B6D583094B1DEBF982FFAD614DDA7AFB...,bitnet.hu.rossmann,useBluetoothScan,bitnet.hu,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False
1,Lcom/mparticle,Lio/radar,Cross-library,mparticle,radar,,0E7FA98354BCADE983E63133EDBC851782533CFEAD6436...,ch.kaisin,useBluetoothScan,ch.kaisin,False,mParticle,Radar,mParticle,Radar,True,True,False,False
2,Lcom/innoquant/moca,Lorg/altbeacon,Cross-library,innoquant,altbeacon,,DDEDEB572125E4755E8F05A0A8B8047286670CE5B22E71...,cl.sodimac,useBluetoothScan,cl.sodimac,False,MOCA,Altbeacon,MOCA,AltBeacon,True,True,False,False
3,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,0D790C9091554A83FE62D176B8BFFAC2933AFE94E076ED...,com.CredemMobile,useBluetoothScan,com.CredemMobile,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False
4,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,42B3CCE340ADCD6A4A04753571877FCBF8C9729F4D9591...,com.abz.app,useBluetoothScan,com.abz,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,Lcom/mparticle,Lio/radar,Cross-library,mparticle,radar,,0A0442E089DC447379F79C457EC880F361350883188EBD...,is.tommis,useBluetoothScan,is.tommis,False,mParticle,Radar,mParticle,Radar,True,True,False,False
213,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,338DEC539B6EE3C362ECD5021567C59CCC8322A99D5655...,jp.tokyodisneyresort.portalapp,useBluetoothScan,jp.tokyodisneyresort,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False
214,Lcom/salesforce/marketingcloud,Lorg/altbeacon,Cross-library,salesforce,altbeacon,,A0E2CAAE04E2F87CEA2887C2B10AB6ECFF081BA37B9393...,net.cloudhms.booking.vinpearl,useBluetoothScan,net.cloudhms,False,SalesforceMarketingCloud,Altbeacon,SalesforceMarketingCloud,AltBeacon,True,True,False,False
215,Lcom/gimbal/internal/communication/services/Co...,Lnet/crowdconnected/androidcolocator,Cross-library,gimbal,crowdconnected,,0CAFF12AF09593D01E54C37E86FE2D354FBAA0CCF01784...,nl.downtherabbithole.festival2015,useCoarseLocation,nl.downtherabbithole,False,Gimbal,Colocater,Gimbal,Colocator,True,True,False,False


In [67]:
grouped_df = df.groupby('package_name').apply(lambda x: x.drop_duplicates(subset=['third_party_lib_caller', 'third_party_lib_callee'])).reset_index(drop=True)

# List unique interactions for each third-party library caller
caller_interactions = grouped_df.groupby('third_party_lib_caller')['third_party_lib_callee'].unique().reset_index()
caller_interactions['unique_interactions_count'] = caller_interactions['third_party_lib_callee'].apply(len)
caller_interactions

Unnamed: 0,third_party_lib_caller,third_party_lib_callee,unique_interactions_count
0,BeaconsInSpace,[Altbeacon],1
1,MOCA,[Altbeacon],1
2,RadiusNetworks,[Altbeacon],1
3,SalesforceMarketingCloud,[Altbeacon],1
4,Singlespot,[Altbeacon],1
5,XMode,[Altbeacon],1
6,bazaarvoice,[Gimbal],1
7,mParticle,[Radar],1
8,roximity,"[Gimbal, Altbeacon]",2
9,ubudu,[Altbeacon],1


In [63]:
df[df['Caller_sdk'] == 'openx']

Unnamed: 0,Caller,Callee,Type,Caller_sdk,Callee_sdk,System API Interaction,file_hash,package_name,dot_file_name,app_domain,is_frist_party,third_party_lib_caller,third_party_lib_callee,classified_caller,classified_callee,known_third_party_caller,known_third_party_callee,unknown_lib_caller,unknown_lib_callee
96641,Lcom/openx,Lcom/kochava/android,Cross-library,openx,kochava,,FDA1614184B70838AD5A680527C5337B7D61F9576A265C...,com.jacobsmedia.nwpr,accesswifistate,com.jacobsmedia,False,,kochava,OpenX,Kochava,False,True,True,False
267065,Lcom/openx,Lcom/kochava/android,Cross-library,openx,kochava,,9A4464578CFF01F4905D24787AB17095086C9A491EDAD4...,com.jacobsmedia.lma91x,accesswifistate,com.jacobsmedia,False,,kochava,OpenX,Kochava,False,True,True,False
561924,Lcom/openx,Lcom/kochava/android,Cross-library,openx,kochava,,97FE560E5E78A3D979463E98A260DE9FA3BF08C9F6976E...,com.jacapps.wbns,accesswifistate,com.jacapps,False,,kochava,OpenX,Kochava,False,True,True,False
665370,Lcom/openx,Lcom/kochava/android,Cross-library,openx,kochava,,723A729B6209BC53FFDF9B196EE1410F4E9AECF6774910...,com.jacobsmedia.wsb,accesswifistate,com.jacobsmedia,False,,kochava,OpenX,Kochava,False,True,True,False
824857,Lcom/openx/view/tp/adapters/interstitials/InMo...,Lcom/inmobi,Cross-library,openx,inmobi,,2E05DC900331938FF5FF6C1CBF3A43B77F6886312C0B24...,com.intentsoftware.solitaire.free__aat_google,useCoarseLocation,com.intentsoftware,False,,,OpenX,Inmobi,False,False,True,True
824879,Lcom/openx/view/tp/adapters/banners/InMobiBann...,Lcom/inmobi,Cross-library,openx,inmobi,,2E05DC900331938FF5FF6C1CBF3A43B77F6886312C0B24...,com.intentsoftware.solitaire.free__aat_google,useCoarseLocation,com.intentsoftware,False,,,OpenX,Inmobi,False,False,True,True
824922,Lcom/openx/view/tp/adapters/interstitials/InMo...,Lcom/inmobi,Cross-library,openx,inmobi,,2E05DC900331938FF5FF6C1CBF3A43B77F6886312C0B24...,com.intentsoftware.solitaire.free__aat_google,useFineLocation,com.intentsoftware,False,,,OpenX,Inmobi,False,False,True,True
824944,Lcom/openx/view/tp/adapters/banners/InMobiBann...,Lcom/inmobi,Cross-library,openx,inmobi,,2E05DC900331938FF5FF6C1CBF3A43B77F6886312C0B24...,com.intentsoftware.solitaire.free__aat_google,useFineLocation,com.intentsoftware,False,,,OpenX,Inmobi,False,False,True,True
824970,Lcom/openx/core,Lcom/kochava/android,Cross-library,openx,kochava,,2E05DC900331938FF5FF6C1CBF3A43B77F6886312C0B24...,com.intentsoftware.solitaire.free__aat_google,accesswifistate,com.intentsoftware,False,,kochava,OpenX,Kochava,False,True,True,False
825009,Lcom/openx/view/tp/adapters/interstitials/InMo...,Lcom/inmobi,Cross-library,openx,inmobi,,2E05DC900331938FF5FF6C1CBF3A43B77F6886312C0B24...,com.intentsoftware.solitaire.free__aat_google,accesswifistate,com.intentsoftware,False,,,OpenX,Inmobi,False,False,True,True


## Cross lib in general PII Leak

In [33]:
df = pii_leak[pii_leak['Type'] == 'Cross-library']
cleaned_df = clean_dataframe(df)
matches_df = get_matches(cleaned_df)
identified_df = identify_xlib_interactions(matches_df)

In [34]:
beacon_to_beacon = identified_df[((identified_df['known_third_party_callee'] & identified_df['known_third_party_caller']) | (identified_df['known_third_party_caller'] & identified_df['known_third_party_callee']))& 
(identified_df['third_party_lib_caller'] != identified_df['third_party_lib_callee'])]
beacon_to_beaconpii = beacon_to_beacon.groupby(['third_party_lib_caller', 'third_party_lib_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
beacon_to_beaconpii

Unnamed: 0,third_party_lib_caller,third_party_lib_callee,dot_file_name,unique_package_count
0,Colocater,Altbeacon,bluetooth_advertise,30
1,Colocater,Gimbal,readLocation,4
2,Gimbal,Colocater,access_coarse_location,4
3,Gimbal,Colocater,access_fine_location,4
4,Gimbal,Colocater,nearby_wifi_devices,4
5,Gimbal,Colocater,readLocation,4
6,MOCA,Altbeacon,bluetooth_scan,1
7,Proximiio,indooratlas,readLocation,2
8,RadiusNetworks,Altbeacon,bluetooth_advertise,150
9,SalesforceMarketingCloud,Altbeacon,bluetooth_scan,15


In [35]:
tpl_to_beacon = identified_df[((identified_df['known_third_party_callee'] & identified_df['unknown_lib_caller']) & (identified_df['third_party_lib_callee'] != identified_df['classified_caller']))]
tpl_to_beaconpii = tpl_to_beacon.groupby(['classified_caller', 'third_party_lib_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
tpl_to_beaconpii

Unnamed: 0,classified_caller,third_party_lib_callee,dot_file_name,unique_package_count
0,Geniee,Altbeacon,bluetooth_scan,3
1,Glympse,roximity,access_coarse_location,2
2,InLoco,roximity,access_coarse_location,2
3,NativeX,kochava,nearby_wifi_devices,5
4,OpenX,kochava,get_accounts,3
5,OpenX,kochava,nearby_wifi_devices,7
6,Pushwoosh,roximity,access_coarse_location,2
7,ShallWeAD,Gimbal,access_coarse_location,2
8,ShallWeAD,Gimbal,access_fine_location,2
9,ShallWeAD,Gimbal,nearby_wifi_devices,2


In [36]:
beacon_to_tpl = identified_df[(identified_df['known_third_party_caller'] & identified_df['unknown_lib_callee']) & (identified_df['third_party_lib_caller'] != identified_df['classified_callee'])]
beacon_to_tplpii = beacon_to_tpl.groupby(['third_party_lib_caller', 'classified_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
beacon_to_tplpii

Unnamed: 0,third_party_lib_caller,classified_callee,dot_file_name,unique_package_count
0,Altbeacon,Dynatrace,readLocation,6
1,Colocater,GoogleCrashLytics,body_sensors,13
2,CoulusCoelib,Blesh,access_coarse_location,1
3,CoulusCoelib,Blesh,access_fine_location,1
4,CoulusCoelib,Blesh,nearby_wifi_devices,1
5,CoulusCoelib,Blesh,readLocation,1
6,Demdex,Dynatrace,readLocation,23
7,Exponea,Dynatrace,readLocation,1
8,LeanPlum,CleverTap,readLocation,1
9,LeanPlum,CleverTap,read_phone_state,1


In [37]:
tpl_to_tpl = identified_df[(identified_df['unknown_lib_caller'] & identified_df['unknown_lib_callee'])  & (identified_df['classified_caller'] != identified_df['classified_callee'])]
tpl_to_tplpii = tpl_to_tpl.groupby(['classified_caller', 'classified_callee', 'dot_file_name'])['package_name'].nunique().reset_index(name='unique_package_count')
tpl_to_tplpii

Unnamed: 0,classified_caller,classified_callee,dot_file_name,unique_package_count
0,AdColony,Unity3dAds,access_coarse_location,5
1,AdColony,Unity3dAds,access_fine_location,5
2,AdColony,Unity3dAds,body_sensors,2
3,AdColony,Unity3dAds,readLocation,5
4,AdMarvel,AdColony,readLocation,1
...,...,...,...,...
382,ironSource,Mintegral,read_phone_state,6
383,ironSource,Vungle,access_coarse_location,9
384,ironSource,Vungle,access_fine_location,9
385,ironSource,Vungle,readLocation,9


## Plotting the network graph beacon to beacon interactions

In [38]:
beacon_to_beaconpii.rename(columns={'third_party_lib_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)
beacon_to_beaconpii

Unnamed: 0,caller,callee,dot_file_name,unique_package_count
0,Colocater,Altbeacon,bluetooth_advertise,30
1,Colocater,Gimbal,readLocation,4
2,Gimbal,Colocater,access_coarse_location,4
3,Gimbal,Colocater,access_fine_location,4
4,Gimbal,Colocater,nearby_wifi_devices,4
5,Gimbal,Colocater,readLocation,4
6,MOCA,Altbeacon,bluetooth_scan,1
7,Proximiio,indooratlas,readLocation,2
8,RadiusNetworks,Altbeacon,bluetooth_advertise,150
9,SalesforceMarketingCloud,Altbeacon,bluetooth_scan,15


In [39]:
beacon_to_beacon9.rename(columns={'third_party_lib_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)
beacon_to_beacon9

Unnamed: 0,caller,callee,dot_file_name,unique_package_count
0,BeaconsInSpace,Altbeacon,useBluetoothBasic,1
1,Gimbal,Colocater,accesswifistate,4
2,Gimbal,Colocater,useBluetoothBasic,4
3,Gimbal,Colocater,useCoarseLocation,4
4,Gimbal,Colocater,useFineLocation,4
5,MOCA,Altbeacon,useBluetoothAdmin,1
6,MOCA,Altbeacon,useBluetoothBasic,1
7,SalesforceMarketingCloud,Altbeacon,useBluetoothAdmin,15
8,SalesforceMarketingCloud,Altbeacon,useBluetoothBasic,15
9,Singlespot,Altbeacon,useBluetoothAdmin,1


In [40]:
beacon_to_beacon12.rename(columns={'third_party_lib_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)
beacon_to_beacon12

Unnamed: 0,caller,callee,dot_file_name,unique_package_count
0,BeaconsInSpace,Altbeacon,useBluetoothConnect,1
1,Colocater,Altbeacon,useBluetoothAdvertise,30
2,Gimbal,Colocater,accesswifistate,4
3,Gimbal,Colocater,useCoarseLocation,4
4,Gimbal,Colocater,useFineLocation,4
5,MOCA,Altbeacon,useBluetoothScan,1
6,RadiusNetworks,Altbeacon,useBluetoothAdvertise,150
7,SalesforceMarketingCloud,Altbeacon,useBluetoothScan,15
8,Singlespot,Altbeacon,useBluetoothScan,1
9,XMode,Altbeacon,useBluetoothConnect,1


In [41]:
beacon_to_beacon = pd.concat([beacon_to_beacon9, beacon_to_beacon12, beacon_to_beaconpii])
standardized_mapping = {
    'useBluetoothBasic': 'bluetooth_basic',
    'useBluetoothAdvertise': 'bluetooth_advertise',
    'bluetooth_advertise': 'bluetooth_advertise',
    'useCoarseLocation': 'access_coarse_location',
    'useFineLocation': 'access_fine_location',
    'useBluetoothAdmin': 'bluetooth_admin',
    'useBluetoothConnect': 'bluetooth_connect',
    'useBluetoothScan': 'bluetooth_scan',
    'accesswifistate': 'access_wifi_state',
    'readLocation': 'read_location',
    'access_coarse_location': 'access_coarse_location',
    'access_fine_location': 'access_fine_location',
    'nearby_wifi_devices': 'nearby_wifi_devices',
    'bluetooth_scan': 'bluetooth_scan'
}

# Apply the mapping to the 'dot_file_name' column
beacon_to_beacon['dot_file_name'] = beacon_to_beacon['dot_file_name'].map(standardized_mapping)
beacon_to_beacon.drop_duplicates(subset=['caller', 'callee', 'dot_file_name'], inplace=True)

In [42]:
beacon_to_beacon.to_csv('beacon_to_beacon.csv', index=False)

In [43]:
beacon_to_beaconpii.rename(columns={'third_party_lib_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)
beacon_to_beaconpii
beacon_to_beacon9.rename(columns={'third_party_lib_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)
beacon_to_beacon9
beacon_to_beacon12.rename(columns={'third_party_lib_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)
beacon_to_beacon12
beacon_to_beacon = pd.concat([beacon_to_beacon9, beacon_to_beacon12, beacon_to_beaconpii])
standardized_mapping = {
    'useBluetoothBasic': 'bluetooth_basic',
    'useBluetoothAdvertise': 'bluetooth_advertise',
    'bluetooth_advertise': 'bluetooth_advertise',
    'useCoarseLocation': 'access_coarse_location',
    'useFineLocation': 'access_fine_location',
    'useBluetoothAdmin': 'bluetooth_admin',
    'useBluetoothConnect': 'bluetooth_connect',
    'useBluetoothScan': 'bluetooth_scan',
    'accesswifistate': 'access_wifi_state',
    'readLocation': 'read_location',
    'access_coarse_location': 'access_coarse_location',
    'access_fine_location': 'access_fine_location',
    'nearby_wifi_devices': 'nearby_wifi_devices',
    'bluetooth_scan': 'bluetooth_scan'
}

# Apply the mapping to the 'dot_file_name' column
beacon_to_beacon['dot_file_name'] = beacon_to_beacon['dot_file_name'].map(standardized_mapping)
beacon_to_beacon.drop_duplicates(subset=['caller', 'callee', 'dot_file_name'], inplace=True)
beacon_to_beacon.to_csv('beacon_to_beacon.csv', index=False)
from pyvis.network import Network
import networkx as nx

df = beacon_to_beacon

# Define colors for each interaction type
interaction_colors = {
    'bluetooth_basic': 'blue',
    'access_wifi_state': 'green',
    'access_coarse_location': 'red',
    'access_fine_location': 'purple',
    'bluetooth_admin': 'orange',
    'bluetooth_connect': 'yellow',
    'bluetooth_scan': 'pink',
    'bluetooth_advertise': 'brown',
    'read_location': 'cyan',
    'nearby_wifi_devices': 'magenta'
    # Add more interactions and their colors here if needed
}

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges to the graph
for _, row in df.iterrows():
    caller = row['caller']
    callee = row['callee']
    interaction = row['dot_file_name']
    count = row['unique_package_count']
    
    # Add nodes
    G.add_node(caller)
    G.add_node(callee)
    
    # Add edges with interaction as label, color, and count as weight
    G.add_edge(caller, callee, label=interaction, weight=count, color=interaction_colors[interaction])

# Create a PyVis network
net = Network(notebook=True, directed=True)

# Convert networkx graph to PyVis graph
for node in G.nodes:
    net.add_node(node, label=node, title=node, color='lightblue', size=20)

for edge in G.edges(data=True):
    caller, callee, data = edge
    net.add_edge(caller, callee, title=f"{data['label']}: {data['weight']} interactions", color=data['color'], arrowStrikethrough=True)


net.set_options("""
{
  "layout": {
    "hierarchical": {
      "enabled": true,
      "direction": "UD",
      "sortMethod": "hubsize",
      "levelSeparation": 100
    }
  },
  "nodes": {
    "color": {
      "background": "lightblue",
      "border": "black"
    },
    "font": {
      "color": "black",
      "size": 15
    }
  },
  "edges": {
    "smooth": false,
    "arrows": {
      "to": {
        "enabled": true,
        "type": "arrow"
      }
    },
    "width": 3 
  }
}
""")


# Show the graph
net.show('beacon_to_beacon_interaction.html')


beacon_to_beacon_interaction.html


## Plotting the network graph beacon to tpl interactions

In [48]:
tpl_to_beacon9.rename(columns={'classified_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)

In [49]:
beacon_to_tpl9.rename(columns={'third_party_lib_caller': 'caller', 'classified_callee': 'callee'}, inplace=True)

In [50]:
tpl_to_beacon12.rename(columns={'classified_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)

In [51]:
beacon_to_tpl12.rename(columns={'third_party_lib_caller': 'caller', 'classified_callee': 'callee'}, inplace=True)

In [53]:
tpl_to_beaconpii.rename(columns={'classified_caller': 'caller', 'third_party_lib_callee': 'callee'}, inplace=True)

In [55]:
beacon_to_tplpii.rename(columns={'third_party_lib_caller': 'caller', 'classified_callee': 'callee'}, inplace=True)

In [181]:
beacon_to_tpl = pd.concat([beacon_to_tpl9, beacon_to_tpl12, tpl_to_beacon9, tpl_to_beacon12, beacon_to_tplpii, tpl_to_beaconpii])

In [313]:
beacon_to_tpl.to_csv('beacon_to_tpl.csv', index=False)

In [302]:
tt = pd.concat([beacon_to_tpl9, beacon_to_tpl12, beacon_to_tplpii])
tt.nunique()

caller                  22
callee                  16
dot_file_name           13
unique_package_count    13
dtype: int64

In [310]:
len(set(list(beacon_to_beacon['callee']) + list(beacon_to_beacon['caller'])))

17

In [311]:
beacon_to_tpl.dot_file_name.unique()

array(['change_wifi_state', 'access_coarse_location',
       'access_fine_location', 'bluetooth_basic', 'access_wifi_state',
       'record_audio', 'bluetooth_admin', 'bluetooth_scan',
       'read_location', 'nearby_wifi_devices', 'read_phone_state'],
      dtype=object)

In [183]:
standardized_mapping = {
    'useBluetoothBasic': 'bluetooth_basic',
    'useBluetoothAdvertise': 'bluetooth_advertise',
    'bluetooth_advertise': 'bluetooth_advertise',
    'useCoarseLocation': 'access_coarse_location',
    'useFineLocation': 'access_fine_location',
    'useBluetoothAdmin': 'bluetooth_admin',
    'useBluetoothConnect': 'bluetooth_connect',
    'useBluetoothScan': 'bluetooth_scan',
    'accesswifistate': 'access_wifi_state',
    'readLocation': 'read_location',
    'access_coarse_location': 'access_coarse_location',
    'access_fine_location': 'access_fine_location',
    'nearby_wifi_devices': 'nearby_wifi_devices',
    'bluetooth_scan': 'bluetooth_scan',
    'body_sensors': 'body_sensors',
    'get_accounts': 'get_accounts',
    'record_audio': 'record_audio',
    'read_phone_state': 'read_phone_state',
    'Record audio': 'record_audio',
    'changewifistate': 'change_wifi_state'
}

# Apply the mapping to the 'dot_file_name' column
beacon_to_tpl['dot_file_name'] = beacon_to_tpl['dot_file_name'].map(standardized_mapping)
beacon_to_tpl.drop_duplicates(subset=['caller', 'callee', 'dot_file_name'], inplace=True)

In [184]:
beacon_to_tpl = beacon_to_tpl[beacon_to_tpl['dot_file_name'] != 'body_sensors']
beacon_to_tpl = beacon_to_tpl[beacon_to_tpl['dot_file_name'] != 'get_accounts']

In [185]:
from pyvis.network import Network
import networkx as nx

df = beacon_to_tpl

# Define colors for each interaction type
# interaction_colors = {
#     'bluetooth_basic': 'blue',
#     'access_wifi_state': 'green',
#     'access_coarse_location': 'red',
#     'access_fine_location': 'purple',
#     'bluetooth_admin': 'orange',
#     'bluetooth_connect': 'yellow',
#     'bluetooth_scan': 'pink',
#     'bluetooth_advertise': 'brown',
#     'read_location': 'cyan',
#     'nearby_wifi_devices': 'magenta',
#     'change_wifi_state': 'lightblue',
#     'body_sensors': 'lightgreen',
#     'get_accounts': 'lightred',
#     'record_audio': 'lightpurple',
#     'read_phone_state': 'lightorange'
#     # Add more interactions and their colors here if needed
# }

interaction_colors = {
    'bluetooth_basic': 'blue',
    'access_wifi_state': 'green',
    'access_coarse_location': 'red',
    'access_fine_location': 'red',
    'bluetooth_admin': 'blue',
    'bluetooth_connect': 'blue',
    'bluetooth_scan': 'blue',
    'bluetooth_advertise': 'blue',
    'read_location': 'red',
    'nearby_wifi_devices': 'green',
    'change_wifi_state': 'green',
    # 'body_sensors': 'lightgreen',
    # 'get_accounts': 'lightred',
    'record_audio': 'yellow',
    'read_phone_state': 'orange'
    # Add more interactions and their colors here if needed
}

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges to the graph
for _, row in df.iterrows():
    caller = row['caller']
    callee = row['callee']
    interaction = row['dot_file_name']
    count = row['unique_package_count']
    
    # Add nodes
    G.add_node(caller)
    G.add_node(callee)
    
    # Add edges with interaction as label, color, and count as weight
    G.add_edge(caller, callee, label=interaction, weight=count, color=interaction_colors[interaction])

# Create a PyVis network
net = Network(notebook=True, directed=True)

# Convert networkx graph to PyVis graph
for node in G.nodes:
    net.add_node(node, label=node, title=node, color='lightblue', size=20)

for edge in G.edges(data=True):
    caller, callee, data = edge
    net.add_edge(caller, callee, title=f"{data['label']}: {data['weight']} interactions", color=data['color'], arrowStrikethrough=True)


net.set_options("""
{
  "layout": {
    "hierarchical": {
      "enabled": true,
      "direction": "UD",
      "sortMethod": "hubsize",
      "levelSeparation": 80
    }
  },
  "nodes": {
    "color": {
      "background": "lightblue",
      "border": "black"
    },
    "font": {
      "color": "black",
      "size": 15
    }
  },
  "edges": {
    "smooth": false,
    "arrows": {
      "to": {
        "enabled": true,
        "type": "arrow"
      }
    },
    "width": 3 
  }
}
""")


# Show the graph
net.show('beacon_to_tpl_interaction.html')


beacon_to_tpl_interaction.html


In [295]:
beacon_to_tpl

Unnamed: 0,caller,callee,dot_file_name,unique_package_count,b-to-b
0,CoulusCoelib,Blesh,change_wifi_state,1,0
1,CoulusCoelib,Blesh,access_coarse_location,1,0
2,CoulusCoelib,Blesh,access_fine_location,1,0
3,LeanPlum,CleverTap,bluetooth_basic,1,0
4,PredicIO,NewRelic,access_coarse_location,1,0
...,...,...,...,...,...
10,ShallWeAD,Gimbal,read_location,2,0
14,Urbanairship,Gimbal,nearby_wifi_devices,75,0
15,Urbanairship,Gimbal,read_location,75,0
18,Verve,Gimbal,read_location,6,0


In [186]:
beacon_to_beacon['b-to-b'] = 1
beacon_to_beacon

Unnamed: 0,caller,callee,dot_file_name,unique_package_count,b-to-b
0,BeaconsInSpace,Altbeacon,bluetooth_basic,1,1
1,Gimbal,Colocater,access_wifi_state,4,1
2,Gimbal,Colocater,bluetooth_basic,4,1
3,Gimbal,Colocater,access_coarse_location,4,1
4,Gimbal,Colocater,access_fine_location,4,1
5,MOCA,Altbeacon,bluetooth_admin,1,1
6,MOCA,Altbeacon,bluetooth_basic,1,1
7,SalesforceMarketingCloud,Altbeacon,bluetooth_admin,15,1
8,SalesforceMarketingCloud,Altbeacon,bluetooth_basic,15,1
9,Singlespot,Altbeacon,bluetooth_admin,1,1


In [187]:
beacon_to_tpl['b-to-b'] = 0
beacon_to_tpl

Unnamed: 0,caller,callee,dot_file_name,unique_package_count,b-to-b
0,CoulusCoelib,Blesh,change_wifi_state,1,0
1,CoulusCoelib,Blesh,access_coarse_location,1,0
2,CoulusCoelib,Blesh,access_fine_location,1,0
3,LeanPlum,CleverTap,bluetooth_basic,1,0
4,PredicIO,NewRelic,access_coarse_location,1,0
...,...,...,...,...,...
10,ShallWeAD,Gimbal,read_location,2,0
14,Urbanairship,Gimbal,nearby_wifi_devices,75,0
15,Urbanairship,Gimbal,read_location,75,0
18,Verve,Gimbal,read_location,6,0


In [191]:
beacon_graph = pd.concat([beacon_to_beacon, beacon_to_tpl])

In [268]:
mapping = {
    'AnvatoAGoogleCompany': 'Anvato',
    'GoogleCrashLytics': 'CrashLytics',
    'PilgrimbyFoursquare': 'Pilgrim',
    'SalesforceMarketingCloud': 'Salesforce MC'
}

beacon_graph['callee'] = beacon_graph['callee'].replace(mapping)
beacon_graph['caller'] = beacon_graph['caller'].replace(mapping)

In [256]:
beacon_graph.drop_duplicates(subset=['caller', 'callee', 'dot_file_name'], inplace=True)

In [266]:
beacon_graph[beacon_graph['callee'] == 'SalesforceMarketingCloud']

Unnamed: 0,caller,callee,dot_file_name,unique_package_count,b-to-b,dot_file_name_normalized


In [241]:
interaction_mapping = {
    'bluetooth_basic': 'bluetooth',
    'bluetooth_admin': 'bluetooth',
    'bluetooth_connect': 'bluetooth',
    'bluetooth_scan': 'bluetooth',
    'bluetooth_advertise': 'bluetooth',
    
    'access_wifi_state': 'wifi',
    'change_wifi_state': 'wifi',
    'nearby_wifi_devices': 'wifi',
    
    'access_coarse_location': 'location',
    'access_fine_location': 'location',
    'read_location': 'location',
    
    'record_audio': 'record_audio',
    'read_phone_state': 'read_phone_state'
}

beacon_graph['dot_file_name_normalized'] = beacon_graph['dot_file_name'].replace(interaction_mapping)
beacon_graph.drop_duplicates(subset=['caller', 'callee', 'dot_file_name_normalized'], inplace=True)


In [222]:
from pyvis.network import Network
import networkx as nx

# Assuming df is the DataFrame you're working with, and it has the column 'b-to-b'
df = beacon_graph[beacon_graph['dot_file_name'] != 'read_phone_state']

# Define colors for each interaction type
interaction_colors = {
    'bluetooth_basic': 'blue',
    'access_wifi_state': 'green',
    'access_coarse_location': 'red',
    'access_fine_location': 'red',
    'bluetooth_admin': 'blue',
    'bluetooth_connect': 'blue',
    'bluetooth_scan': 'blue',
    'bluetooth_advertise': 'blue',
    'read_location': 'red',
    'nearby_wifi_devices': 'green',
    'change_wifi_state': 'green',
    'record_audio': 'yellow',
    'read_phone_state': 'orange'
}

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges to the graph
for _, row in df.iterrows():
    caller = row['caller']
    callee = row['callee']
    interaction = row['dot_file_name']
    count = row['unique_package_count']
    b_to_b = row['b-to-b']
    
    # Add nodes
    G.add_node(caller)
    G.add_node(callee)
    
    # Add edges with interaction as label, color, and count as weight
    G.add_edge(caller, callee, label=interaction, weight=count, color=interaction_colors[interaction], b_to_b=b_to_b)

# Create a PyVis network
net = Network(notebook=True, directed=True, cdn_resources='remote')

# Convert networkx graph to PyVis graph
for node in G.nodes:
    net.add_node(node, label=node, title=node, color='gold', size=20)

for edge in G.edges(data=True):
    caller, callee, data = edge
    edge_style = 'dotted' if data['b_to_b'] == 1 else 'solid'  # Use dotted line if b_to_b is 1
    net.add_edge(
        caller, 
        callee, 
        title=f"{data['label']}: {data['weight']} interactions", 
        color=data['color'], 
        arrowStrikethrough=True,
        dashes=(edge_style == 'dotted')  # Set the edge to be dashed if needed
    )

net.set_options("""
{
  "layout": {
    "hierarchical": {
      "enabled": true,
      "direction": "UD",
      "sortMethod": "hubsize",
      "levelSeparation": 180,
      "nodeSpacing": 100 
    }
  },
  "nodes": {
    "color": {
      "background": "green",
      "border": "black"
    },
    "font": {
      "color": "black",
      "size": 25
    }
  },
  "edges": {
    "smooth": true,
    "arrows": {
      "to": {
        "enabled": true,
        "type": "arrow"
      }
    },
    "width": 3 
  },
  "physics": {
    "hierarchicalRepulsion": {
      "nodeDistance": 200,
      "centralGravity": 0.0,
      "springLength": 150
    },
    "minVelocity": 0.25
  }
}
""")

# Show the graph
net.show('beacon_interaction.html')


beacon_interaction.html


In [293]:
from pyvis.network import Network
import networkx as nx

# Assuming df is the DataFrame you're working with, and it has the column 'b-to-b'
df = beacon_graph

# Define colors for each interaction type
interaction_colors = {
    'bluetooth': 'blue',
    'wifi': 'green',
    'location': 'red',
    'record_audio': 'yellow',
    'read_phone_state': 'orange'
}

# Create a directed graph
G = nx.MultiDiGraph()  # Use MultiDiGraph to allow multiple edges

# Add nodes and edges to the graph
for _, row in df.iterrows():
    caller = row['caller']
    callee = row['callee']
    interaction = row['dot_file_name_normalized']
    count = row['unique_package_count']
    b_to_b = row['b-to-b']
    
    # Add nodes
    G.add_node(caller)
    G.add_node(callee)
    
    # Add edges with interaction as label, color, and count as weight
    G.add_edge(caller, callee, key=interaction, label=interaction, weight=count, color=interaction_colors[interaction], b_to_b=b_to_b)

# Create a PyVis network
net = Network(notebook=True, directed=True, cdn_resources='remote')

# Convert networkx graph to PyVis graph
for node in G.nodes:
    net.add_node(node, label=node, title=node, color='#663a82', size=25)

# Use different curvature values for edges
curvatures = [0.15, 0.25, 0.35]

for idx, edge in enumerate(G.edges(data=True, keys=True)):
    caller, callee, interaction, data = edge
    edge_style = 'dotted' if data['b_to_b'] == 1 else 'solid'
    
    # Cycle through different curvature values to avoid overlap
    smooth_type = "curvedCW" if idx % 2 == 0 else "curvedCCW"
    curvature = curvatures[idx % len(curvatures)]
    
    net.add_edge(
        caller, 
        callee, 
        title=f"{data['label']}: {data['weight']} interactions", 
        color=data['color'], 
        arrowStrikethrough=True,
        dashes=(edge_style == 'dotted'),  # Set the edge to be dashed if needed
        smooth={"type": smooth_type, "roundness": curvature}  # Apply variable curvature
    )

net.set_options("""
{
  "layout": {
    "hierarchical": {
      "enabled": true,
      "direction": "UD",
      "sortMethod": "hubsize",
      "levelSeparation": 210,
      "nodeSpacing": 100 
    }
  },
  "nodes": {
    "color": {
      "background": "green",
      "border": "black"
    },
    "font": {
      "color": "black",
      "size": 30
    }
  },
  "edges": {
    "smooth": true,
    "arrows": {
      "to": {
        "enabled": true,
        "type": "arrow"
      }
    },
    "width": 3 
  },
  "physics": {
    "hierarchicalRepulsion": {
      "nodeDistance": 230
    },
    "minVelocity": 0.25
  }
}
""")

# Show the graph
net.show('beacon_interaction.html')


beacon_interaction.html


## network graph for tpl to tpl

In [90]:
tpl_to_tpl9.rename(columns={'classified_caller': 'caller', 'classified_callee': 'callee'}, inplace=True)

In [92]:
tpl_to_tpl12.rename(columns={'classified_caller': 'caller', 'classified_callee': 'callee'}, inplace=True)

In [93]:
tpl_to_tplpii.rename(columns={'classified_caller': 'caller', 'classified_callee': 'callee'}, inplace=True)

In [106]:
tpl_to_tpl = pd.concat([tpl_to_tpl9, tpl_to_tpl12, tpl_to_tplpii])
tpl_to_tpl

Unnamed: 0,caller,callee,dot_file_name,unique_package_count
0,AdColony,Unity3dAds,useCoarseLocation,5
1,AdColony,Unity3dAds,useFineLocation,5
2,AddAptTr,AmazonAdvertisement,accesswifistate,6
3,AddAptTr,Inmobi,accesswifistate,2
4,AddAptTr,Inmobi,useCoarseLocation,2
...,...,...,...,...
382,ironSource,Mintegral,read_phone_state,6
383,ironSource,Vungle,access_coarse_location,9
384,ironSource,Vungle,access_fine_location,9
385,ironSource,Vungle,readLocation,9


In [107]:
tpl_to_tpl.dot_file_name.unique()

array(['useCoarseLocation', 'useFineLocation', 'accesswifistate',
       'useBluetoothBasic', 'useBluetoothConnect',
       'access_coarse_location', 'access_fine_location', 'body_sensors',
       'readLocation', 'nearby_wifi_devices', 'read_phone_state',
       'readCellLocation', 'bluetooth_connect', 'get_accounts'],
      dtype=object)

In [108]:
standardized_mapping = {
    'useBluetoothBasic': 'bluetooth_basic',
    'useBluetoothAdvertise': 'bluetooth_advertise',
    'bluetooth_advertise': 'bluetooth_advertise',
    'useCoarseLocation': 'access_coarse_location',
    'useFineLocation': 'access_fine_location',
    'useBluetoothAdmin': 'bluetooth_admin',
    'useBluetoothConnect': 'bluetooth_connect',
    'useBluetoothScan': 'bluetooth_scan',
    'accesswifistate': 'access_wifi_state',
    'readLocation': 'read_location',
    'access_coarse_location': 'access_coarse_location',
    'access_fine_location': 'access_fine_location',
    'nearby_wifi_devices': 'nearby_wifi_devices',
    'bluetooth_scan': 'bluetooth_scan',
    'body_sensors': 'body_sensors',
    'get_accounts': 'get_accounts',
    'record_audio': 'record_audio',
    'read_phone_state': 'read_phone_state',
    'Record audio': 'record_audio',
    'changewifistate': 'change_wifi_state',
    'readCellLocation': 'read_cell_location',
    'bluetooth_connect': 'bluetooth_connect'

}

# Apply the mapping to the 'dot_file_name' column
tpl_to_tpl['dot_file_name'] = tpl_to_tpl['dot_file_name'].map(standardized_mapping)
tpl_to_tpl.drop_duplicates(subset=['caller', 'callee', 'dot_file_name'], inplace=True)

In [109]:
tpl_to_tpl[tpl_to_tpl['dot_file_name'].isna()]

Unnamed: 0,caller,callee,dot_file_name,unique_package_count


In [110]:
tpl_to_tpl

Unnamed: 0,caller,callee,dot_file_name,unique_package_count
0,AdColony,Unity3dAds,access_coarse_location,5
1,AdColony,Unity3dAds,access_fine_location,5
2,AddAptTr,AmazonAdvertisement,access_wifi_state,6
3,AddAptTr,Inmobi,access_wifi_state,2
4,AddAptTr,Inmobi,access_coarse_location,2
...,...,...,...,...
378,ironSource,Fyber,read_phone_state,13
381,ironSource,Inmobi,read_location,30
382,ironSource,Mintegral,read_phone_state,6
385,ironSource,Vungle,read_location,9


In [113]:
from pyvis.network import Network
import networkx as nx

df = tpl_to_tpl

# Define colors for each interaction type
interaction_colors = {
    'bluetooth_basic': 'blue',
    'access_wifi_state': 'green',
    'access_coarse_location': 'red',
    'access_fine_location': 'purple',
    'bluetooth_admin': 'orange',
    'bluetooth_connect': 'yellow',
    'bluetooth_scan': 'pink',
    'bluetooth_advertise': 'brown',
    'read_location': 'cyan',
    'nearby_wifi_devices': 'magenta',
    'change_wifi_state': 'lightblue',
    'body_sensors': 'lightgreen',
    'get_accounts': 'lightred',
    'record_audio': 'lightpurple',
    'read_phone_state': 'lightorange',
    'read_cell_location': 'lightpink',
    'bluetooth_connect': 'lightyellow'
    # Add more interactions and their colors here if needed
}

# Create a directed graph
G = nx.DiGraph()

# Add nodes and edges to the graph
for _, row in df.iterrows():
    caller = row['caller']
    callee = row['callee']
    interaction = row['dot_file_name']
    count = row['unique_package_count']
    
    # Add nodes
    G.add_node(caller)
    G.add_node(callee)
    
    # Add edges with interaction as label, color, and count as weight
    G.add_edge(caller, callee, label=interaction, weight=count, color=interaction_colors[interaction])

# Create a PyVis network
net = Network(notebook=True, directed=True)

# Convert networkx graph to PyVis graph
for node in G.nodes:
    net.add_node(node, label=node, title=node, color='lightblue', size=20)

for edge in G.edges(data=True):
    caller, callee, data = edge
    net.add_edge(caller, callee, title=f"{data['label']}: {data['weight']} interactions", color=data['color'], arrowStrikethrough=True)


net.set_options("""
{
  "layout": {
    "hierarchical": {
      "enabled": true,
      "direction": "UD",
      "sortMethod": "hubsize",
      "levelSeparation": 80
    }
  },
  "nodes": {
    "color": {
      "background": "lightblue",
      "border": "black"
    },
    "font": {
      "color": "black",
      "size": 15
    }
  },
  "edges": {
    "smooth": false,
    "arrows": {
      "to": {
        "enabled": true,
        "type": "arrow"
      }
    },
    "width": 3 
  }
}
""")


# Show the graph
net.show('tpl_to_tpl_interaction.html')


tpl_to_tpl_interaction.html
