### Finding Peers

In [52]:
import pandas as pd
import seaborn as sns
import sqlite3, hashlib, requests, json, uuid, os

# Connect to the SQLite DB and load every row of the peers table into a DataFrame
# NOTE(review): conn is never closed in the cells visible here — confirm whether
# later cells reuse it, otherwise close it after the read.
conn = sqlite3.connect('../test.db')
query = "SELECT * FROM peers;"

df = pd.read_sql_query(query,conn)

In [29]:
# Total Unique IPs
print(f"Total Unique IPs: {len(df.address.unique())} Total IPs: {len(df.address)}")

# Getting a df of unique IP+torrent combos only
## Hash the IP + torrent name concatenated and insert the hash as its own column.
## The hash is kept purely as a compact identifier for the pair.
df["ip_tor_hash"] = (df["address"] + df["torrent_name"]).apply(
    lambda pair: hashlib.md5(pair.encode()).hexdigest()
)

## De-duplicate on the two real columns instead of on the hash: the hash input is
## a plain concatenation with no separator, so two different (IP, torrent) pairs
## can produce the same string (e.g. "1.1.1.1" + "1foo" vs "1.1.1.11" + "foo")
## and one of them would be silently dropped.
u_df = df.drop_duplicates(subset=["address", "torrent_name"]) # Unique df

Total Unique IPs: 209 Total IPs: 3292


In [30]:
# Check already created json files to see which IPs are already fetched
## This is currently slow and just so I do not blow through my API limits while building this thing

read_ips = []   # addresses found in the cache files, each listed once
ips = {}        # address -> cached ipinfo record

for file in os.listdir( '.' ):
    if not file.endswith( ".json" ):
        continue
    # Use a context manager so the file handle is closed right after parsing
    # instead of being left open until garbage collection.
    with open(file, "r") as fh:
        jfile = json.load(fh)
    for ip, info in jfile.items():
        # Cache files can overlap, so only record an address the first time it is seen
        if ip not in ips:
            read_ips.append(ip)
        # A later file still overrides the stored record, as before
        ips[ip] = info

In [31]:
# Get IP Address information from ipinfo.io
## The API token is read from the IPINFO_TOKEN environment variable so the
## credential is never stored in the notebook itself.
## Get the responses into a larger object that we can iterate through after the queries

# u_df holds one row per (IP, torrent) pair, so an address can appear several
# times; query each address once and skip anything already cached or already
# fetched earlier in this session.
pending = [
    ip for ip in u_df.address.unique()
    if ip not in ips and ip not in read_ips
]

if pending:
    token = os.environ.get("IPINFO_TOKEN")
    if not token:
        raise RuntimeError(
            "Set the IPINFO_TOKEN environment variable before fetching from ipinfo.io"
        )

for ip in pending:
    # The timeout keeps a stalled connection from hanging the cell forever
    r = requests.get(f"https://ipinfo.io/{ip}", params={"token": token}, timeout=10)
    # Fail loudly on rate-limit/auth errors rather than caching the error body as IP data
    r.raise_for_status()
    data = r.json()
    ips[ip] = data
        

In [33]:
# Persist the collected IP records to a freshly named file so they are not lost
# and no API calls are wasted fetching the same addresses again
out_name = "ips_" + str(uuid.uuid4()) + ".json"
with open(out_name, "w") as f:
    json.dump(ips, f)


In [34]:
# Parser for a single IP entry; it returns a dictionary that the
# calling code then inserts into the df
def ip_parser(ip):
    """
    Normalise one record from the ips dictionary into a fixed set of fields.

    ip: mapping of field name -> value for a single address, e.g.
        {
            "ip": "ip.addr.is.here",
            "country": "...",
            ...
        }

    Returns a one-item dict keyed by the record's "ip" value ("" when the
    record has none). The inner dict always carries the keys address,
    hostname, city, region, country, loc, org, postal and timezone; a field
    missing from the record is filled with "". Any other keys in the record
    are ignored.
    """
    passthrough = (
        "hostname", "city", "region", "country",
        "loc", "org", "postal", "timezone",
    )

    addr = ip.get("ip", "")

    # "address" comes first, followed by the remaining fields in a fixed order
    row = {"address": addr}
    for field in passthrough:
        row[field] = ip.get(field, "")

    return {addr: row}


In [35]:
# Enter default empty strings for missing values for data clarity. Convert into its own dataframe for insertion into main DF
clean_ip_list = {}
for ip, raw in ips.items():
    # ip_parser returns a one-item dict keyed by the record's own "ip" field.
    # Take its single value instead of indexing with our key, so a record with
    # a missing or different "ip" field (e.g. a cached error response) does not
    # raise KeyError.
    parsed = next(iter(ip_parser(raw).values()))
    # The dictionary key is the address that was actually queried; pin it so
    # the row can always be joined on "address".
    parsed["address"] = ip
    clean_ip_list[ip] = parsed

# Outer keys become columns, so transpose to get one row per address
ipdf = pd.DataFrame.from_dict(clean_ip_list).T

In [36]:
# Attach the per-address ipinfo fields to the unique peer rows; the inner join
# keeps only rows whose address is present in both frames
mu_df = u_df.merge(ipdf, how="inner", on="address")

In [37]:
#mu_df.sample(8)

In [47]:
# Show the column labels of the merged frame (peer columns plus the ipinfo fields)
mu_df.columns

Index(['address', 'clientIsChoked', 'clientIsInterested', 'clientName',
       'flagStr', 'isDownloadingFrom', 'isEncrypted', 'isIncoming', 'isUTP',
       'isUploadingTo', 'peerIsChoked', 'peerIsInterested', 'port', 'progress',
       'rateToClient', 'rateToPeer', 'tag', 'date', 'torrent_name',
       'ip_tor_hash', 'hostname', 'city', 'region', 'country', 'loc', 'org',
       'postal', 'timezone'],
      dtype='object')

In [50]:
# Count rows per address; a count above 1 means the address has more than one row in the merged frame
mu_df.address.value_counts()

40.117.119.130     2
128.0.183.14       2
108.12.224.55      2
110.175.89.172     1
5.135.244.34       1
                  ..
37.48.95.220       1
138.197.143.248    1
50.39.177.9        1
85.143.219.196     1
100.2.24.55        1
Name: address, Length: 209, dtype: int64

In [51]:
# Inspect every merged row for one specific address
mu_df.loc[mu_df.address == "40.117.119.130"]

Unnamed: 0,address,clientIsChoked,clientIsInterested,clientName,flagStr,isDownloadingFrom,isEncrypted,isIncoming,isUTP,isUploadingTo,...,torrent_name,ip_tor_hash,hostname,city,region,country,loc,org,postal,timezone
91,40.117.119.130,True,False,Transmission 2.92,EX,False,True,False,False,False,...,Rocky-8.5-x86_64-dvd1,99dbe7989221b21b952d4f1effe630f1,,Hampden Sydney,Virginia,US,"37.3058,-78.5462",AS8075 Microsoft Corporation,23960,America/New_York
92,40.117.119.130,True,False,Transmission 2.92,,False,False,False,False,False,...,2021-10-30-raspios-bullseye-arm64-lite.zip,7543c3fb1f3210972d5e395f9e991048,,Hampden Sydney,Virginia,US,"37.3058,-78.5462",AS8075 Microsoft Corporation,23960,America/New_York


### Finding Torrent Stats