In [208]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from urllib.parse import urljoin

# Base URLs of the two swgoh.gg guild pages whose omicron tables are compared
url_guild_1 = "https://swgoh.gg/g/0f2MDLSaTn-4gRsbvd8ReQ/"
url_guild_2 = "https://swgoh.gg/g/Wvyx-znoTZyBHpvucmEdSQ/"

def get_territory_wars_characters(url):
    """Scrape the "Territory Wars" omicron entries from a guild's omicron page.

    The page at ``<url>/omicrons/`` is downloaded and everything between the
    ``<h3>`` heading "Territory Wars" and the next ``<h3>`` heading is searched
    for character entries (``div.zeta-char-list__item``).

    Args:
        url: Base URL of the guild page. It should end with a slash so that
            ``urljoin`` appends ``omicrons/`` instead of replacing the last
            path segment.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``"Character Name"``,
        ``"Ability"`` and ``"Omicrons"`` (the first whitespace-separated token
        of the ability-type text, kept as a string). If the page has no
        "Territory Wars" section or no usable entries, an empty DataFrame
        with those same columns is returned so that callers can still merge
        on them.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    columns = ["Character Name", "Ability", "Omicrons"]

    # Send the HTTP request; a timeout keeps a stalled server from hanging
    # the script forever, and an error status is reported instead of being
    # parsed as if it were the real page.
    response = requests.get(urljoin(url, 'omicrons/'), timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Locate the <h3> heading that opens the "Territory Wars" category
    territory_wars_tag = soup.find('h3', string='Territory Wars')
    if territory_wars_tag is None:
        # No such section on the page: nothing to report
        return pd.DataFrame(columns=columns)

    char_data = []
    # Walk the sibling elements after the heading until the next <h3>,
    # which marks the start of the following category.
    for section in territory_wars_tag.find_next_siblings():
        if section.name == 'h3':
            break

        for entry in section.find_all("div", class_="zeta-char-list__item"):
            name_tag = entry.find("div", class_="zeta-char-list__character-name")
            ability_tag = entry.find("div", class_="zeta-char-list__ability-name")
            type_tag = entry.find("div", class_="zeta-char-list__ability-type")
            if name_tag is None or ability_tag is None or type_tag is None:
                # Incomplete entry: skip it rather than abort the whole scrape
                continue

            # Keep only the leading token (the omicron count) and drop the
            # trailing label text.
            omicron_tokens = type_tag.text.split()
            if not omicron_tokens:
                continue

            char_data.append({
                "Character Name": name_tag.text.strip(),
                "Ability": ability_tag.text.strip(),
                "Omicrons": omicron_tokens[0],
            })

    # Passing the columns explicitly keeps the schema stable for empty results
    return pd.DataFrame(char_data, columns=columns)

# Fetch the Territory Wars omicron table for each of the two guilds
df_1_territory_wars = get_territory_wars_characters(url_guild_1)
df_2_territory_wars = get_territory_wars_characters(url_guild_2)

# Join both tables on character and ability (default inner join, so only
# rows present for both guilds remain); the suffixes tell the two
# "Omicrons" columns apart.
combined_df_territory_wars = df_1_territory_wars.merge(
    df_2_territory_wars,
    on=["Character Name", "Ability"],
    suffixes=("_Guild1", "_Guild2"),
)

# Order the rows by character name
combined_df_territory_wars_sorted = combined_df_territory_wars.sort_values(by="Character Name")

# Drop the rows where both guilds have the count '0' (counts are strings here)
combined_df_filtered = combined_df_territory_wars_sorted.loc[
    ~(
        (combined_df_territory_wars_sorted['Omicrons_Guild1'] == '0')
        & (combined_df_territory_wars_sorted['Omicrons_Guild2'] == '0')
    )
]

# Print the result without the index column
print(combined_df_filtered.to_string(index=False))

                Character Name                       Ability Omicrons_Guild1 Omicrons_Guild2
         Ahsoka Tano (Fulcrum)                  Perseverance               7               4
     Boba Fett, Scion of Jango                  Dual Barrage               0               1
     Boba Fett, Scion of Jango                   Fett Legacy               1               1
     Boba Fett, Scion of Jango          Dangerous Reputation               1               1
                     Boss Nass            Boss of Otoh Gunga               0               1
                    Cal Kestis                I'm Persistent               1               2
                Captain Phasma                  Fire at Will              22              12
                   Chief Nebit               Raiding Parties               6               2
                 Darth Sidious                 Sadistic Glee               8               6
                      Droideka    Deflector Shield Generator          