In [4]:
import pandas as pd

# Example article titles
titles = ["The Top 10 Tourist Attractions in France", "Mexico's Struggle With Drug Cartels", "China's Economic Growth in the 21st Century", "Discovering the Best Beaches in Australia"]

# Create a pandas dataframe with the article titles
df = pd.DataFrame({'Title': titles})

# List of countries to search for in the article titles
countries = ['France', 'Mexico', 'China', 'Australia']

# Loop through each country and check if it appears in the article title
for country in countries:
    # Create a new column in the dataframe indicating whether the country appears in the article title
    df[country] = df['Title'].apply(lambda x: 1 if country in x else 0)
    
# Print the resulting dataframe
df

Unnamed: 0,Title,France,Mexico,China,Australia
0,The Top 10 Tourist Attractions in France,1,0,0,0
1,Mexico's Struggle With Drug Cartels,0,1,0,0
2,China's Economic Growth in the 21st Century,0,0,1,0
3,Discovering the Best Beaches in Australia,0,0,0,1


In [9]:
import pandas as pd
import pycountry

# Example article titles
titles = ['10 Best Places to Visit in Japan', 
          'The Fascinating Culture of India', 
          'Exploring the Natural Beauty of Norway', 
          'Discovering the Hidden Gems of South Africa', 
          'The Rich History of Egypt', 
          'Adventures in the Canadian Wilderness']

# Create a pandas dataframe with the article titles
df = pd.DataFrame({'Title': titles})

# List of country names
countries = list(pycountry.countries)

# Find the country names in the article titles
for i, row in df.iterrows():
    for country in countries:
        if country.name.lower() in row['Title'].lower():
            df.at[i, country.name] = 1
            
# Fill NaN values with 0
df = df.fillna(0)

# Print the resulting dataframe
df


Unnamed: 0,Title,Japan,India,Norway,South Africa,Egypt
0,10 Best Places to Visit in Japan,1.0,0.0,0.0,0.0,0.0
1,The Fascinating Culture of India,0.0,1.0,0.0,0.0,0.0
2,Exploring the Natural Beauty of Norway,0.0,0.0,1.0,0.0,0.0
3,Discovering the Hidden Gems of South Africa,0.0,0.0,0.0,1.0,0.0
4,The Rich History of Egypt,0.0,0.0,0.0,0.0,1.0
5,Adventures in the Canadian Wilderness,0.0,0.0,0.0,0.0,0.0


In [29]:
import pandas as pd
import pycountry

# Read the csv file into a pandas dataframe
df = pd.read_csv('https://raw.githubusercontent.com/YusufAliOzkan/zotero-intelligence-bibliography/main/all_items.csv')

# List of country names
countries = list(pycountry.countries)

# Find the country names in the "title" column of the dataframe
found_countries = {}
for i, row in df.iterrows():
    for country in countries:
        if country.name.lower() in str(row['Title']).lower():
            if country.name in found_countries:
                found_countries[country.name] += 1
            else:
                found_countries[country.name] = 1

# Create a new dataframe containing the found countries and their counts
df_countries = pd.DataFrame({'Country': list(found_countries.keys()), 'Count': list(found_countries.values())})

# Print the resulting dataframe
print(df_countries)


        Country  Count
0       Ukraine     87
1        Angola      1
2       Belgium      2
3       Denmark      2
4     Australia     19
..          ...    ...
66  Philippines      1
67    Gibraltar      1
68        Nepal      1
69        Chile      1
70      Albania      1

[71 rows x 2 columns]


In [30]:
df_countries=df_countries.sort_values(by='Count', ascending=False)
df_countries

Unnamed: 0,Country,Count
0,Ukraine,87
6,Israel,27
12,Germany,27
31,Japan,20
4,Australia,19
...,...,...
9,Bangladesh,1
8,Sri Lanka,1
29,Ghana,1
51,Indonesia,1


In [31]:
df_countries.reset_index(drop=True)

Unnamed: 0,Country,Count
0,Ukraine,87
1,Israel,27
2,Germany,27
3,Japan,20
4,Australia,19
...,...,...
66,Bangladesh,1
67,Sri Lanka,1
68,Ghana,1
69,Indonesia,1


In [11]:
import pandas as pd
import pycountry
import re
import plotly.express as px

# Read the csv file into a pandas dataframe
df = pd.read_csv('https://raw.githubusercontent.com/YusufAliOzkan/zotero-intelligence-bibliography/main/all_items.csv')

# Dictionary to map non-proper country names to their proper names
country_map = {
    'british': 'UK',
    'great britain': 'UK',
    'america' : 'United States',
    'United States of America' : 'United States',
    'Soviet Union': 'Russia', 
    'american' : 'United States',
    'United States' : 'United States',
    'russian' : 'Russia'
    # Add more mappings as needed
}

# Find the country names in the "title" column of the dataframe
found_countries = {}
for i, row in df.iterrows():
    title = str(row['Title']).lower()
    for country in pycountry.countries:
        name = country.name.lower()
        if name in title or (name + 's') in title:  # Check for singular and plural forms of country names
            proper_name = country.name
            found_countries[proper_name] = found_countries.get(proper_name, 0) + 1
    for non_proper, proper in country_map.items():
        if non_proper in title:
            found_countries[proper] = found_countries.get(proper, 0) + title.count(non_proper)

# Create a new dataframe containing the found countries and their counts
df_countries = pd.DataFrame({'Country': list(found_countries.keys()), 'Count': list(found_countries.values())})

# Create a map using the plotly express library
fig = px.choropleth(df_countries, locations='Country', locationmode='country names', color='Count', 
                    title='Country mentions in titles', color_continuous_scale='Viridis')

# Display the map
fig.show()


In [12]:
df_countries=df_countries.sort_values(by='Count', ascending=False)
df_countries.reset_index(drop=True)

Unnamed: 0,Country,Count
0,UK,200
1,United States,168
2,Ukraine,87
3,Russia,55
4,Israel,27
...,...,...
68,Romania,1
69,North Macedonia,1
70,Libya,1
71,Bulgaria,1
