In [1]:
import pandas as pd
import numpy as np

# Loading the CSV data
def load_and_process_data(S6-UK-Bank-Customers.csv):
    # Reading the CSV file
    df = pd.read_csv(S6-UK-Bank-Customers.csv)
    
    # Cleaning data: removing any rows with missing values
    df = df.dropna()
    
    # Converting Balance to numeric, handling any potential errors
    df['Balance'] = pd.to_numeric(df['Balance'], errors='coerce')
    
    # Grouping by Region to get customer counts
    region_counts = df['Region'].value_counts()
    
    return df, region_counts

# Function to save processed data
def save_processed_data(region_counts, output_filename="region_counts.csv"):
    region_counts.to_csv(output_filename)
    return output_filename

if __name__ == "__main__":
    # Assuming the data file is available as 'S6-UK-Bank-Customers.csv'
    filename = "S6-UK-Bank-Customers.csv"
    df, region_counts = load_and_process_data(filename)
    output_file = save_processed_data(region_counts)
    print(f"Processed data saved to {output_file}")
    print("\nCustomer counts by region:")
    print(region_counts)

SyntaxError: invalid syntax (3961459493.py, line 5)

In [2]:
import pandas as pd
import numpy as np

# Loading the CSV data
def load_and_process_data(filename):  # Changed parameter to a proper variable name
    # Reading the CSV file
    df = pd.read_csv(filename)  # Use the parameter variable
    
    # Cleaning data: removing any rows with missing values
    df = df.dropna()
    
    # Converting Balance to numeric, handling any potential errors
    df['Balance'] = pd.to_numeric(df['Balance'], errors='coerce')
    
    # Grouping by Region to get customer counts
    region_counts = df['Region'].value_counts()
    
    return df, region_counts

# Function to save processed data
def save_processed_data(region_counts, output_filename="region_counts.csv"):
    region_counts.to_csv(output_filename)
    return output_filename

if __name__ == "__main__":
    # Assuming the data file is available as 'S6-UK-Bank-Customers.csv'
    filename = "S6-UK-Bank-Customers.csv"
    df, region_counts = load_and_process_data(filename)
    output_file = save_processed_data(region_counts)
    print(f"Processed data saved to {output_file}")
    print("\nCustomer counts by region:")
    print(region_counts)

Processed data saved to region_counts.csv

Customer counts by region:
Region
England             2159
Scotland            1124
Wales                520
Northern Ireland     211
Name: count, dtype: int64


In [3]:
import pandas as pd
import matplotlib.pyplot as plt

# Loading processed region counts
def load_region_counts(filename="region_counts.csv"):
    return pd.read_csv(filename, index_col=0, squeeze=True)

# Creating a bar chart for customers by region
def plot_customers_by_region(region_counts):
    plt.figure(figsize=(10, 6))
    region_counts.plot(kind='bar', color='skyblue')
    
    # Adding titles and labels
    plt.title('UK Bank Customers by Region', fontsize=14)
    plt.xlabel('Region', fontsize=12)
    plt.ylabel('Number of Customers', fontsize=12)
    
    # Adding value labels on top of bars
    for i, count in enumerate(region_counts):
        plt.text(i, count + 5, str(count), ha='center', fontsize=10)
    
    # Adjusting layout to prevent label cutoff
    plt.tight_layout()
    
    # Saving the plot
    plt.savefig('customers_by_region.png')
    plt.close()

if __name__ == "__main__":
    # Loading the processed region counts
    region_counts = load_region_counts()
    
    # Generating the visualization
    plot_customers_by_region(region_counts)
    print("Bar chart saved as 'customers_by_region.png'")

TypeError: read_csv() got an unexpected keyword argument 'squeeze'

In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# Loading processed region counts
def load_region_counts(filename="region_counts.csv"):
    # Removed 'squeeze=True' and added .squeeze() method after reading the CSV
    return pd.read_csv(filename, index_col=0).squeeze()

# Creating a bar chart for customers by region
def plot_customers_by_region(region_counts):
    plt.figure(figsize=(10, 6))
    region_counts.plot(kind='bar', color='skyblue')
    
    # Adding titles and labels
    plt.title('UK Bank Customers by Region', fontsize=14)
    plt.xlabel('Region', fontsize=12)
    plt.ylabel('Number of Customers', fontsize=12)
    
    # Adding value labels on top of bars
    for i, count in enumerate(region_counts):
        plt.text(i, count + 5, str(count), ha='center', fontsize=10)
    
    # Adjusting layout to prevent label cutoff
    plt.tight_layout()
    
    # Saving the plot
    plt.savefig('customers_by_region.png')
    plt.close()

if __name__ == "__main__":
    # Loading the processed region counts
    region_counts = load_region_counts()
    
    # Generating the visualization
    plot_customers_by_region(region_counts)
    print("Bar chart saved as 'customers_by_region.png'")

Bar chart saved as 'customers_by_region.png'


In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import contextily as ctx

# Loading processed region counts
def load_region_counts(filename="region_counts.csv"):
    return pd.read_csv(filename, index_col=0).squeeze()

# Creating a choropleth map for customers by region
def plot_uk_map_customers(region_counts):
    # Load UK shapefile (you may need to download this)
    # This uses the standard UK administrative boundaries
    uk_map = gpd.read_file(gpd.datasets.get_path('uk_regions'))
    
    # Ensure the region names match between your data and the shapefile
    # You might need to adjust this mapping based on your actual data
    # This is a simplified example
    uk_map['region_name'] = uk_map['name'].str.upper()
    
    # Merge the customer counts with the map data
    # Convert region_counts to DataFrame for merging
    region_counts_df = region_counts.reset_index()
    region_counts_df.columns = ['region_name', 'customer_count']
    
    # Make sure region names are in the same format
    region_counts_df['region_name'] = region_counts_df['region_name'].str.upper()
    
    # Merge the geodata with customer counts
    uk_map = uk_map.merge(region_counts_df, on='region_name', how='left')
    
    # Create the plot
    fig, ax = plt.subplots(1, 1, figsize=(15, 10))
    
    # Create a custom colormap from light to dark blue
    colors = ['#f7fbff', '#08306b']  # Light blue to dark blue
    cmap = LinearSegmentedColormap.from_list('custom_blue', colors, N=256)
    
    # Plot the map with customer counts determining the color
    uk_map.plot(column='customer_count', 
                ax=ax,
                legend=True,
                cmap=cmap,
                legend_kwds={'label': "Number of Customers",
                             'orientation': "vertical"})
    
    # Add basemap for context
    try:
        ctx.add_basemap(ax, crs=uk_map.crs.to_string())
    except:
        # If adding basemap fails, continue without it
        pass
    
    # Add title and remove axis
    ax.set_title('UK Bank Customers by Region', fontsize=16)
    ax.set_axis_off()
    
    # Add text labels with customer counts
    for idx, row in uk_map.iterrows():
        if pd.notna(row['customer_count']):
            # Get the centroid of each region to place the text
            centroid = row['geometry'].centroid
            ax.text(centroid.x, centroid.y, str(int(row['customer_count'])), 
                    ha='center', fontweight='bold', color='black')
    
    # Save the figure
    plt.savefig('uk_customers_map.png', dpi=300, bbox_inches='tight')
    plt.close()

if __name__ == "__main__":
    # Loading the processed region counts
    region_counts = load_region_counts()
    
    # Generating the visualization
    plot_uk_map_customers(region_counts)
    print("UK map visualization saved as 'uk_customers_map.png'")

In [6]:
plt.savefig('uk_customers_map.png', dpi=300, bbox_inches='tight')

<Figure size 640x480 with 0 Axes>