In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import json

In [2]:
# Load the 500-image dataset, which is split across ten JSON files
# (small_50.json ... small_500.json, 50 images at a time).
# Change RESULTS_DIR to fit your situation.
RESULTS_DIR = '/content/drive/MyDrive/advanced_geo_ai/500_results'


def load_results(upper_index, results_dir=RESULTS_DIR):
    """Parse ``<results_dir>/small_<upper_index>.json`` and return its content.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as parsing finishes, instead of staying open for the life of the kernel.

    Parameters
    ----------
    upper_index : int or str
        Number used in the file name (50, 100, ..., 500).
    results_dir : str
        Directory that holds the ``small_*.json`` files.

    Returns
    -------
    The object produced by ``json.load`` for that file.
    """
    with open(f'{results_dir}/small_{upper_index}.json') as handle:
        return json.load(handle)


data50 = load_results(50)
data100 = load_results(100)
data150 = load_results(150)
data200 = load_results(200)
data250 = load_results(250)
data300 = load_results(300)
data350 = load_results(350)
data400 = load_results(400)
data450 = load_results(450)
data500 = load_results(500)

In [3]:
## The GPS coordinates are embedded in each filename, so this helper pulls them back out
def extract_gps(filename):
    """Return the two comma-separated values that precede the first '_' in ``filename``.

    On success the result is a two-element list of strings ``[first, second]``
    (the values are not converted to numbers here). When the leading segment
    does not split into exactly two comma-separated pieces, the tuple
    ``(None, None)`` is returned instead.
    """
    leading, _sep, _rest = filename.partition('_')
    pieces = leading.split(',')
    if len(pieces) != 2:
        return None, None
    first, second = pieces
    return [first, second]



In [4]:
## Compute the nature index and urban index for each image, extract the GPS coords from its filename,
## and collect everything in one list so it can be converted into a pandas DataFrame.


def calculate_proportions(rs, nature_labels=(8, 9, 10, 1, 18)):
  """Summarise the class make-up of every segmentation mask in ``rs``.

  Parameters
  ----------
  rs : iterable of dict
      Each dict is read through the keys ``'segmentaion'`` (array-like of
      class ids, any shape) and ``'filename'``.
  nature_labels : iterable of int, optional
      Class ids counted towards the nature index. The default stands for
      {8: 'vegetation', 9: 'terrain', 10: 'sky', 1: 'sidewalk', 18: 'bicycle'}.
      Every other id found in a mask counts towards the urban index.

  Returns
  -------
  list of dict
      One dict per image with the keys ``'coords'`` (result of
      ``extract_gps``), ``'filename'``, ``'proportions'`` (class id -> share
      of pixels), ``'nature'`` and ``'urban'`` (shares of pixels; they sum
      to 1 for a non-empty mask). An empty mask gives an empty
      ``'proportions'`` dict and NaN for both indices.
  """
  nature_ids = np.asarray(list(nature_labels))
  results = []

  for img in rs:
    # The key is spelled 'segmentaion' here; presumably it matches the
    # upstream JSON files -- confirm before "correcting" it.
    labels = np.asarray(img['segmentaion']).flatten()
    total_elements = labels.size

    # Distinct class ids present in the mask and how many pixels each covers
    class_ids, counts = np.unique(labels, return_counts=True)

    if total_elements == 0:
      # Nothing to divide by: report NaN explicitly rather than relying on 0/0
      number_proportions = {}
      nature_proportion = urban_proportion = np.nan
    else:
      number_proportions = dict(zip(class_ids, counts / total_elements))

      # One membership test splits the counts into nature vs. everything else
      is_nature = np.isin(class_ids, nature_ids)
      nature_proportion = counts[is_nature].sum() / total_elements
      urban_proportion = counts[~is_nature].sum() / total_elements

    results.append({
        # GPS coords are kept for visualisation later on
        'coords': extract_gps(img['filename']),
        'filename': img['filename'],
        'proportions': number_proportions,
        'nature': nature_proportion,
        'urban': urban_proportion,
    })

  return results


In [5]:



# Run calculate_proportions over every loaded chunk, in file order.
(pro50, pro100, pro150, pro200, pro250,
 pro300, pro350, pro400, pro450, pro500) = map(
    calculate_proportions,
    (data50, data100, data150, data200, data250,
     data300, data350, data400, data450, data500),
)

In [10]:
## Turn a list of per-image result dicts into a pandas DataFrame
def as_pandas(rs):
  """Build a DataFrame with one row per entry of ``rs``.

  Each entry is read through ``entry['coords'][0]``, ``entry['coords'][1]``,
  ``entry['nature']`` and ``entry['urban']``; these become the columns
  ``latitude``, ``longitude``, ``nature_index`` and ``urban_index``.
  The two coordinate columns are passed through ``pd.to_numeric``.
  """
  latitudes, longitudes, nature_scores, urban_scores = [], [], [], []
  for entry in rs:
    gps = entry['coords']
    latitudes.append(gps[0])
    longitudes.append(gps[1])
    nature_scores.append(entry['nature'])
    urban_scores.append(entry['urban'])

  frame = pd.DataFrame({
      'latitude': latitudes,
      'longitude': longitudes,
      'nature_index': nature_scores,
      'urban_index': urban_scores,
  })

  # Coordinates come in as strings, so make them numeric
  for axis in ('latitude', 'longitude'):
    frame[axis] = pd.to_numeric(frame[axis])

  return frame

In [11]:
# Convert every chunk's list of result dicts into a DataFrame.
# The same names are rebound from lists to DataFrames, so this cell must run
# exactly once after the cell that computes the proportions.
(pro50, pro100, pro150, pro200, pro250,
 pro300, pro350, pro400, pro450, pro500) = map(
    as_pandas,
    (pro50, pro100, pro150, pro200, pro250,
     pro300, pro350, pro400, pro450, pro500),
)

In [16]:
## Stack the ten per-chunk DataFrames into one DataFrame with a fresh running index

per_chunk_frames = [
    pro50, pro100, pro150, pro200, pro250,
    pro300, pro350, pro400, pro450, pro500,
]
final_ready_vis = pd.concat(per_chunk_frames, ignore_index=True)


In [None]:

def equal_width_bins(values, n_bins=5):
    """Assign each value to one of ``n_bins`` equal-width bins, labelled "<low> to <high>".

    Parameters
    ----------
    values : pandas.Series
        Numeric values to bin.
    n_bins : int
        Number of equal-width bins spanning the range of ``values``.

    Returns
    -------
    pandas.Series
        Categorical series whose categories are the bin labels; each label
        shows the bin's two edges rounded to 2 decimals.
    """
    # First pass: let pandas work out the edges of the equal-width bins
    _, edges = pd.cut(values, bins=n_bins, retbins=True, right=True)

    # Human-readable labels built from consecutive edge pairs
    labels = [f"{round(low, 2)} to {round(high, 2)}" for low, high in zip(edges[:-1], edges[1:])]

    # Second pass: reuse those edges so labels and bins are guaranteed to line up
    return pd.cut(values, bins=edges, labels=labels, right=True)


# 5 equal-width bins for the 'nature_index'
final_ready_vis['nature_index_equal_bin'] = equal_width_bins(final_ready_vis['nature_index'])

# 5 equal-width bins for the 'urban_index'
final_ready_vis['urban_index_equal_bin'] = equal_width_bins(final_ready_vis['urban_index'])



In [18]:
# Export the combined table as CSV (index column omitted) so everything can be visualized in QGIS
output_csv = 'my_dataframe.csv'
final_ready_vis.to_csv(output_csv, index=False)
