In [8]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import netCDF4 as nc
import geopandas as gpd
import rioxarray as rxr
from myfunc import timer
from myfunc import DirMan
from myfunc import load_and_flatten_data
import config

# Lift pandas' display limits so tables are shown without truncation.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Run settings, all taken from the project-level `config` module.
resolution = config.resolution
region = config.region
data_path = config.data_path
post_data_path = config.post_data_path
shp_path = config.shp_path
fig_path = config.fig_path

# Enter the data directory through the project helper (the recorded output
# shows it reporting "Entered <data_path>"). Later cells open files with
# relative names, so this presumably changes the working directory -- TODO confirm.
dir_man = DirMan(data_path)
dir_man.enter()

# Output folder for the CSV tables written further down.
os.makedirs(f'{data_path}/csv', exist_ok=True)

Entered /tera11/zhwei/students/Xionghui/data/run/500/


In [None]:
def _first_match(joined):
    """Return at most one row per input point from a left spatial join.

    A left `sjoin` emits one row per (point, polygon) match, so a point that
    lies within several overlapping polygons repeats its index label.
    Assigning a column from such a result to `df` raises
    "cannot reindex on an axis with duplicate labels"; keeping only the first
    match per label avoids that and leaves unambiguous points untouched.
    """
    return joined[~joined.index.duplicated(keep='first')]


# --- Grid coordinates ------------------------------------------------------
# One row per grid cell: each latitude is repeated for every longitude, which
# corresponds to a row-major flattening of a (lat, lon) grid.
# NOTE(review): assumes load_and_flatten_data flattens in the same order -- confirm.
df = pd.DataFrame()

with nc.Dataset('Sbedrock.nc4') as dataset:
    lat = dataset['lat'][:].flatten()
    lon = dataset['lon'][:].flatten()
    df['lat'] = np.repeat(lat, len(lon))
    df['lon'] = np.tile(lon, len(lat))

# --- Gridded variables -----------------------------------------------------
# (file, variable name[, extra index]) -- each becomes a column named after
# the file. The optional third item is forwarded to load_and_flatten_data.
file_variable_list = [
    ('Sbedrock', 'Sbedrock'),
    ('Sr', 'Sr'),
    ('Ssoil', 'Band1'),
    ('mask123', 'Band1'),
    ('Area', 'area'),
    ('Koppen', 'Band1'),
    ('IGBP', 'LC', 0),
]

for file, variable_name, *index in file_variable_list:
    df[file] = load_and_flatten_data(file, variable_name, *index)

# --- Filtering -------------------------------------------------------------
# Keep complete rows with positive Sbedrock inside the mask (mask123 == 1),
# then drop the mask column and renumber rows 0..n-1 so that the point
# GeoDataFrame built below shares the same index as df.
df = df.dropna()
df = df[(df['Sbedrock'] > 0) & (df['mask123'] == 1)]
df = df.drop(columns='mask123').reset_index(drop=True)

# --- Region attribution ----------------------------------------------------
shp1 = gpd.read_file(shp_path+'continent/continent.shp')
shp2 = gpd.read_file(shp_path+'World_CN/ne_10m_admin_0_countries_chn.shp')

# NOTE(review): points are declared as EPSG:4326; the shapefiles are assumed
# to use the same CRS (geopandas warns on a mismatch) -- confirm.
gdf_points = gpd.GeoDataFrame(geometry=gpd.points_from_xy(df['lon'], df['lat']), crs='EPSG:4326')
result1 = _first_match(gpd.sjoin(gdf_points, shp1, how='left', predicate='within'))
result2 = _first_match(gpd.sjoin(gdf_points, shp2, how='left', predicate='within'))

# Points that fall in no polygon get NaN in these columns (left join).
df['Continent'] = result1['CONTINENT']
df['Subregion'] = result2['SUBREGION']
df['Sovereignt'] = result2['SOVEREIGNT']

df['Continent'] = df['Continent'].replace('Australia', 'Oceania')

# --- Short labels ----------------------------------------------------------
# Names missing from a mapping become NaN in the corresponding *_short column.
continent_abbrev = {
    'Asia': 'AS',
    'South America': 'SA',
    'Africa': 'AF',
    'Europe': 'EU',
    'North America': 'NA',
    'Oceania': 'OC',
    'Antarctica': 'AN',
    'Seven seas (open ocean)': 'Seven seas (open ocean)',
}
df['Continent_short'] = df['Continent'].map(continent_abbrev)

subregion_abbrev = {
    'South America': 'SA',
    'Australia and New Zealand': 'ANZ',
    'Southern Africa': 'SAF',
    'Eastern Africa': 'EAF',
    'Melanesia': 'MEL',
    'Western Europe': 'WEU',
    'Polynesia': 'Polynesias',
    'Middle Africa': 'MAF',
    'South-Eastern Asia': 'SEA',
    'Western Africa': 'WAF',
    'Southern Asia': 'SAS',
    'Central America': 'CAM',
    'Northern Africa': 'NAF',
    'Caribbean': 'CAR',
    'Western Asia': 'WAS',
    'Eastern Asia': 'EAS',
    'Northern America': 'NA',
    'Southern Europe': 'SEU',
    'Central Asia': 'CAS',
    'Eastern Europe': 'EEU',
    'Northern Europe': 'NEU',
}
df['Subregion_short'] = df['Subregion'].map(subregion_abbrev)
df['Sovereignt_short'] = result2['ISO_A3']

# Total retained area, scaled by 1e12.
# NOTE(review): the unit of 'Area' is not visible here (m^2 would make this
# 10^6 km^2) -- confirm.
print(df['Area'].sum()/(1e12))

# --- Output ----------------------------------------------------------------
# Passing a path lets pandas open the file itself (UTF-8, no newline
# translation) instead of relying on the locale defaults of open(..., 'w').
# The row index is still written as the first column, as before.
df.to_csv('csv/Global.csv')

# Subset of rows whose ISO_A3 code is 'USA'.
df1 = df[df['Sovereignt_short'] == 'USA']
df1.to_csv('csv/US.csv')

# Preview only: the display limits are lifted globally, so showing the whole
# frame would render every row.
df.head()

In [12]:
# Print the summary tables in full.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Works on the in-memory `df` built by the table-building cell. The same table
# is also saved as f'{data_path}/csv/Global.csv' if it needs to be reloaded.
df_area = df.copy()

# Per-continent statistics: mean Sr / Sbedrock / Ssoil over grid cells and
# summed Area.
df1 = df_area.groupby('Continent').agg(
    Sr=('Sr', 'mean'),
    Sbedrock=('Sbedrock', 'mean'),
    Ssoil=('Ssoil', 'mean'),
    Area=('Area', 'sum'),
)
df1['Continent'] = df1.index
df1 = df1.sort_values(by='Area', ascending=False)  # largest area first
print(df1)

# Transposed view: one column per continent, one row per statistic, plus a
# 'name' column holding the statistic label.
df2 = df1.set_index('Continent').transpose()
print(df2)
df2['name'] = df2.index

                       Sr    Sbedrock       Ssoil          Area      Continent
Continent                                                                     
Asia           193.790012   90.453844   97.887132  6.118740e+12           Asia
Africa         298.666314  190.386979   87.921690  4.540072e+12         Africa
South America  320.085247  208.718369   91.248118  3.675738e+12  South America
North America  210.625560  119.233203   88.499439  2.786138e+12  North America
Europe         185.469256   74.560369  101.142590  1.582207e+12         Europe
Oceania        197.693148  102.052837   81.144635  1.208023e+12        Oceania
Antarctica      21.499716   11.866321  108.028324  2.973036e+07     Antarctica
Continent          Asia        Africa  South America  North America  \
Sr         1.937900e+02  2.986663e+02   3.200852e+02   2.106256e+02   
Sbedrock   9.045384e+01  1.903870e+02   2.087184e+02   1.192332e+02   
Ssoil      9.788713e+01  8.792169e+01   9.124812e+01   8.849944e+01   
Area 

In [21]:
# Print the summary table in full.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

# Per-subregion statistics from `df_area` (created in the continent summary
# cell): mean Sr / Sbedrock / Ssoil over grid cells and summed Area.
df1 = df_area.groupby('Subregion').agg(
    Sr=('Sr', 'mean'),
    Sbedrock=('Sbedrock', 'mean'),
    Ssoil=('Ssoil', 'mean'),
    Area=('Area', 'sum'),
)
df1['Subregion'] = df1.index
df1 = df1.sort_values(by='Area', ascending=False)  # largest area first
print(df1.to_string())

# Transposed view: one column per subregion, plus a 'name' column holding the
# statistic label. Not printed.
df2 = df1.set_index('Subregion').transpose()
df2['name'] = df2.index

                                   Sr    Sbedrock       Ssoil          Area                  Subregion
Subregion                                                                                             
South America              319.390595  208.170724   91.198992  3.617472e+12              South America
Eastern Europe             147.431458   40.035177  103.695885  3.109309e+12             Eastern Europe
Middle Africa              281.110265  168.326445   88.776367  2.265934e+12              Middle Africa
Northern America           178.964416   90.491238   88.338736  1.934032e+12           Northern America
Eastern Asia               168.373503   70.068709   93.327269  1.456304e+12               Eastern Asia
South-Eastern Asia         354.503090  239.420099   99.086000  1.434825e+12         South-Eastern Asia
Eastern Africa             342.024487  235.427083   88.485655  1.401657e+12             Eastern Africa
Australia and New Zealand  196.003150  100.445668   80.525601  1.174315e+

In [27]:
# Per-country statistics from `df_area` (created in the continent summary
# cell): mean Sr / Sbedrock / Ssoil over grid cells and summed Area.
df1 = df_area.groupby('Sovereignt').agg(
    Sr=('Sr', 'mean'),
    Sbedrock=('Sbedrock', 'mean'),
    Ssoil=('Ssoil', 'mean'),
    Area=('Area', 'sum'),
)
df1['Sovereignt'] = df1.index
df1 = df1.sort_values(by='Area', ascending=False)  # largest area first
print(df1.to_string())

# Transposed view: one column per country, plus a 'name' column holding the
# statistic label. Not printed.
df2 = df1.set_index('Sovereignt').transpose()
df2['name'] = df2.index

                                          Sr    Sbedrock       Ssoil          Area                        Sovereignt
Sovereignt                                                                                                          
Russia                            145.715879   38.166200  104.527867  2.902502e+12                            Russia
Brazil                            364.553967  250.926477   91.022661  2.350912e+12                            Brazil
United States of America          206.116536  110.101204   88.287130  1.448327e+12          United States of America
China                             173.072221   73.799030   93.314951  1.326109e+12                             China
Australia                         196.970527  102.004589   79.067417  1.130610e+12                         Australia
Democratic Republic of the Congo  284.684570  173.131768   87.024988  1.036446e+12  Democratic Republic of the Congo
Mexico                            319.575225  227.352911   83.29

In [35]:
# Mean Ssoil per row of the current `df1` summary table, rounded to whole
# numbers, in the table's row order. `df1` is whichever summary was built last
# in the kernel (presumably the per-country table -- verify before reuse).
ssoil_rounded = df1['Ssoil'].round(0).astype(int)
print(ssoil_rounded.values)


[105  91  88  93  79  87  83  89  89  93 105  97  84  80  97  94  94  79
  82 100  84  92  94  86  96  98  94  98  81  89  87  95 100  82 100  90
  88  74  96 103  64  93  95 108  87  83  98  89 104 101  86 103  93 103
  89  88  83 112  80  89  96 105  97  63  89 109  94  60  92  88 104  92
  70  98  98 109  59  95  85  90  86  85 107  80  83  83  83  96  90  94
  98  93  77  92 101  79  89  51  76 100  92  97  99 103  93  96  76  99
 113 111  84  90  86 101  99  89 103  89  87  91  43  80  74 109  81 108
  53  88  74  71  83  84  67 107  93 103 105  73 113  69  69 103   5 108
  74 108  88 100  85  89 102 110  72  73  64 113 103  91 116 105  96 111
 105  96  56  97  72  13 115  92  76   6 120  62 107  94 172]


In [None]:
# Attach a state name to every US grid point and save the result.

df = pd.read_csv('csv/US.csv')

# csv/US.csv is written together with its row index, which read_csv returns as
# an ordinary column called 'Unnamed: 0'. Restore it as the index so it is not
# carried along as data and written out a second time below. Files saved
# without an index are left as they are.
if 'Unnamed: 0' in df.columns:
    df = df.set_index('Unnamed: 0').rename_axis(None)

shp = gpd.read_file(shp_path+'US/USA_adm1.shp')

# The points share df's index so that the join result aligns with df on
# assignment.
# NOTE(review): points are declared as EPSG:4326; the shapefile is assumed to
# use the same CRS (geopandas warns on a mismatch) -- confirm.
gdf_points = gpd.GeoDataFrame(
    geometry=gpd.points_from_xy(df['lon'], df['lat']),
    index=df.index,
    crs='EPSG:4326',
)
result1 = gpd.sjoin(gdf_points, shp, how='left', predicate='within')

# A left join emits one row per (point, polygon) match; a point inside
# overlapping polygons would repeat its index label and make the column
# assignment below fail with a duplicate-label reindexing error. Keep the
# first match per point.
result1 = result1[~result1.index.duplicated(keep='first')]

# Points outside every polygon get NaN.
df['State'] = result1['NAME_1']

# Written to 'US.csv' in the working directory (not back into csv/), as before.
# Passing a path lets pandas open the file itself (UTF-8, no newline
# translation) instead of relying on the locale defaults of open(..., 'w').
df.to_csv('US.csv')

# Preview only: the display limits are lifted globally, so showing the whole
# frame would render every row.
df.head()