When pandas calculates certain statistics, the results may come back as NumPy scalars (`numpy.int64`, `numpy.float64`) or as plain Python numbers, depending on the method:

```
count  <class 'numpy.int64'>
mean  <class 'float'>
median  <class 'float'>
mode  <class 'numpy.float64'>
std  <class 'float'>
```

Note that all the numbers passed to these methods came from a pandas Series whose dtype is float64.

In [1]:
import pandas as pd
import json


In [16]:
# Load the nutrient CSV: first row is the header, 'DateTime' is parsed as
# datetimes, and 'Station Number' is kept as a string rather than a number.
df = pd.read_csv(
    'Data/SFBayWaterQualityNutrientData.csv',
    header=0,
    parse_dates=['DateTime'],
    dtype={'Station Number': str},
)

In [17]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9678 entries, 0 to 9677
Data columns (total 12 columns):
DateTime             9678 non-null datetime64[ns]
Station Number       9678 non-null object
Distance from 36     9428 non-null float64
Depth                9678 non-null float64
Salinity             9643 non-null float64
Temperature          9589 non-null float64
Sigma-t              5018 non-null float64
Nitrite              6813 non-null float64
Nitrate + Nitrite    9217 non-null float64
Ammonium             8722 non-null float64
Phosphate            9355 non-null float64
Silicate             7018 non-null float64
dtypes: datetime64[ns](1), float64(10), object(1)
memory usage: 907.4+ KB


In [18]:
# Display the 'Salinity' column to confirm it is a float64 Series.
df['Salinity']

0        0.30
1        0.20
2        0.10
3        0.10
4        0.10
        ...  
9673    30.07
9674    30.73
9675    30.44
9676    29.98
9677    30.17
Name: Salinity, Length: 9678, dtype: float64

In [4]:
def _as_builtin(value):
    """Return `value` as a built-in Python object when it is a NumPy scalar."""
    if isinstance(value, tuple):
        return tuple(_as_builtin(item) for item in value)
    # NumPy scalars expose .item(), which yields the equivalent Python int/float;
    # plain Python numbers have no such attribute and are returned untouched.
    to_python = getattr(value, 'item', None)
    return to_python() if callable(to_python) else value


def summary_stats(s, native=False):
    """
    Calculate summary statistics for a series or list, s.

    Parameters
    ----------
    s : pandas.Series or list-like of numbers
        The values to summarise. Anything that is not already a Series is
        wrapped in one, so a plain list works too.
    native : bool, default False
        When True, every value in the result is converted to a built-in
        Python ``int``/``float`` so the dictionary can be passed straight to
        ``json.dump``. When False the values are returned exactly as pandas
        produced them (a mix of NumPy scalars and Python numbers).

    Returns
    -------
    dict
        Keys, in order:
        'count', 'max', 'min', 'mean', 'median', 'std' - the corresponding
        Series methods; 'mean_r' and 'std_r' - mean and std rounded to 3
        decimals; 'mode' - the first value returned by ``Series.mode()``
        (NaN when there is none, e.g. for empty input); 'z' - the tuple
        ``(mean - 3*std, mean + 3*std)``, each bound rounded to 3 decimals.
    """
    if not isinstance(s, pd.Series):
        s = pd.Series(s)

    # Compute each of these once; they feed several entries below.
    mean = s.mean()
    std = s.std()

    # Series.mode() is empty when there is nothing to count, so indexing it
    # unconditionally would raise.
    modes = s.mode()
    mode = modes.iloc[0] if len(modes) else float('nan')

    std3 = 3 * std
    stats = {
        'count': s.count(),
        'max': s.max(),
        'min': s.min(),
        'mean': mean,
        'mean_r': round(mean, 3),
        'median': s.median(),
        'mode': mode,
        'std': std,
        'std_r': round(std, 3),
        'z': (round(mean - std3, 3), round(mean + std3, 3)),
    }

    if native:
        stats = {key: _as_builtin(value) for key, value in stats.items()}

    return stats

#end_def summary_stats

In [5]:
# Which station and which measurement column to summarise.
station = '32'
col = 'Salinity'

# Restrict the data to the chosen station.
at_station = df['Station Number'] == station
st_df = df[at_station]

# Drop rows where the measurement is missing, then take just that column.
has_value = st_df[col].notnull()
d = st_df[has_value]
s = d[col]

stats = summary_stats(s)

In [6]:
# Display the dictionary returned by summary_stats.
stats

{'count': 564,
 'max': 33.46,
 'min': 7.36,
 'mean': 24.727588652482286,
 'mean_r': 24.728,
 'median': 26.28,
 'mode': 25.6,
 'std': 5.857237135129123,
 'std_r': 5.857,
 'z': (7.156, 42.299)}

In [7]:
# Print every statistic next to the type of the object that holds it.
for key in stats:
    value = stats[key]
    print(key, value, type(value))

count 564 <class 'numpy.int64'>
max 33.46 <class 'float'>
min 7.36 <class 'float'>
mean 24.727588652482286 <class 'float'>
mean_r 24.728 <class 'float'>
median 26.28 <class 'float'>
mode 25.6 <class 'numpy.float64'>
std 5.857237135129123 <class 'float'>
std_r 5.857 <class 'float'>
z (7.156, 42.299) <class 'tuple'>


In [8]:
# The json module cannot encode NumPy scalars (here the numpy.int64 'count'),
# so this is expected to fail. Serialise to a string first, so that a failure
# does not leave a half-written file behind, and report the error instead of
# raising so the rest of the notebook still runs.
try:
    payload = json.dumps(stats)
except TypeError as err:
    print('TypeError:', err)
else:
    with open('Data/stats_test.json', 'w') as fp:
        fp.write(payload)


TypeError: Object of type int64 is not JSON serializable

In [9]:
# Build a DataFrame from the stats dictionary and inspect its dtypes.
# Because 'z' is a 2-tuple the frame gets two rows (see the RangeIndex in the
# output below), with every scalar statistic present on both rows.
# NOTE(review): this rebinds `df`, which until now held the data loaded from
# the CSV; cells that filter `df` by 'Station Number' will not work if re-run
# after this one. Consider a separate name here and in the to_json cell.
df = pd.DataFrame(stats)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 10 columns):
count     2 non-null int64
max       2 non-null float64
min       2 non-null float64
mean      2 non-null float64
mean_r    2 non-null float64
median    2 non-null float64
mode      2 non-null float64
std       2 non-null float64
std_r     2 non-null float64
z         2 non-null float64
dtypes: float64(9), int64(1)
memory usage: 288.0 bytes


In [10]:
# Write `df` (at this point the frame built from `stats`) to a JSON file using
# pandas' own writer - presumably to sidestep the json.dump failure; confirm.
df.to_json('Data/tmp_stats.json')