# Python3 JSON 數據解析  
*****
01 編碼、解碼  
+ json.dumps()  
+ json.loads()  

02 讀寫Json檔案  

03 複雜的操作  
*****

### 01 編碼、解碼
*****
json.dumps(): 對數據進行編碼（Python 物件 → JSON 字串）  
json.loads(): 對數據進行解碼（JSON 字串 → Python 物件）  

In [1]:
import json

# Sample record that is serialized to JSON text and parsed back below.
data = dict(
    no=1,
    name='Runoob',
    url='http://www.runoob.com',
)

# Encode: Python dict -> JSON-formatted string.
json_str = json.dumps(data)
print("Python 原始数据：", repr(data))
print("JSON 对象：", json_str)

# Decode: JSON-formatted string -> Python dict.
data2 = json.loads(json_str)
print("data2['name']: ", data2['name'])
print("data2['url']: ", data2['url'])

### 02 讀寫Json檔案 
*****
r - 讀取(檔案需存在)  
w - 新建檔案寫入(檔案可不存在，若存在則清空)  
a - 資料附加到舊檔案後面(游標指在EOF)  
r+ - 讀取舊資料並寫入(檔案需存在且游標指在開頭)  
w+ - 清空檔案內容，新寫入的東西可再讀出(檔案可不存在，會自行新增)  
a+ - 資料附加到舊檔案後面(游標指在EOF)，可讀取資料  
b - 二進位模式  

In [2]:
# Serialize `data` and store the JSON text in data.json
# (mode 'w' creates the file, or empties it if it already exists).
with open('data.json', 'w') as out_file:
    out_file.write(json.dumps(data))

# Read the text back and parse it; `data` is rebound to the parsed result.
with open('data.json', 'r') as in_file:
    data = json.loads(in_file.read())
    print("data['no']: ", data['no'])


data['no']:  1


### 03 複雜的操作 
*****

In [3]:
import json

# Nested sample: the value under 'no' contains a 'Record' mapping whose entry
# holds a list ('ICD'), giving several levels to index into after parsing.
data = {
    'no': {
        "Birthday": 123,
        "Year": 456,
        "Record": {
            "1": {"Date": 20020808, "ICD": [785], "Op": 1},
        },
    },
    'name': 'Runoob',
    'url': 'http://www.runoob.com',
}

# Round trip: encode the dict to a JSON string, then decode it again.
json_str = json.dumps(data)
data2 = json.loads(json_str)

# Each print indexes one level deeper into the decoded structure.
print("data2['no'] : ", data2['no'])
print("data2['no']['Record'] : ", data2['no']['Record'])
print("data2['no']['Record']['1'] : ", data2['no']['Record']['1'])
print("data2['no']['Record']['1']['ICD'] : ", data2['no']['Record']['1']['ICD'])


data2['no'] :  {'Birthday': 123, 'Year': 456, 'Record': {'1': {'Date': 20020808, 'ICD': [785], 'Op': 1}}}
data2['no']['Record'] :  {'1': {'Date': 20020808, 'ICD': [785], 'Op': 1}}
data2['no']['Record']['1'] :  {'Date': 20020808, 'ICD': [785], 'Op': 1}
data2['no']['Record']['1']['ICD'] :  [785]


In [4]:
import pandas as pd
import codecs  # no longer used by this cell; kept in case other cells rely on it
import json

path = 'patient-date335.json'

# The built-in open() decodes text itself when given an encoding, so the
# legacy codecs.open() wrapper is not needed.
with open(path, 'r', encoding='utf-8') as json_data:
    # Parse the whole file into Python objects.
    # NOTE(review): the table displayed for this cell suggests a dict keyed by
    # patient ID with one dict per patient (not a list of dicts) -- confirm
    # against the data file.
    json_dicts = json.load(json_data)

# Build the frame once the file has been closed; the parsed objects are
# already fully in memory.
df = pd.DataFrame(json_dicts)
df

Unnamed: 0,0008dd29c86ee88a817cea190dad0346,000cfd5074b45bf168fb182a4eea7167,00224d1715680e16dfd62703f24dd8c3
BirthDay,193902,196602,195804
BirthYear,1939,1966,1958
DeathDay,,,2024
DeathYear,,,
Record,"{'1': {'Date': 20020808, 'ICD': [785], 'Op': 1...","{'1': {'Date': 19990519, 'ICD': [463, 276], 'O...","{'1': {'Date': 20110302, 'ICD': [727], 'Op': 1..."


In [15]:
import pandas as pd
import numpy as np

# Load the patient data.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding; UTF-8 matches how the same file is
# opened earlier in this notebook.
with open('patient-date335.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# The parsed objects are in memory, so the frame is built after the file closes.
df = pd.DataFrame(data)
df

Unnamed: 0,0008dd29c86ee88a817cea190dad0346,000cfd5074b45bf168fb182a4eea7167,00224d1715680e16dfd62703f24dd8c3
BirthDay,193902,196602,195804
BirthYear,1939,1966,1958
DeathDay,,,2024
DeathYear,,,
Record,"{'1': {'Date': 20020808, 'ICD': [785], 'Op': 1...","{'1': {'Date': 19990519, 'ICD': [463, 276], 'O...","{'1': {'Date': 20110302, 'ICD': [727], 'Op': 1..."


In [16]:
# Swap rows and columns (matrix transpose) and rebind df to the result.
df = df.transpose()
df

Unnamed: 0,BirthDay,BirthYear,DeathDay,DeathYear,Record
0008dd29c86ee88a817cea190dad0346,193902,1939,,,"{'1': {'Date': 20020808, 'ICD': [785], 'Op': 1..."
000cfd5074b45bf168fb182a4eea7167,196602,1966,,,"{'1': {'Date': 19990519, 'ICD': [463, 276], 'O..."
00224d1715680e16dfd62703f24dd8c3,195804,1958,2024.0,,"{'1': {'Date': 20110302, 'ICD': [727], 'Op': 1..."


In [17]:
# Print the column dtypes and then the row index, each under its own heading
# and followed by a blank line.
for heading, value in (
    ("df.dtypes-----------\n", df.dtypes),
    ("df.index-----------\n", df.index),
):
    print(heading, value, "\n")

# DataFrame.info() writes its own report, so only the heading is printed here.
print("df.info()-----------")
df.info()

df.dtypes-----------
 BirthDay     object
BirthYear    object
DeathDay     object
DeathYear    object
Record       object
dtype: object 

df.index-----------
 Index(['0008dd29c86ee88a817cea190dad0346', '000cfd5074b45bf168fb182a4eea7167',
       '00224d1715680e16dfd62703f24dd8c3'],
      dtype='object') 

df.info()-----------
<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 0008dd29c86ee88a817cea190dad0346 to 00224d1715680e16dfd62703f24dd8c3
Data columns (total 5 columns):
BirthDay     3 non-null object
BirthYear    3 non-null object
DeathDay     1 non-null object
DeathYear    0 non-null object
Record       3 non-null object
dtypes: object(5)
memory usage: 224.0+ bytes


In [25]:
# Boolean mask: True for rows whose DeathDay holds a value.
# A pandas Series has no isnan() method (the call raises AttributeError);
# notna() is the missing-value test that keeps the rows with a recorded
# DeathDay, which is the row shown in this cell's displayed result.
has_death_day = df["DeathDay"].notna()
df[has_death_day]

Unnamed: 0,BirthDay,BirthYear,DeathDay,DeathYear,Record
00224d1715680e16dfd62703f24dd8c3,195804,1958,2024,,"{'1': {'Date': 20110302, 'ICD': [727], 'Op': 1..."


In [None]:
# 'BirthDay' and 'BirthYear' are column labels of the transposed frame, so
# they belong in the column position of .loc; used as row labels they raise
# KeyError because the rows are indexed by patient ID.
df2 = df.loc[:, ['BirthDay', 'BirthYear']]
print(df2)

# Number of rows in the selection.
len(df2)

# 參考連結

*****
[Python3 JSON 数据解析](http://www.runoob.com/python3/python3-json.html)  
[Python 讀寫檔案](https://github.com/wtfaha/StudyNote/tree/master/Python)  