# Read Json Files
## This notebook reads JSON data, stores it in a pandas DataFrame, and writes that DataFrame out as a CSV file.


### Import the required libraries.

In [None]:
import json
import pandas as pd
import csv
from bs4 import BeautifulSoup

### (OPTIONAL): If using Google Colab, mount your Google Drive so you have a file system for reading in and storing datasets.

In [None]:
# Colab only: attach Google Drive to the runtime's file system.
# The input/output paths used later in this notebook live under this mount point.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

### Indicate the json file that you want to read in:

In [None]:
# Path of the JSON file to parse; edit this to point at a different input file.
file_in = (
    "/content/gdrive/My Drive/Colab Notebooks/"
    "various_json/localize/new_localize_data_original.json"
)

### The code below parses the json file and stores data from each attribute into a list.
#### Note: Depending on how the json data is formatted, the number of item types read in and their names might need to be changed. Be sure to update the number of lists as well as ensure each json attribute of interest is being appended to a list.

In [None]:
# Load the whole JSON document up front so the file handle is closed before
# the records are processed. The encoding is stated explicitly instead of
# relying on the platform default.
with open(file_in, encoding="utf-8") as json_file:
    data = json.load(json_file)

# One list per JSON attribute of interest. Every record appends exactly one
# value to each list, so the lists stay index-aligned.
ans_ids = []
tags = []
reviews = []
tag_descs = []
taggers = []
crt_times = []
updt_times = []

# `data` is iterated as a sequence of records, each indexed by attribute name.
# A record missing any of these attributes raises KeyError.
for p in data:
    ans_ids.append(p['ANS_ID'])
    tags.append(p['TAG'])
    # REVIEW is run through BeautifulSoup so only its text content is kept
    # (presumably the raw value holds HTML markup). The parser is named
    # explicitly so the result does not depend on which optional parsers
    # happen to be installed, and no "no parser specified" warning is raised.
    reviews.append(BeautifulSoup(p['REVIEW'], "html.parser").get_text())
    tag_descs.append(p['TAG_DESC'])
    taggers.append(p['TAGGER'])
    crt_times.append(p['CRT_TIME'])
    updt_times.append(p['UPDT_TIME'])

### This code creates a pandas dataframe out of the parsed json data.
#### Note: As with the json parsing accomplished previously, be sure that all the json data is being included and that the names match as appropriate.

In [None]:
# Map each output column name to the list of values collected for it.
# Dict insertion order determines the column order of the DataFrame.
column_values = {
    "ANS_ID": ans_ids,
    "TAG": tags,
    "REVIEW": reviews,
    "TAG_DESC": tag_descs,
    "TAGGER": taggers,
    "CRT_TIME": crt_times,
    "UPDT_TIME": updt_times,
}
new_data = pd.DataFrame(column_values)

# Preview the first five rows as the cell output.
new_data.head(5)

### Indicate the name of the output csv file for this pandas dataframe:

In [None]:
# Destination path of the CSV file; edit this to write the output elsewhere.
file_out = (
    "/content/gdrive/My Drive/Colab Notebooks/"
    "various_json/localize/new_localize_data_parsed.csv"
)

### Create the csv file:

In [None]:
# Write the DataFrame to file_out; index=False leaves the row index out of the file.
new_data.to_csv(path_or_buf=file_out, index=False)