In [1]:
import json
import requests
import pandas as pd
from datetime import datetime

# Commands in EDFS

### mkdir: create a directory in file system

In [442]:
base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"

def mkdir(url,path):
    """Create a directory node in the database.

    Writes a one-entry JSON object ``{"YYYY-MM-DD": "HH:MM:SS"}`` (the current
    local date and time) to ``url + path`` with an HTTP PUT.

    Parameters
    ----------
    url : str
        Root URL of the database, ending in ``/``.
    path : str
        Node path relative to ``url``, including the ``.json`` suffix.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    now = datetime.now()
    stamp = {now.strftime("%Y-%m-%d"): now.strftime("%H:%M:%S")}
    r = requests.put(url + path, json.dumps(stamp, indent = 4))
    # Surface a rejected write instead of silently discarding the response.
    r.raise_for_status()

In [443]:
# Prompt for a directory name and create it as "<name>.json" under the database root.
new_dir = input("Enter specific directory: ")
mkdir(base,new_dir + ".json")

Enter specific directory: test


### ls: list content of a given directory

In [444]:
def ls(name):
    """List the entries of a directory in the database.

    Parameters
    ----------
    name : str
        Path relative to the database root, including the ``.json`` suffix
        (for example ``"genres/.json"``).

    Returns
    -------
    list
        The keys of the JSON object stored at that path, or an empty list
        when the response body is JSON ``null``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
    r = requests.get(base + name)
    # Without this an error payload would be listed as if it were a directory.
    r.raise_for_status()
    dic = r.json()
    if dic is None:
        # Nothing stored at this path: report an empty directory instead of
        # failing with "NoneType is not iterable".
        return []
    return list(dic)

In [445]:
# Ask which directory to list and fetch its entries.
# `n` is kept as a global because valid_path() reads it.
n = input("Enter specific directory: ")
new_path = ls(n + "/.json")
#print(new_path)

def valid_path():
    """Return the display prefix for the directory held in the global ``n``.

    The prefix is ``"data/"`` when ``n`` is empty and ``"data/<n>/"`` otherwise.
    """
    suffix = "" if len(n) == 0 else n + "/"
    return "data/" + suffix

# Print each listed entry prefixed with the path built by valid_path().
p = valid_path()
for i in new_path:
    print(p+i)

Enter specific directory: 
data/genres
data/information
data/language
data/popularity
data/scores
data/test
data/year


### cat: display content of a file

In [436]:
file = input("Enter specific file: ")
def cat(file):
    """Fetch the content of a file from the database.

    Parameters
    ----------
    file : str
        Path relative to the database root, including the ``.json`` suffix
        (for example ``"genres.json"``).

    Returns
    -------
    object
        The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
    content = requests.get(base + file)
    # Fail loudly on an HTTP error rather than returning the error payload.
    content.raise_for_status()
    return content.json()

Enter specific file: genres.json


In [437]:
output = cat(file)
# cat() returns whatever JSON the node holds, so it is not always a dict.
# Only a dict can be walked key by key; anything else is printed as is.
if isinstance(output, dict):
    for i in output:
        # Skip entries whose value is an empty string.
        if type(output[i]) == str and len(output[i]) == 0:
            continue
        else:
            print(i, ":", output[i])
elif output is not None:
    print(output)

Action : ['300', '13 Assassins', '13 Hours: The Secret Soldiers of Benghazi', '2 Fast 2 Furious', 'A Night to Remember', 'Abraham Lincoln: Vampire Hunter', 'Altitude', "Assassin's Creed: Lineage", 'Avenging Angelo', 'Babylon 5: In the Beginning', 'Badges of Fury', 'Bangkok Dangerous', 'Beverly Hills Cop II', 'Beverly Hills Ninja', 'Bhaag Milkha Bhaag', 'Big Game', 'Black Mask', 'Bloody Sunday', 'Braveheart', 'Breakdown', 'Bronco Billy', 'Cat Run', 'Changing Lanes', 'Conan the Destroyer', 'Conspiracy Theory', 'Cowboy Bebop: The Movie', 'Die Another Day', 'Dragonball Evolution', 'Fighting', 'First Blood', 'Free State of Jones', 'Get the Gringo', 'Ghost in the Shell: Stand Alone Complex - Solid State Society', 'Gone in Sixty Seconds', 'Hackers', 'Hancock', 'Highlander V: The Source', 'Hitman: Agent 47', 'How I Live Now', 'I Am a Hero', 'Inglourious Basterds', 'Iron Monkey', 'Johnny Dangerously', 'Kingsglaive: Final Fantasy XV', 'Knock Off', 'Lone Survivor', 'Mad City', 'Mad Detective', 'M

### rm: remove a file from the file system
#### Note: enter "test" to delete only the test table; the other tables are not affected.

In [446]:
base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
rm_dic = {"information":"information.json","scores":"scores.json", "test":"test.json"}

def valid_rm(path):
    """Translate a table name into the resource name used for deletion.

    Names listed in ``rm_dic`` use the mapped value; any other name simply
    gets ``".json"`` appended.
    """
    return rm_dic[path] if path in rm_dic else path + ".json"

In [447]:
path = input("Enter specific table: ")
def rm(url, table=None):
    """Delete a table from the database.

    Parameters
    ----------
    url : str
        Root URL of the database, ending in ``/``.
    table : str, optional
        Name of the table to delete. When omitted, the module-level ``path``
        entered at the prompt is used, so existing ``rm(url)`` calls keep
        working.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    if table is None:
        table = path
    rm_link = url + valid_rm(table)
    remove = requests.delete(rm_link)
    # Surface a rejected delete instead of silently discarding the response.
    remove.raise_for_status()
rm(base)

Enter specific table: test


### put(file, path): uploading a file to file system
#### Idea: the put() function below adds the new data to the NameNode (the information table) in Firebase. Because there are several DataNodes, each DataNode listed in `path` must also receive its corresponding partition of the new data as part of the same upload.

##### file: {"movie_title":{"new_genres":"g1, g2", "new_language": "l1, l2", "popularity": value, "release_date": value,"title":"movie_title"}}
##### path: subset of a list ["year", "genres","popularity", "language"]

In [448]:
# Maps each partition table name to the movie field that determines its key(s).
check = {"year":"release_date", "genres": "new_genres", "popularity":"popularity", "language": "new_language"}

def _partition_keys(value):
    """Return the partition node names a field value maps to."""
    if isinstance(value, (int, float)):
        # Numeric fields are bucketed by their rounded integer value.
        return [str(int(round(value)))]
    # Text fields hold a ", "-separated list. Blank items are dropped because
    # an empty key would address the whole table instead of one partition.
    return [k for k in value.split(", ") if k]

def _add_to_partition(link, title):
    """Append ``title`` to the list stored at ``link`` unless already present."""
    exist = requests.get(link)
    exist.raise_for_status()
    upload_lst = exist.json()
    if upload_lst is None:
        upload_lst = []
    if title in upload_lst:
        # Uploading the same movie again must not create a duplicate entry.
        return
    upload_lst.append(title)
    db = requests.put(link, data = json.dumps(upload_lst, indent = 4))
    db.raise_for_status()

def put(movie_dic, path):
    """Upload movies and register them in the requested partition tables.

    Parameters
    ----------
    movie_dic : dict
        Maps a movie title to a dict of its fields; each field named in
        ``check`` for a requested table must be present.
    path : iterable of str
        Partition tables to update; each name must be a key of ``check``.

    For every movie the full record is written to ``information/<title>``,
    then the title is added to the list stored at ``<table>/<key>`` for each
    key derived from the matching field.

    Raises
    ------
    requests.HTTPError
        If any request is answered with a 4xx/5xx status.
    """
    base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
    for i in movie_dic:
        movie_content = movie_dic[i] # type(movie_content): dictionary
        r = requests.put(base + "information/" + i + ".json", json.dumps(movie_content))
        r.raise_for_status()
        for j in path:
            for k in _partition_keys(movie_content[check[j]]):
                _add_to_partition(base + j + "/" + k + ".json", i)

In [300]:
# Example upload: a single movie registered in all four partition tables.
example = {"test_movie":{"new_genres":"Action, History", "new_language": "Danish, Hindi", "popularity": 90,"release_date":"2022","title":"test_movie"}}
path_lst = ["year", "genres","popularity", "language"]
put(example,path_lst)

Note: if we add a new attribute to a table, remember to update the corresponding rule in Firebase.

### getPartitionLocations(file): return the locations of partitions of the file

In [510]:
base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
base_paths = ["genres","year","popularity","language"]

# For every partition table, map the URL of each partition
# ("<base><table>/<key>") to its display path ("data/<table>/<key>").
overall_loc = {}
for table in base_paths:
    table_keys = requests.get(base + table + ".json").json().keys()
    overall_loc[table] = {
        base + table + "/" + key: "data/" + table + "/" + key
        for key in table_keys
    }

In [511]:
base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
def loc(file):
    """Find the partitions of every table that contain ``file``.

    Parameters
    ----------
    file : str
        Movie title to look for.

    Returns
    -------
    dict
        Maps each table name ("genres", "year", "popularity", "language") to
        the list of partition keys whose member list contains ``file``.

    Raises
    ------
    requests.HTTPError
        If a table download is answered with a 4xx/5xx status.
    """
    p = ["genres","year","popularity","language"]
    arr_dic = {i:[] for i in p}
    for path in p:
        table_content = requests.get(base + path + ".json")
        table_content.raise_for_status()
        table_dic = table_content.json() or {}
        # The whole table is already in memory, so membership is tested
        # locally. That needs one request per table rather than one per
        # partition key, and the title never has to be spliced into a URL
        # query string (where characters such as & or " would corrupt it).
        for key, members in table_dic.items():
            if isinstance(members, dict):
                # NOTE(review): assumes a partition may come back as an
                # index->title object instead of a list - confirm.
                members = list(members.values())
            if isinstance(members, list) and file in members:
                arr_dic[path].append(key)
    return arr_dic


In [512]:
def getpl():
    """Return the display paths of every partition holding the global ``file``.

    Uses ``loc(file)`` to find the matching partition keys per table, rebuilds
    each partition URL, and keeps the ``overall_loc`` path for the URLs that
    are registered there.
    """
    return [
        overall_loc[table][base + table + "/" + key]
        for table, part_keys in loc(file).items()
        for key in part_keys
        if base + table + "/" + key in overall_loc[table]
    ]

In [513]:
# getpl() reads the movie title from the global `file`, so it must be set first.
file = input("Enter the partition file name: ") 
print("The locations of partitions of the file are: ", getpl())

Enter the partition file name: Curve
The locations of partitions of the file are:  ['data/genres/Horror', 'data/genres/Thriller', 'data/year/2015', 'data/popularity/5', 'data/language/English', 'data/language/German']


### readPartition(file, partition#): return the content of partition # of the specified file

##### file: specific movie name
##### partition #: one of the set ("year", "genres","popularity", "language")  

In [528]:
base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
# Retained so the module-level name stays defined; readp() no longer appends
# to it, because that made every call return the results of all earlier calls.
read = []
def readp(file,part):
    """Return the content of every partition of table ``part`` that holds ``file``.

    Parameters
    ----------
    file : str
        Movie title to look for.
    part : str
        Partition table name; must be a key of the dict returned by ``loc``.

    Returns
    -------
    list of dict
        One ``{partition_key: content}`` dict per matching partition, built
        fresh on every call.

    Raises
    ------
    requests.HTTPError
        If a partition download is answered with a 4xx/5xx status.
    """
    part_lst = loc(file)[part]
    contents = []
    for i in part_lst:
        r = requests.get(base + part + "/" + i + ".json")
        r.raise_for_status()
        contents.append({i: r.json()})
    return contents

In [529]:
# Collect the two arguments for readp(): the movie title and the partition table.
file = input("Enter the file name: ") 
part = input("Enter the partition table: ")

Enter the file name: Big Game
Enter the partition table: genres


In [531]:
#print("The content of partition of the file: ", readp(file,part))

# Partition-based MapReduce( )


### Example 1
##### pmr(partition): input should be one of the set ("genres", "language")
##### pmr() returns the attributes of a specific file in the given partition (table)
##### SQL query: select * from partition_table where attrs = file;

In [514]:
file = input("Enter a file name: ")
def pmr(partition):
    """Return the keys of ``partition`` whose entry contains the global ``file``.

    Downloads the whole partition table once and keeps, in table order, every
    key for which ``file`` is found in the stored value.
    """
    base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
    table = requests.get(base + partition + "/" + ".json").json()
    return [key for key in table if file in table[key]]

Enter a file name: Altitude


In [516]:
# Show which genres and languages the movie in the global `file` belongs to.
print("The genres list: ",pmr("genres"))
print("The language list: ",pmr("language"))

The genres list:  ['Action', 'Horror', 'Science Fiction', 'Thriller']
The language list:  ['English', 'French']


### Example 2
##### pmr_range(low,high,partition): low is the low_limit, high is the high limit, partition is one of set ("year", "popularity")
##### pmr_range() returns the movies in a given range of a specific partition(table) 
##### SQL query: select * from partition_table where low <= value <= high; 

In [517]:
def pmr_range(low,high,partition):
    """Return the entries of ``partition`` whose key lies between ``low`` and ``high``.

    The request orders the table by key and passes ``low``/``high`` as the
    quoted ``startAt``/``endAt`` bounds. In the sample run for 1992-1995 both
    end years are returned, so the bounds appear to be inclusive.
    """
    base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
    url = "".join([
        base,
        partition,
        '.json?orderBy="$key"&startAt="',
        str(low),
        '"&endAt=',
        '"',
        str(high),
        '"',
    ])
    return requests.get(url).json()

In [527]:
# Sample range queries: popularity keys 19..24 and release years 1992..1995.
print(pmr_range(19,24,"popularity"))
print(pmr_range(1992,1995,"year"))

{'19': ['Undisputed II: Last Man Standing'], '20': ['Night at the Museum: Secret of the Tomb', 'Sicario', 'Top Gun'], '21': ['Braveheart']}
{'1992': ['A River Runs Through It', 'All Ladies Do It', 'Encino Man', 'Hoffa', 'Housesitter', 'Like Water for Chocolate', 'Man Bites Dog', 'The Mighty Ducks'], '1993': ['A Bronx Tale', 'Falling Down', 'Fire in the Sky', 'Iron Monkey', 'Nowhere to Run', 'Return of the Living Dead 3', 'The Fugitive', 'The Man without a Face', 'The Wrong Trousers', "What's Love Got to Do with It"], '1994': ['Four Weddings and a Funeral', 'Iron Will', 'Pulp Fiction', 'Queen Margot', 'Satantango', 'Street Fighter', 'The Client', 'The Madness of King George', 'Thumbelina'], '1995': ['Braveheart', 'Casino', 'Hackers', 'Nixon', 'Something to Talk About', 'Tales from the Crypt: Demon Knight', 'The Crossing Guard', 'Under Siege 2: Dark Territory']}


### Example 3
##### pmr_find(partition): partition is one of set ("genres","language")
##### pmr_find() returns the movies which have the same partition as that of a given movie 
##### SQL query: select * from partition_table where value = file.partition; 

In [561]:
from collections import Counter

In [574]:
file = input("Enter a file name: ")
def pmr_find(partition):
    """Return the movies that share every ``partition`` value of the global ``file``.

    Parameters
    ----------
    partition : str
        Partition table name, e.g. ``"genres"`` or ``"language"``.

    Returns
    -------
    list
        Titles that appear in every partition of the table that contains
        ``file``, in order of first appearance. Empty when no partition
        contains ``file``.

    Raises
    ------
    requests.HTTPError
        If the table download is answered with a 4xx/5xx status.
    """
    base = "https://imdb-movies-d490a-default-rtdb.firebaseio.com/"
    response = requests.get(base + partition + ".json")
    response.raise_for_status()
    tmp_dic = response.json() or {}
    cnt = Counter()
    num = 0
    # The whole table is already downloaded, so matching partitions are found
    # locally rather than with one filtered query plus one re-download per
    # key. This also keeps the raw title out of the URL query string.
    for members in tmp_dic.values():
        if isinstance(members, dict):
            # NOTE(review): assumes a partition may come back as an
            # index->title object instead of a list - confirm.
            members = list(members.values())
        if not isinstance(members, list) or file not in members:
            continue
        # Count each title once per partition so a duplicated entry cannot
        # inflate its tally; dict.fromkeys keeps first-appearance order.
        cnt.update(list(dict.fromkeys(members)))
        num += 1
    # A title shared by all matching partitions was counted exactly `num` times.
    return [title for title, seen in cnt.items() if seen == num]
            

Enter a file name: Catacombs


In [578]:
#print("The movies with the same langaguage: ", pmr_find("language"))

In [579]:
#print("The movies with the same genres: ", pmr_find("genres"))

Note: The output of pmr_find(partition) depends on whether "genres" or "language" is given as input. We can go further and find the movies that appear in both genres_list and language_list, i.e. the movies that share both the language types and the genre types of the input file. 

In [577]:
lang_lst = pmr_find("language")
genres_lst = pmr_find("genres")
# Keep the language matches, in their original order, that are also genre matches.
common = [title for title in lang_lst if title in genres_lst]
print("Movies with the same genres and language", common)

Movies with the same genres and language ['Altitude', 'Catacombs', 'Twilight Zone: The Movie']
