################################################################################################################################
#                  Mid Term: Author-Vaishali Lambe, NUID-001286444                            #
################################################################################################################################

# Question 2: 
**Analysis 1**

#### Which bestseller list contains the book that has spent the longest time on its list?
################################################################################################################################

In [1]:
# Import modules we need.
import json
import os


# Directories used by this notebook: the folder holding the downloaded raw
# JSON, and the folder that receives this analysis's intermediate results.
data_path = os.path.join("..", "data")
intermediate_data_path = os.path.join(".", "ana_1")

# Echo each path, then report whether it exists as a directory.
for checked_path in (data_path, intermediate_data_path):
    print(checked_path)
    verdict = (" is a directory" if os.path.isdir(checked_path)
               else " is NOT a directory - something is wrong :(")
    print(checked_path + verdict)

..\data
..\data is a directory
.\ana_1
.\ana_1 is a directory


In [2]:
# General function for getting NYT API JSON from a local cache file.
def get_cached_nyt_json(cache_file):
    """Load previously downloaded NYT API JSON from a cache file.

    This function never downloads anything itself; it only reads what is
    already on disk.

    Parameters:
        cache_file: path of the JSON cache file to read.

    Returns:
        The decoded JSON content of the file, or an empty dict when the file
        does not exist or cannot be parsed.  A message describing the outcome
        is printed in every case.
    """
    if not os.path.isfile(cache_file):
        # It's not in the cache, so display a message and return an empty result.
        print("get_cached_nyt_json(): required data has not been downloaded from NYT API")
        print("get_cached_nyt_json(): ensure the data is available locally, or run the collection.ipynb notebook to download it")
        return {}

    # JSON interchange text is UTF-8, so decode it explicitly instead of
    # relying on the platform's locale encoding (e.g. cp1252 on Windows).
    with open(cache_file, 'rt', encoding='utf-8') as f:
        try:
            result = json.load(f)
        except ValueError as err:
            # Both malformed JSON and undecodable bytes surface as ValueError
            # subclasses.  Keep the best-effort empty result, but say so
            # rather than claiming a value was returned from the cache.
            print("get_cached_nyt_json(): could not parse cache file " + cache_file + ": " + str(err))
            return {}

    print("get_cached_nyt_json(): returning value from cache file: " + cache_file)
    return result


# Location of the cached response for books/v3/lists/names.
def get_books_list_names_cache_file_path():
    cache_name = "books_v3_lists_names.json"
    return os.path.join(data_path, cache_name)


# Location of the cached books/v3/lists response for one bestseller list.
# The bare file name is printed before the full path is returned.
def get_books_list_cache_file_path(list_name):
    cache_name = "books_v3_lists_{}.json".format(list_name)
    print(cache_name)
    full_path = os.path.join(data_path, cache_name)
    return full_path


# Shortcut: load the cached names of all bestseller lists.
def resolve_books_list_names():
    names_cache_file = get_books_list_names_cache_file_path()
    return get_cached_nyt_json(names_cache_file)


# Shortcut: load the cached contents of a single bestseller list.
def resolve_books_list(list_name):
    list_cache_file = get_books_list_cache_file_path(list_name)
    return get_cached_nyt_json(list_cache_file)

In [3]:
list_names_json = resolve_books_list_names()

# Encoded names of the lists updated weekly; only weekly lists have a
# 'weeks on list' value.
weekly_list_names = [
    entry['list_name_encoded']
    for entry in list_names_json['results']
    if entry['updated'] == 'WEEKLY'
]
print("Weekly bestseller lists ({}):".format(len(weekly_list_names)))
print(weekly_list_names)

# Encoded names of the lists updated monthly.
monthly_list_names = [
    entry['list_name_encoded']
    for entry in list_names_json['results']
    if entry['updated'] == 'MONTHLY'
]
print("Monthly bestseller lists ({}):".format(len(monthly_list_names)))
print(monthly_list_names)

get_cached_nyt_json(): returning value from cache file: ..\data\books_v3_lists_names.json
Weekly bestseller lists (29):
['combined-print-and-e-book-fiction', 'combined-print-and-e-book-nonfiction', 'hardcover-fiction', 'hardcover-nonfiction', 'trade-fiction-paperback', 'mass-market-paperback', 'paperback-nonfiction', 'e-book-fiction', 'e-book-nonfiction', 'hardcover-advice', 'paperback-advice', 'advice-how-to-and-miscellaneous', 'chapter-books', 'childrens-middle-grade', 'childrens-middle-grade-e-book', 'childrens-middle-grade-hardcover', 'childrens-middle-grade-paperback', 'paperback-books', 'picture-books', 'series-books', 'young-adult', 'young-adult-e-book', 'young-adult-hardcover', 'young-adult-paperback', 'hardcover-graphic-books', 'paperback-graphic-books', 'manga', 'combined-print-fiction', 'combined-print-nonfiction']
Monthly bestseller lists (24):
['animals', 'business-books', 'celebrities', 'crime-and-punishment', 'culture', 'education', 'espionage', 'expeditions-disasters-an

In [4]:
# For every weekly bestseller list, find the book that has spent the most
# weeks on that list, then rank those per-list leaders and save them to CSV.


def _csv_field(value):
    # Render one CSV field.  The text is quoted only when it contains a comma,
    # a double quote or a line break, so rows that were already unambiguous
    # are written exactly as before.  Because fields are separated by ", ",
    # readers should skip the initial space after each delimiter.
    text = str(value)
    if any(ch in text for ch in ',"\r\n'):
        return '"' + text.replace('"', '""') + '"'
    return text


# Get the data for each bestseller list.
longest_on_list = []
for weekly_list in weekly_list_names:
    print(weekly_list)
    response = resolve_books_list(weekly_list)
    # resolve_books_list() yields {} when the cache file is missing or cannot
    # be parsed; treat that as an empty list instead of raising KeyError.
    books_list = response.get('results', [])

    # Report the first entry of the list as stored, before it is re-sorted.
    for b in books_list[0:1]:
        if b['weeks_on_list'] != 0:
            print("{title} ({rank}) spent {n} weeks on the list".format(
                    title=b['book_details'][0]['title'],
                    rank=b['rank'],
                    n=b['weeks_on_list']))

    # Sort in place so the book with the most weeks on the list comes first,
    # then record that book (unless it has zero weeks on the list).
    books_list.sort(key=lambda x: x['weeks_on_list'], reverse=True)
    for b in books_list[0:1]:
        if b['weeks_on_list'] != 0:
            title = b['book_details'][0]['title']
            print("{title} ({rank}) spent {n} weeks on the list".format(
                    title=title,
                    rank=b['rank'],
                    n=b['weeks_on_list']))
            longest_on_list.append({'list': weekly_list,
                                    'title': title,
                                    'rank': b['rank'],
                                    'weeks_on_list': b['weeks_on_list']})

# Rank the per-list leaders, longest-running first, and print them.
longest_on_list.sort(key=lambda x: x['weeks_on_list'], reverse=True)
for item in longest_on_list:
    print("{title}, #{rank} from {list_name}, spent {weeks} weeks on the list".format(
            title=item['title'],
            rank=item['rank'],
            weeks=item['weeks_on_list'],
            list_name=item['list']))

# Write to a csv file.
csv_file_path = os.path.join(intermediate_data_path, "longest-on-list.csv")
print("Saving to CSV: " + csv_file_path)
with open(csv_file_path, "wt") as f:
    # Header row.
    f.write("Weeks on list, Title, List, Rank on list\n")

    # Data rows, in the same column order as the header.  Fields are escaped
    # so that a title containing a comma cannot shift the columns.
    for item in longest_on_list:
        fields = (item['weeks_on_list'], item['title'], item['list'], item['rank'])
        f.write(", ".join(_csv_field(field) for field in fields) + "\n")

combined-print-and-e-book-fiction
books_v3_lists_combined-print-and-e-book-fiction.json
get_cached_nyt_json(): returning value from cache file: ..\data\books_v3_lists_combined-print-and-e-book-fiction.json
THE SHACK (1) spent 5 weeks on the list
A MAN CALLED OVE (4) spent 40 weeks on the list
combined-print-and-e-book-nonfiction
books_v3_lists_combined-print-and-e-book-nonfiction.json
get_cached_nyt_json(): returning value from cache file: ..\data\books_v3_lists_combined-print-and-e-book-nonfiction.json
HIDDEN FIGURES (1) spent 11 weeks on the list
ALEXANDER HAMILTON (15) spent 61 weeks on the list
hardcover-fiction
books_v3_lists_hardcover-fiction.json
get_cached_nyt_json(): returning value from cache file: ..\data\books_v3_lists_hardcover-fiction.json
LINCOLN IN THE BARDO (1) spent 2 weeks on the list
THE UNDERGROUND RAILROAD (9) spent 29 weeks on the list
hardcover-nonfiction
books_v3_lists_hardcover-nonfiction.json
get_cached_nyt_json(): returning value from cache file: ..\data\boo