In [None]:
# MetaData

# Metadata for the dataset

# - Country: Name of the country.
# - Total Library Size: The total number of titles available in the country's Netflix library.
# - No. of TV Shows: The number of TV shows available in the country's Netflix library.
# - No. of Movies: The number of movies available in the country's Netflix library.
# - Cost Per Month - Basic ($): The cost of the Basic Netflix subscription plan in USD.
# - Cost Per Month - Standard ($): The cost of the Standard Netflix subscription plan in USD.
# - Cost Per Month - Premium ($): The cost of the Premium Netflix subscription plan in USD.

# Example data:
# Country      | Total Library Size | No. of TV Shows | No. of Movies | Cost Per Month - Basic ($) | Cost Per Month - Standard ($) | Cost Per Month - Premium ($)
# Argentina    | 4760               | 3154            | 1606          | 3.74                      | 6.3                           | 9.26
# Austria      | 5640               | 3779            | 1861          | 9.03                      | 14.67                         | 20.32
# Bolivia      | 4991               | 3155            | 1836          | 7.99                      | 10.99                         | 13.99
# Bulgaria     | 6797               | 4819            | 1978          | 9.03                      | 11.29                         | 13.54
# Chile        | 4994               | 3156            | 1838          | 7.07                      | 9.91                          | 12.74
# Colombia     | 4991               | 3156            | 1835          | 4.31                      | 6.86                          | 9.93
# Costa Rica   | 4988               | 3152            | 1836          | 8.99                      | 12.99                         | 15.99
# Croatia      | 2274               | 1675            | 599           | 9.03                      | 11.29                         | 13.54
# Czechia      | 7325               | 5234            | 2091          | 8.83                      | 11.49                         | 14.15


In [None]:
pip install cassandra-driver

Collecting cassandra-driver
  Downloading cassandra_driver-3.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.8/18.8 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting geomet<0.3,>=0.1 (from cassandra-driver)
  Downloading geomet-0.2.1.post1-py3-none-any.whl (18 kB)
Installing collected packages: geomet, cassandra-driver
Successfully installed cassandra-driver-3.29.0 geomet-0.2.1.post1


In [None]:
# Print the driver version from inside the running kernel. A `!python` subprocess
# uses whichever `python` is first on PATH, which is not guaranteed to be the
# interpreter the following cells run in.
import cassandra
print(cassandra.__version__)

3.29.0


In [None]:

import os

from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

# Connection settings are read from environment variables when they are set, so
# real secrets never have to be typed into (and saved with) the notebook. Each
# value falls back to the original placeholder/path when its variable is unset:
#   ASTRA_DB_SECURE_BUNDLE_PATH - path to the downloaded secure connect bundle (.zip)
#   ASTRA_DB_CLIENT_ID          - Client ID from your generated token
#   ASTRA_DB_CLIENT_SECRET      - Client Secret from your generated token
cloud_config = {
    # if you do not use the environment variable, replace this path with the
    # location of your downloaded bundle
    'secure_connect_bundle': os.environ.get('ASTRA_DB_SECURE_BUNDLE_PATH', '/secure-connect-mydatabase.zip')
}
# if you do not use the environment variables, replace <<CLIENT ID>> and
# <<CLIENT SECRET>> with the ClientID and Client Secret from your generated token
auth_provider = PlainTextAuthProvider(
    os.environ.get('ASTRA_DB_CLIENT_ID', '<<CLIENT ID>>'),
    os.environ.get('ASTRA_DB_CLIENT_SECRET', '<<CLIENT SECRET>>'),
)
cluster = Cluster(cloud=cloud_config, auth_provider=auth_provider)
session = cluster.connect()

In [None]:
# -✔️-- Describing a keyspace

from pprint import pprint

# replace <<KEYSPACENAME>> (both occurrences) with the name of the keyspace you created in DataStax
session.set_keyspace('<<KEYSPACENAME>>')  # make it the session's default keyspace
keyspace_meta = cluster.metadata.keyspaces['<<KEYSPACENAME>>']  # metadata entry the cluster holds for it

# Dump every attribute of the metadata object in a readable layout.
pprint(vars(keyspace_meta))

In [None]:
# -✔️-- Create UDT to store subscription fees & library contents

# "if not exists" keeps this cell re-runnable (Restart & Run All) once the types
# have been created, matching the "create table if not exists" used for the table.
# The type names are unqualified, so they are created in the session's current keyspace.
# NOTE: "sub_fees_pre_month" (sic) is the name the table definition refers to; keep the two in sync.
session.execute("create type if not exists sub_fees_pre_month ( Basic float, Standard float, Premium float );")
session.execute("create type if not exists library ( Shows int, Movies int );")

In [None]:
# -✔️-- Create Netflix Subscription table

# One row per country: Country is the only primary-key column.
# Subscription and Library use the user-defined types sub_fees_pre_month and
# library, which must already exist when this statement runs.
# "if not exists" lets the cell be executed again without raising once the
# table is present.
session.execute("""
    create table if not exists mykeyspace.netflix_subscription (
        Country text,
        Subscription sub_fees_pre_month,
        LibrarySize int,
        Library library,
        Primary key (Country)
 	);
""")

In [None]:
# -✔️-- Insert entries from CSV to table + Show the table after

import pandas as pd

# replace <</PATH/TO/>> with the path where your copy of netflix price in different countries.csv is
df = pd.read_csv('<</PATH/TO/>>/netflix price in different countries.csv')
# Shorten the CSV headers to identifier-friendly names so each row can be read by attribute.
df = df.rename(columns={
    "Total Library Size": "Library",
    "No. of TV Shows": "Shows",
    "No. of Movies": "Movies",
    "Cost Per Month - Basic ($)": "Basic",
    "Cost Per Month - Standard ($)": "Standard",
    "Cost Per Month - Premium ($)": "Premium",
})

# Prepare the insert once and bind values per row. Letting the driver serialise
# the parameters means country names need no manual quoting and the prices keep
# their decimals (formatting them with %d would truncate 3.74 to 3).
insert_row = session.prepare("""
    insert into mykeyspace.netflix_subscription (Country, Subscription, LibrarySize, Library)
    values (?, ?, ?, ?)
""")

for record in df.itertuples(index=False):
    session.execute(insert_row, (
        str(record.Country),
        # UDT values are bound as tuples in the field order of the type:
        # sub_fees_pre_month -> (Basic, Standard, Premium)
        (float(record.Basic), float(record.Standard), float(record.Premium)),
        int(record.Library),
        # library -> (Shows, Movies)
        (int(record.Shows), int(record.Movies)),
    ))
    print(record.Country, "has been inserted")

# Show the table content after the inserts.
rows = session.execute("select * from mykeyspace.netflix_subscription;")
for row in rows:
    print(row)

In [None]:
# -✔️-- Creating a secondary index on library size + selecting on library size > 5000

# "if not exists" keeps this cell re-runnable once the index has been created.
session.execute("create index if not exists on mykeyspace.netflix_subscription(librarysize);")

# NOTE(review): whether a range predicate on an indexed column is accepted without
# ALLOW FILTERING depends on the index implementation of the backend - confirm.
rows = session.execute("select * from mykeyspace.netflix_subscription where librarysize>5000;")
for row in rows:
    print(row)

In [None]:
# -✔️-- Deleting entries with library size > 7000

# Deletes are addressed by primary key (country), so first collect the countries
# of the rows that match the library-size condition.
rows = session.execute("select * from mykeyspace.netflix_subscription where librarysize>7000;")
country_list = []
for row in rows:
    country_list.append(row.country)
    print(row)
num = len(country_list)

for n in country_list:
    # %s is a driver-side placeholder: the value in the tuple is quoted/escaped by
    # the driver. Building the statement with str.format() instead would break on
    # names containing an apostrophe and allows CQL injection.
    session.execute("delete from mykeyspace.netflix_subscription where country=%s;", (n,))

# If a statement needs several values, use one %s per value and pass them all,
# in order, in the same tuple: session.execute("... a=%s AND b=%s", (a, b))

print(num, "rows deleted")

In [None]:
# -✔️-- Adding extra column to table

# Adds a text column named "capacity" to the table; no value is written to it here.
# NOTE(review): presumably this raises if the cell is run again after the column
# has been added - confirm before relying on Restart & Run All.
session.execute("alter table mykeyspace.netflix_subscription add capacity text;")

In [None]:
# -✔️-- Updating specific entries in table based on library size --> <3000 set capacity=low

# Updates are addressed by primary key (country), so first collect the countries
# of the rows whose library size is below 3000.
rows = session.execute("select * from mykeyspace.netflix_subscription where librarysize<3000;")
country_list = []
for row in rows:
    country_list.append(row.country)
    print(row)
num = len(country_list)

for n in country_list:
    # Bind the country through the driver (%s placeholder) instead of formatting it
    # into the CQL text, so names containing quotes are handled safely.
    session.execute(
        "update mykeyspace.netflix_subscription set capacity='low' where country=%s;",
        (n,),
    )

print(num, "rows updated")

# Re-run the same selection to show the rows with their new capacity value.
rows = session.execute("select * from mykeyspace.netflix_subscription where librarysize<3000;")
for row in rows:
    print(row)