In [2]:
!pip install cassandra-driver

Defaulting to user installation because normal site-packages is not writeable
Collecting cassandra-driver
  Downloading cassandra_driver-3.24.0-cp38-cp38-win_amd64.whl (2.8 MB)
Collecting geomet<0.3,>=0.1
  Downloading geomet-0.2.1.post1-py3-none-any.whl (18 kB)
Collecting click
  Downloading click-7.1.2-py2.py3-none-any.whl (82 kB)
Installing collected packages: click, geomet, cassandra-driver
Successfully installed cassandra-driver-3.24.0 click-7.1.2 geomet-0.2.1.post1


In [6]:
import cassandra

In [7]:
from cassandra.cluster import Cluster

# Connect to the Cassandra node at 127.0.0.1 and open a session on it.
# A connection problem is printed instead of being raised.
try:
    cluster = Cluster(["127.0.0.1"])
    session = cluster.connect()
except Exception as err:
    print(err)

In [8]:
# No keyspace has been selected on the session at this point, so this query is
# expected to be rejected; the server's error message is printed, not raised.
try:
    session.execute("select * from music_library")
except Exception as err:
    print(err)

Error from server: code=2200 [Invalid query] message="No keyspace has been specified. USE a keyspace, or explicitly specify keyspace.tablename"


In [11]:
# Create the `udacity` keyspace unless it already exists, using SimpleStrategy
# with a replication factor of 1. Errors are printed rather than raised.
try:
    session.execute(
        "\n    CREATE KEYSPACE IF NOT EXISTS udacity \n"
        "    WITH REPLICATION = \n"
        "    { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }"
    )
except Exception as err:
    print(err)

In [12]:
# Select `udacity` as the session's keyspace so that the statements that follow
# can refer to tables without a keyspace prefix.
try:
    session.set_keyspace("udacity")
except Exception as err:
    print(err)

# Cassandra data modeling is query-driven: tables are designed around how the data will be accessed. So before proceeding, and before creating any partitions, we must define how we will access that data later!

In [9]:
# Let's say that I would like to be able to query every song that was released in a particular year by a given artist.
# That would look like the query below, for example:
# select * from songs where year=1970 AND artist_name='The Beatles'

# Therefore I need to be able to filter (WHERE) on year.
# YEAR will be my partition key and artist name will be a clustering column to make each Primary Key unique. (there are no duplicates in Cassandra)
# Table name: songs
# song_title
# artist_name
# year
# album_name
# single
# PRIMARY KEY(year, artist name) ===> although the name is the same as in an RDBMS, the concept is slightly different: its first component (year) is the "partition key"

In [15]:
# CQL definition of the `songs` table. CQL borrows SQL's vocabulary (TABLE,
# PRIMARY KEY), but the underlying concepts differ from an RDBMS.
# The primary key is (year, artist_name), matching the query being modeled.
# NOTE(review): two songs by the same artist in the same year share this key --
# confirm whether song_title should also be part of the primary key.
query = (
    "CREATE TABLE IF NOT EXISTS songs "
    "(song_title text, artist_name text, year int, album_name text, "
    "single boolean, PRIMARY KEY (year, artist_name))"
)
try:
    session.execute(query)
except Exception as err:
    print(err)

In [18]:
# Insert two rows into `songs`. Values are listed in the INSERT's column order:
# (song_title, artist_name, year, album_name, single)
# First row:  "Across The Universe", "The Beatles", 1970, "Let It Be", False
# Second row: "Think For Yourself", "The Beatles", 1965, "Rubber Soul", False
query = (
    "INSERT INTO songs (song_title, artist_name, year, album_name, single)"
    " VALUES (%s, %s, %s, %s, %s)"
)
songs_to_insert = (
    ("Across The Universe", "The Beatles", 1970, "Let It Be", False),
    ("Think For Yourself", "The Beatles", 1965, "Rubber Soul", False),
)

try:
    # Both inserts share one try block: a failure on the first row skips the second.
    for song in songs_to_insert:
        session.execute(query, song)
except Exception as err:
    print(err)

In [20]:
# Sanity check: read back every row of `songs` and print it.
try:
    rows = session.execute("select * from songs")
except Exception as e:
    print(e)
else:
    # Iterate only when the query succeeded. Looping outside the try would hit
    # an unbound `rows` (NameError) or a stale `rows` left by an earlier cell.
    for row in rows:
        print(row)

Row(year=1965, artist_name='The Beatles', album_name='Rubber Soul', single=False, song_title='Think For Yourself')
Row(year=1970, artist_name='The Beatles', album_name='Let It Be', single=False, song_title='Across The Universe')


In [22]:
# Validate the data model by running the query the table was designed for:
# filter on year and artist_name, then print the matching rows.
try:
    rows = session.execute("select * from songs WHERE year=1970 AND artist_name='The Beatles'")
except Exception as e:
    print(e)
else:
    # Iterate only when the query succeeded. Looping outside the try would
    # otherwise walk a stale `rows` from an earlier cell (or raise NameError).
    for row in rows:
        print(row)

Row(year=1970, artist_name='The Beatles', album_name='Let It Be', single=False, song_title='Across The Universe')


In [23]:
# Release the driver's resources: shut down the session first, then the cluster.
for closable in (session, cluster):
    closable.shutdown()