In [1]:
import os

In [2]:
def create_keyspace(name, replication_factor):
    return """
    CREATE KEYSPACE IF NOT EXISTS %s WITH replication = {
        'class': 'SimpleStrategy', 
        'replication_factor': '%s'
    };
    """ % (name, replication_factor)

In [3]:
create_keyspace('foo', 3)

"\n    CREATE KEYSPACE IF NOT EXISTS foo WITH replication = {\n        'class': 'SimpleStrategy', \n        'replication_factor': '3'\n    };\n    "

In [4]:
"""
CREATE KEYSPACE IF NOT EXISTS surface_data WITH replication = {
    'class': 'SimpleStrategy', 
    'replication_factor': '%s'
};
""" % ()

TypeError: not enough arguments for format string

In [None]:
def read_file(path):
    with open(path, 'r') as content_file:
        return content_file.read()

files = os.listdir('cql')
files.sort()

for path in files:
    query = read_file(f'cql/{path}')
    print(query)
    print()

In [114]:
from confluent_kafka.admin import AdminClient, NewPartitions, NewTopic
from confluent_kafka import TopicPartition, Consumer

In [30]:
from confluent_kafka.admin import AdminClient, NewTopic

class create_admin_client:
    def __init__(self):
        
        # KAFKA INSTANCES
        ports = [11001, 11002]
        brokers = ','.join([f'localhost:{str(x)}' for x in ports])

        # ATTEMPT TO CONNECT TO THE CLUSTER
        self.instance = AdminClient({
            'bootstrap.servers': brokers,
        })
        
    # FETCH ALL EXISTING TOPICS
    def all_topics(self):
        container = {}
        
        for name, parts in self.instance.list_topics().topics.items():
            container[name] = len(parts.partitions)
        
        return container
    
    # CHECK IF TOPIC ALREADY EXISTS
    def topic_exists(self, target_topic):
        for topic in list(self.all_topics().keys()):
            if topic == target_topic:
                return True
        
        return False
    
    # ATTEMPT TO CREATE A NEW TOPIC
    def create_topic(self, name, num_partitions):

        # THROW ERROR IF TOPIC ALREADY EXISTS
        if self.topic_exists(name):
            raise Exception('ERROR: THIS TOPIC ALREADY EXISTS')

        # OTHERWISE, CREATE IT
        self.instance.create_topics(
            new_topics=[NewTopic(
                topic=name,
                num_partitions=num_partitions,
                replication_factor=1,
            )]
        )

In [83]:
kafka_admin = create_admin_client()

In [84]:
kafka_admin.all_topics()

{'eyylmao': 2, 'eyylmaoZ': 5, 'foobarz': 1, 'eyy': 2, 'foobar': 2}

In [None]:
kafka_admin.create_topic(
    name='foobarz',
    num_partitions=1
)

In [36]:
import requests

In [79]:
def get_req(endpoint):
    response = requests.get(endpoint)
    
    print('STATUS:', response.status_code)
    return response.json()

In [80]:
def post_req(endpoint, body):
    response = requests.post(endpoint, json=body)
    
    print('STATUS:', response.status_code)
    return response.json()

In [81]:
get_req('http://localhost:3003/kafka')

STATUS: 200


{'eyylmao': 2, 'eyylmaoZ': 5, 'foobarz': 1, 'eyy': 2, 'foobar': 2}

In [128]:
post_req('http://localhost:3003/kafka/create', {
    'name': 'eyylmaoZ',
    'num_partitions': 5
})

STATUS: 201


{'topic_name': 'eyylmaoZ', 'num_partitions': 5}

In [221]:
post_req('http://localhost:3003/cassandra/create', {
    'domain': 'project.foo',
    'columns': {
        'first': 'text',
        'second': 'text',
        'third': 'text',
    },
    'indexing': [
        'second', 
        'third'
    ]
})

STATUS: 201


In [129]:
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

In [134]:
cluster = Cluster([('localhost', 12001), ('localhost', 12002)])
instance = cluster.connect()

In [182]:
class create_cassandra_instance:
    def __init__(self):
        cluster = Cluster([('localhost', 12001), ('localhost', 12002)])
        self.instance = cluster.connect()
    
    def table_overview(self):
        container = []
        
        # BLACKLIST SYSTEM NAMESPACES
        blacklist = ['system_auth', 'system_schema', 'system_distributed', 'system', 'system_traces']
        
        raw_result = cassandra.instance.execute(
            "SELECT keyspace_name, table_name FROM system_schema.tables"
        )
        
        for row in results:
            if row.keyspace_name not in blacklist:
                container.append(row)

In [139]:
cassandra = create_cassandra_instance()

In [142]:
dir(cassandra.instance)

['__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_graph_paging_available',
 '_create_response_future',
 '_default_consistency_level',
 '_default_serial_consistency_level',
 '_default_timeout',
 '_graph_paging_available',
 '_initial_connect_futures',
 '_lock',
 '_maybe_get_execution_profile',
 '_maybe_set_graph_paging',
 '_metrics',
 '_monitor_reporter',
 '_on_analytics_master_result',
 '_on_request',
 '_pools',
 '_profile_manager',
 '_protocol_version',
 '_request_init_callbacks',
 '_resolve_execution_profile_options',
 '_row_factory',
 '_set_keyspace_for_all_pools',
 '_target_analytics_master',
 '_transform_params',
 '_vali

In [181]:
blacklist = ['system_auth', 'system_schema', 'system_distributed', 'system', 'system_traces']

In [180]:
results = cassandra.instance.execute("SELECT keyspace_name, table_name FROM system_schema.tables")

for row in results:
    if row.keyspace_name not in blacklist:
        print(row)

Row(keyspace_name='system_traces', table_name='events')
Row(keyspace_name='system_traces', table_name='sessions')


In [169]:
results = cassandra.instance.execute("""
    SELECT * FROM system_schema.columns 
    WHERE keyspace_name = %s AND table_name = %s
""", ('system_auth', 'network_permissions'))

for row in results:
    print(row)

Row(keyspace_name='system_auth', table_name='network_permissions', column_name='dcs', clustering_order='none', column_name_bytes=b'dcs', kind='regular', position=-1, type='frozen<set<text>>')
Row(keyspace_name='system_auth', table_name='network_permissions', column_name='role', clustering_order='none', column_name_bytes=b'role', kind='partition_key', position=0, type='text')


In [186]:
cassandra.instance.execute("""
CREATE KEYSPACE IF NOT EXISTS experiment WITH replication = {
    'class': 'SimpleStrategy', 
    'replication_factor': '%s'
};
""" % 1).all()

[]

In [189]:
cassandra.instance.execute("""
CREATE TABLE experiment.sensor_1C (timestamp text, serial_number text, vector list<double>, PRIMARY KEY(timestamp, serial_number));
""").all()

[]

In [207]:
def create_table_query(self, tbl_name, columns, primary_keys):
    
    # MAKE SURE PRIMARY KEYS ARE OK
    for key in primary_keys:
        col_list = list(columns.keys())
        
        if key not in col_list:
            raise Exception(f"PRIMARY KEY '{key}' IS NOT A VALID COLUMN")
    
    # BASE QUERY
    query = f'CREATE TABLE experiment.{tbl_name} ('
    
    # LOOP IN COLUMNS
    for column_name, column_type in columns.items():
        query += f'{column_name} {column_type}, '
        
    # ADD PRIMARY KEYS
    key_string = ', '.join(primary_keys)
    query += f'PRIMARY KEY({key_string}));' 
    
    return query

In [211]:
create_tbl('expanse', {
    'foo': 'bar',
    'biz': 'baz'
}, ['foo'])

'CREATE TABLE experiment.expanse (foo bar, biz baz, PRIMARY KEY(foo));'

In [None]:
base = """
CREATE TABLE experiment.sensor_1B (
    timestamp text,
    serial_number text,
    vector list<double>,
    PRIMARY KEY(timestamp, serial_number)
);
"""

In [15]:
import re

In [46]:
test_string = "cnn::my_cool_model::v3"

In [47]:
match = re.match(r'(.+?)::(.+?)::v(\d+)', test_string)

In [48]:
match.groups()

('cnn', 'my_cool_model', '3')

In [64]:
def split_model_id(input_data: str):
    match = re.match(r'(.+?)::(.+?)::v(\d+)', input_data)
    
    if not match:
        raise Exception(f'MODEL NAME DID NOT FOLLOW REGEX PATTERN ({input_data})')
    
    model_type, model_name, model_version = match.groups()
    
    if not model_version.isnumeric():
        raise Exception(f'MODEL NAME IS NOT NUMERIC ({model_version})')
    
    return [model_type, model_name, int(model_version)]

In [67]:
split_model_id('cnn::my_cool_model::v3')

['cnn', 'my_cool_model', 3]

In [None]:
# Example string
text = "part_one::part_two::part_three"

# Regular expression to match three parts separated by ::
pattern = r'(.+?)::(.+?)::(.+?)'

# Using re.match to find the match
match = re.match(pattern, text)

if match:
    part_one, part_two, part_three = match.groups()
    print(f"Part One: {part_one}")
    print(f"Part Two: {part_two}")
    print(f"Part Three: {part_three}")
else:
    print("No match found.")


In [45]:
'str'.isnumeric()

False

In [42]:
dir('3')

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'removeprefix',
 'removesuffix',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
