In [7]:
%reload_ext autoreload
%autoreload 2

# Downloading packages from PyPI

In [6]:
import os
from src import pypi_api

In [7]:
def disp_size_of(path):
    """Print the combined size, in gigabytes, of the files under ``path``.

    The directory tree rooted at ``path`` is traversed with ``os.walk`` and
    ``os.path.getsize`` is summed over every file entry that is not a
    symbolic link.

    Args:
        path: Root directory to measure.

    Returns:
        None. The total is printed with two decimal places.
    """
    byte_count = sum(
        os.path.getsize(candidate)
        for root, _subdirs, names in os.walk(path)
        for candidate in (os.path.join(root, name) for name in names)
        # Symbolic links are left out of the total.
        if not os.path.islink(candidate)
    )

    # Bytes -> gigabytes (1 GB = 1024 * 1024 * 1024 bytes).
    size_in_gb = byte_count / (1024 * 1024 * 1024)
    print(f"The total size of {path} is: {size_in_gb:.2f} GB")

In [9]:
# Fetch the full list of package names.
packages = pypi_api.get_packages()

# Collect metadata for a subset only: the first 500 packages, keyed by name.
package_metadata = {
    package: pypi_api.get_package_metadata(package)
    for package in packages[:500]
}

In [None]:
# Fetch the source code of every package collected in package_metadata.
download_dir = 'downloads'
for name, metadata in package_metadata.items():
    if 'msg' not in metadata:
        # No error recorded for this package: download its source code.
        pypi_api.download_source_code(metadata, download_dir)
    else:
        # A 'msg' entry marks a package with an error; report it and skip.
        print(f"({name}) {metadata['msg']}")

# Report how much disk space the download directory occupies.
disp_size_of(download_dir)

(0) No source distribution found for 0-0.0.0
(0-._.-._.-._.-._.-._.-._.-0) No source distribution found for 0-._.-._.-._.-._.-._.-._.-0-0.1
(00101s) Failed to retrieve metadata for 00101s: Not Found
(00SMALINUX) No source distribution found for 00SMALINUX-1.0
(01changer) No source distribution found for 01changer-1.0.0
(0411-test) No source distribution found for 0411-test-1.0.1
(0805nexter) Failed to read downloads/0805nexter.tar.gz.
(0lever-so) No source distribution found for 0lever-so-1.2.0
(0lever-utils) No source distribution found for 0lever-utils-0.1.6
(0wneg) Failed to retrieve metadata for 0wneg: Not Found
(0x2nac0nda) No source distribution found for 0x2nac0nda-0.1
(1) No source distribution found for 1-1.0.0
(100bot) No source distribution found for 100bot-1.0.1
(101703488-sargun) Failed to retrieve metadata for 101703488-sargun: Not Found
(101703573-Topsis-pkg-suruchipundir) No source distribution found for 101703573-Topsis-pkg-suruchipundir-0.0.1
(101903697-Topsis-code) N

# Attempting to create SymbolGraph

In [2]:
# from src.automata_symbols import process_embeddings_for_project
# process_embeddings_for_project(
#     project_name='a10_octavia',
#     project_root_fpath='/Users/jamievoynow/Desktop/code/playground/pypi/downloads/a10-octavia-2.2.0',
# )

In [1]:
import logging
import os
from typing import List

from tqdm import tqdm

from automata.cli.cli_utils import initialize_py_module_loader
from automata.llm import OpenAIEmbeddingProvider
from automata.memory_store import SymbolCodeEmbeddingHandler
from automata.singletons.dependency_factory import (
    DependencyFactory,
    dependency_factory,
)
from automata.symbol import Symbol, SymbolGraph, get_rankable_symbols
from automata.symbol_embedding import (
    ChromaSymbolEmbeddingVectorDatabase,
    SymbolCodeEmbedding,
)

logger = logging.getLogger(__name__)


def initialize_resources(
    project_name: str,
    scip_base_path: str = "embeddings/scip/",
    persist_directory_path: str = "embeddings/code",
) -> tuple[SymbolGraph, SymbolCodeEmbeddingHandler]:
    """Set up the symbol graph and code-embedding handler for a project.

    Args:
        project_name: Name of the project; used for the ``.scip`` file name
            and passed to the embedding database.
        scip_base_path: Directory containing ``<project_name>.scip``.
        persist_directory_path: Directory handed to the embedding database
            as its ``persist_directory``.

    Returns:
        A ``(symbol_graph, symbol_code_embedding_handler)`` pair, both with
        ``is_synchronized`` set to ``True``.
    """
    # The SCIP index is expected at <scip_base_path>/<project_name>.scip.
    graph = SymbolGraph(os.path.join(scip_base_path, f"{project_name}.scip"))

    embedding_db = ChromaSymbolEmbeddingVectorDatabase(
        project_name,
        persist_directory=persist_directory_path,
        factory=SymbolCodeEmbedding.from_args,
    )
    provider = OpenAIEmbeddingProvider()

    # Register the locally built objects as overrides on the shared factory.
    dependency_factory.set_overrides(
        symbol_graph=graph,
        code_embedding_db=embedding_db,
        embedding_provider=provider,
        disable_synchronization=True,  # We spoof synchronization locally
    )

    handler: SymbolCodeEmbeddingHandler = dependency_factory.get(
        "symbol_code_embedding_handler"
    )

    # Mark both objects as synchronized so the initial embedding handler
    # can be built without a real synchronization step.
    graph.is_synchronized = True
    handler.is_synchronized = True

    return graph, handler


def collect_symbols(symbol_graph: SymbolGraph) -> List[Symbol]:
    """Return the graph's rankable symbols, ordered by their ``dotpath``.

    The supported symbols are read from ``symbol_graph``, filtered through
    ``get_rankable_symbols`` and then sorted on the ``dotpath`` attribute.
    """
    supported = symbol_graph.get_sorted_supported_symbols()
    rankable = list(get_rankable_symbols(supported))
    rankable.sort(key=lambda sym: sym.dotpath)
    return rankable


def process_embeddings(
    symbol_code_embedding_handler: SymbolCodeEmbeddingHandler,
    filtered_symbols: List[Symbol],
) -> None:
    """Run the embedding handler over every symbol, then flush it.

    Args:
        symbol_code_embedding_handler: Handler whose ``process_embedding`` is
            called once per symbol and whose ``flush`` is called at the end.
        filtered_symbols: Symbols to process; iterated under a tqdm
            progress bar.

    A failure on one symbol is logged at error level and does not stop the
    remaining symbols from being processed.
    """
    progress = tqdm(filtered_symbols)
    for sym in progress:
        try:
            symbol_code_embedding_handler.process_embedding(sym)
        except Exception as err:
            # Best effort: record the failure and carry on with the next symbol.
            logger.error(f"Failed to update embedding for {sym.dotpath}: {err}")

    # Final flush for any remaining symbols that didn't form a complete batch.
    symbol_code_embedding_handler.flush()


def temp_process_embeddings_for_project(
    project_root_fpath: str,
    project_name: str,
) -> List[Symbol]:
    """Load a project and return its rankable symbols (temporary helper).

    Despite its name, this helper currently stops before any embedding is
    processed: it initializes the module loader and the project resources,
    then returns the symbols selected by ``collect_symbols``.

    Args:
        project_root_fpath: Root path of the project, passed to
            ``initialize_py_module_loader``.
        project_name: Project name, passed to both the module loader and
            ``initialize_resources``.

    Returns:
        The list of symbols produced by ``collect_symbols`` for the
        project's symbol graph.
    """
    initialize_py_module_loader(
        project_root_fpath=project_root_fpath,
        project_name=project_name,
    )
    # Only the graph is used here; the embedding handler returned alongside
    # it is not needed until the embedding step is enabled.
    symbol_graph, _ = initialize_resources(project_name=project_name)

    # NOTE: the remaining pipeline steps are intentionally not run yet:
    #     dependency_factory.create_subgraph()
    #     process_embeddings(symbol_code_embedding_handler, filtered_symbols)
    return collect_symbols(symbol_graph)


In [2]:
# Collect the rankable symbols for the downloaded a10-octavia 2.2.0 sources.
# NOTE: project_root_fpath is an absolute, machine-specific path; adjust it
# before running this cell on another machine.
filtered_symbols = temp_process_embeddings_for_project(
    project_root_fpath='/Users/jamievoynow/Desktop/code/playground/pypi/downloads/a10-octavia-2.2.0',
    project_name='a10_octavia',
)

******** /Users/jamievoynow/Desktop/code/playground/pypi/downloads/a10-octavia-2.2.0 a10_octavia
[32mLoading modules with root path: /Users/jamievoynow/Desktop/code/playground/pypi/downloads/a10-octavia-2.2.0 and py path: /Users/jamievoynow/Desktop/code/playground/pypi/downloads/a10-octavia-2.2.0/a10_octavia[0m
[32mAnonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.[0m
[32mSuccessfully imported ClickHouse Connect C data optimizations[0m
[36mSuccessfully import ClickHouse Connect C/Numpy optimizations[0m
[32mUsing python library for writing JSON byte strings[0m
[32mNo existing DB found in embeddings/code, skipping load[0m
[32mNo existing DB found in embeddings/code, skipping load[0m
[36mStarting component System[0m
[36mStarting component Posthog[0m
[36mStarting component PersistentDuckDB[0m
[36mStarting component LocalAPI[0m
[32mCreating dependency symbol_code_embedding_handler[0m
[32mAnonymized telemetry enabled. See htt

In [3]:
# Build the rankable-symbol subgraph through the shared dependency factory.
# As the last expression in the cell, the returned graph object is echoed
# in the cell output.
dependency_factory.create_subgraph()

[32mPre-computing bounding boxes for all rankable symbols[0m
('/Users/jamievoynow/Desktop/code/playground/pypi/downloads/a10-octavia-2.2.0', 'a10_octavia')
top_descriptor.name: 
[31mError computing bounding box for scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#__eq__().: Module  not found[0m
top_descriptor.name: 
[31mError computing bounding box for scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#__ne__().: Module  not found[0m
top_descriptor.name: 
[31mError computing bounding box for scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#_find_in_graph().: Module  not found[0m
top_descriptor.name: 
[31mError computing bounding box for scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#_get_unique_key().: Module  not found[0m
top_descriptor.name: 
[31mError computing bounding box for scip-python python a10_octavia 05ee3878644dca

  0%|          | 0/1203 [00:00<?, ?it/s]

top_descriptor.name: 
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#__eq__().: Module  not found[0m
top_descriptor.name: 
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#__ne__().: Module  not found[0m
top_descriptor.name: 
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#_find_in_graph().: Module  not found[0m
top_descriptor.name: 
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#_get_unique_key().: Module  not found[0m
top_descriptor.name: 
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#_name().: Module  not found[0m
top_descriptor.name: 
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 /BaseDataModel#from_dict().: Module  not found[

 15%|█▍        | 180/1203 [00:00<00:00, 1799.71it/s]

top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.common.utils
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.common.utils`/get_vip_security_group_name().: Module downloads.a10-octavia-2.2.0.a10_octavia.common.utils not found[0m
top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.common.utils
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.common.utils`/get_vrid_floating_ip_for_project().: Module downloads.a10-octavia-2.2.0.a10_octavia.common.utils not found[0m
top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.common.utils
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.common.utils`/is_dual_stack().: Module downloads.a10-octavia-2.2.0.a10_octavia.common.utils not found[0m
top_descriptor.name: d

 31%|███       | 372/1203 [00:00<00:00, 1686.54it/s]

top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.controller.worker.flows.vthunder_flows
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.controller.worker.flows.vthunder_flows`/VThunderFlows#get_failover_vcs_vthunder_flow().: Module downloads.a10-octavia-2.2.0.a10_octavia.controller.worker.flows.vthunder_flows not found[0m
top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.controller.worker.flows.vthunder_flows
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.controller.worker.flows.vthunder_flows`/VThunderFlows#get_glm_license_subflow().: Module downloads.a10-octavia-2.2.0.a10_octavia.controller.worker.flows.vthunder_flows not found[0m
top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.controller.worker.flows.vthunder_flows
[31mError processing scip-python python a10_octavia 05ee38

 57%|█████▋    | 688/1203 [00:00<00:00, 2319.78it/s]

top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.db.migration.alembic_migrations.versions.4028e5f7a198_added_vrid_table
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.db.migration.alembic_migrations.versions.4028e5f7a198_added_vrid_table`/upgrade().: Module downloads.a10-octavia-2.2.0.a10_octavia.db.migration.alembic_migrations.versions.4028e5f7a198_added_vrid_table not found[0m
top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.db.migration.alembic_migrations.versions.873ee83aef63_update_project_id_column_of_vrid_table_
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.db.migration.alembic_migrations.versions.873ee83aef63_update_project_id_column_of_vrid_table_`/downgrade().: Module downloads.a10-octavia-2.2.0.a10_octavia.db.migration.alembic_migrations.versions.873ee83aef63_update_project_id

 77%|███████▋  | 925/1203 [00:00<00:00, 1906.11it/s]

top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_a10_network_tasks
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_a10_network_tasks`/TestNetworkTasks#setUp().: Module downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_a10_network_tasks not found[0m
top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_a10_network_tasks
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_a10_network_tasks`/TestNetworkTasks#tearDown().: Module downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_a10_network_tasks not found[0m
top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.control

 94%|█████████▎| 1127/1203 [00:00<00:00, 1853.09it/s]

top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_vthunder_tasks
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_vthunder_tasks`/TestVThunderTasks#test_AmphoraePostMemberNetworkPlug_execute_for_reload_reboot().: Module downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_vthunder_tasks not found[0m
top_descriptor.name: downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_vthunder_tasks
[31mError processing scip-python python a10_octavia 05ee3878644dca8ddabb6b809c2167fcbde09eb5 `downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_vthunder_tasks`/TestVThunderTasks#test_AmphoraePostNetworkUnplug_amophora_not_available().: Module downloads.a10-octavia-2.2.0.a10_octavia.tests.unit.controller.worker.tasks.test_vthunder_tasks not found

100%|██████████| 1203/1203 [00:00<00:00, 1895.64it/s]

[32mBuilt the rankable symbol subgraph[0m





<networkx.classes.digraph.DiGraph at 0x107a3a6e0>