In [1]:
# demo_nosql_dummy.py

from NoSQL_package import NoSql, pretty_print_nosql


def demo_nosql_dummy():
    """Walk through the basic NoSql operations on the default dummy data.

    Five labelled sections are printed in order: load, filter (``find``),
    grouped count (``aggregate``), projection (``project``) and a self join
    (``join``). Document listings are limited to the first five documents.

    Returns:
        None. Everything is written to stdout.
    """

    def show_head(collection):
        # Re-wrap the first five documents in a NoSql so the pretty printer
        # receives the same kind of object as the full collection.
        pretty_print_nosql(NoSql(collection.data[:5]))

    # ---- Step 1: load ----
    print("=== 1) Initialize dummy data ===")
    # No arguments: per the original note this falls back to
    # DUMMY_COUNTRY_LANGUAGE_DATA.
    store = NoSql()
    total = len(store.data)
    print(f"Total docs: {total}")
    print("First 5 docs:")
    show_head(store)

    # ---- Step 2: equality filter on the 'language' field ----
    print("\n=== 2) Filter: language == 'English' ===")
    english_only = store.find({"language": "English"})
    match_count = len(english_only.data)
    print(f"Matched docs: {match_count}")
    show_head(english_only)

    # ---- Step 3: one row per language with a document count ----
    print("\n=== 3) group_by + aggregate: count per language ===")
    # '*' is the package's special key for "count docs in each group"
    # (as described in the original comment).
    count_spec = {"*": ["count"]}
    for group_row in store.aggregate("language", count_spec):
        print(group_row)

    # ---- Step 4: keep only the 'code' and 'country' fields ----
    print("\n=== 4) project: only code, country ===")
    wanted_fields = {"code": 1, "country": 1}
    slim = store.project(wanted_fields)
    show_head(slim)

    # ---- Step 5: join the collection with itself on 'code' ----
    print("\n=== 5) self join on 'code' ===")
    self_joined = store.join(
        from_field=store,  # the same object is both sides of the join
        local_field="code",
        foreign_field="code",
        as_field="matches",
    )
    joined_count = len(self_joined.data)
    print(f"Joined docs: {joined_count}")
    show_head(self_joined)


# Entry point: run the demo only when this code executes as the main module,
# so importing the definitions elsewhere does not trigger the printout.
if __name__ == "__main__":
    demo_nosql_dummy()

=== 1) Initialize dummy data ===
Total docs: 97
First 5 docs:
Document 1:
{
    code: 'IQ',
    country: 'Iraq',
    language: 'Arabic'
}

Document 2:
{
    code: 'CY',
    country: 'Cyprus',
    language: 'Greek'
}

Document 3:
{
    code: 'RS',
    country: 'Serbia',
    language: 'Serbian'
}

Document 4:
{
    code: 'CL',
    country: 'Chile',
    language: 'Spanish'
}

Document 5:
{
    code: 'LV',
    country: 'Latvia',
    language: 'Latvian'
}


=== 2) Filter: language == 'English' ===
Matched docs: 14
Document 1:
{
    code: 'IE',
    country: 'Ireland',
    language: 'English'
}

Document 2:
{
    code: 'NG',
    country: 'Nigeria',
    language: 'English'
}

Document 3:
{
    code: 'US',
    country: 'United States',
    language: 'English'
}

Document 4:
{
    code: 'GH',
    country: 'Ghana',
    language: 'English'
}

Document 5:
{
    code: 'AU',
    country: 'Australia',
    language: 'English'
}


=== 3) group_by + aggregate: count per language ===
{'language': 'Arabic'

In [2]:
from NoSQL_package import NoSql, pretty_print_nosql

def demo_nosql_dummy_chunks_direct():
    """Repeat the dummy-data walkthrough on a chunked NoSql object.

    The dummy data is read with ``chunk_size = 50``, the resulting chunks are
    collected into a list and wrapped in one outer ``NoSql``. The same four
    operations (``find``, ``aggregate``, ``project``, ``join``) are then
    called on that wrapper. Document listings show the first five documents.

    Returns:
        None. Everything is written to stdout.
    """

    def show_head(collection):
        # Pretty-print only the first five entries of ``collection.data``.
        pretty_print_nosql(NoSql(collection.data[:5]))

    print("=== 1) Initialize dummy data with chunk_size = 50 ===")

    # read_dummy is consumed fully into a list here; wrapping that list means
    # the outer object's .data holds the NoSql chunks themselves.
    chunk_list = list(NoSql.read_dummy(chunk_size=50))
    chunked = NoSql(chunk_list)  # working object for every step below

    print(f"Chunked NoSql: {chunked!r}")
    # Optional sanity check (left disabled, as in the original):
    # for idx, chunk in enumerate(chunked.data, 1):
    #     print(f"  Inner chunk {idx}: {len(chunk.data)} docs")

    # ---- Step 2: equality filter on the 'language' field ----
    print("\n=== 2) Filter on chunked NoSql: language == 'English' ===")
    english_only = chunked.find({"language": "English"})
    match_count = len(english_only.data)
    print(f"Matched docs: {match_count}")
    show_head(english_only)

    # ---- Step 3: one row per language with a document count ----
    print("\n=== 3) group_by + aggregate on chunked NoSql: count per language ===")
    # '*' is the package's special key meaning "count docs in each group"
    # (as described in the original comment).
    count_spec = {"*": ["count"]}
    for group_row in chunked.aggregate("language", count_spec):
        print(group_row)

    # ---- Step 4: keep only the 'code' and 'country' fields ----
    print("\n=== 4) project on chunked NoSql: only code, country ===")
    wanted_fields = {"code": 1, "country": 1}
    slim = chunked.project(wanted_fields)
    show_head(slim)

    # ---- Step 5: join the chunked collection with itself on 'code' ----
    print("\n=== 5) self join on 'code' (chunked NoSql) ===")
    self_joined = chunked.join(
        from_field=chunked,  # the same object is both sides of the join
        local_field="code",
        foreign_field="code",
        as_field="matches",
    )
    joined_count = len(self_joined.data)
    print(f"Joined docs: {joined_count}")
    show_head(self_joined)


# Entry point: run the chunked demo only when this code executes as the main
# module, so importing the definitions elsewhere does not trigger the printout.
if __name__ == "__main__":
    demo_nosql_dummy_chunks_direct()

=== 1) Initialize dummy data with chunk_size = 50 ===
Chunked NoSql: NoSql(n_docs=2)

=== 2) Filter on chunked NoSql: language == 'English' ===
Matched docs: 14
Document 1:
{
    code: 'IE',
    country: 'Ireland',
    language: 'English'
}

Document 2:
{
    code: 'NG',
    country: 'Nigeria',
    language: 'English'
}

Document 3:
{
    code: 'US',
    country: 'United States',
    language: 'English'
}

Document 4:
{
    code: 'GH',
    country: 'Ghana',
    language: 'English'
}

Document 5:
{
    code: 'AU',
    country: 'Australia',
    language: 'English'
}


=== 3) group_by + aggregate on chunked NoSql: count per language ===
{'language': 'Arabic', 'count': 9}
{'language': 'Greek', 'count': 2}
{'language': 'Serbian', 'count': 1}
{'language': 'Spanish', 'count': 15}
{'language': 'Latvian', 'count': 1}
{'language': 'English', 'count': 14}
{'language': 'Macedonian', 'count': 1}
{'language': 'Filipino', 'count': 1}
{'language': 'Turkish', 'count': 1}
{'language': 'Finnish', 'count'

In [3]:
from NoSQL_package import NoSql, pretty_print_nosql

def demo_nosql_json_chunks_direct(json_path: str, chunk_size: int = 50):
    """Run the chunked NoSql walkthrough against a JSON file.

    The file is read through ``NoSql.read_json`` with the given chunk size,
    the chunks are collected into a list and wrapped in one outer ``NoSql``.
    The filter, aggregate, project and self-join steps are then attempted in
    turn. The field names used ('language', 'code', 'country') are examples
    and may need changing to match the file's schema.

    Args:
        json_path: Path of the JSON file, passed straight to ``read_json``.
        chunk_size: Chunk size passed to ``read_json`` (default 50).

    Returns:
        None. If reading fails with an ``Exception`` the error is printed and
        the function returns early. An ``Exception`` in any later step is
        printed and the remaining steps still run.
    """

    def show_head(collection):
        # Pretty-print only the first five entries of ``collection.data``.
        pretty_print_nosql(NoSql(collection.data[:5]))

    print(f"=== 1) Initialize JSON data from: {json_path} (chunk_size = {chunk_size}) ===")

    # ---- Step 1: load every chunk up front ----
    try:
        # With chunk_size set, read_json is expected to yield NoSql chunks;
        # list() drains it completely before anything else happens.
        chunk_list = list(NoSql.read_json(json_path, chunk_size=chunk_size))
    except Exception as exc:
        print(f"ERROR reading JSON: {exc}")
        return

    # Outer wrapper whose .data is the list of chunks.
    chunked = NoSql(chunk_list)

    print(f"Chunked NoSql: {chunked!r}")

    # ---- Step 2: example equality filter (adjust field to your schema) ----
    print("\n=== 2) Filter on chunked NoSql: language == 'English' ===")
    english_only_query = {"language": "English"}
    try:
        hits = chunked.find(english_only_query)
        hit_count = len(hits.data)
        print(f"Matched docs: {hit_count}")
        show_head(hits)
    except Exception as exc:
        print(f"Error during filter/find: {exc}")

    # ---- Step 3: example grouped count (adjust field to your schema) ----
    print("\n=== 3) group_by + aggregate on chunked NoSql: count per language ===")
    # '*' is the package's special key meaning "count docs in each group"
    # (as described in the original comment).
    count_spec = {"*": ["count"]}
    try:
        for group_row in chunked.aggregate("language", count_spec):
            print(group_row)
    except Exception as exc:
        print(f"Error during aggregate: {exc}")

    # ---- Step 4: example projection (adjust fields to your schema) ----
    print("\n=== 4) project on chunked NoSql: only code, country ===")
    wanted_fields = {"code": 1, "country": 1}
    try:
        slim = chunked.project(wanted_fields)
        show_head(slim)
    except Exception as exc:
        print(f"Error during project: {exc}")

    # ---- Step 5: example self join on 'code' (adjust to your schema) ----
    print("\n=== 5) self join on 'code' (chunked NoSql) ===")
    try:
        self_joined = chunked.join(
            from_field=chunked,  # the same object is both sides of the join
            local_field="code",
            foreign_field="code",
            as_field="matches",
        )
        joined_count = len(self_joined.data)
        print(f"Joined docs: {joined_count}")
        show_head(self_joined)
    except Exception as exc:
        print(f"Error during join: {exc}")


# ---- Run the demo on your JSON file ----
# Unlike the two earlier cells, this call is not behind a __main__ guard, so it
# runs whenever this code is executed.
# NOTE(review): the output recorded for this cell stops after the first header
# with a KeyboardInterrupt, i.e. the load did not finish in that run; the
# function's `except Exception` handlers do not catch KeyboardInterrupt.
json_path = "salaries.json"  # <-- put your JSON filename here
demo_nosql_json_chunks_direct(json_path, chunk_size=50)

=== 1) Initialize JSON data from: salaries.json (chunk_size = 50) ===


KeyboardInterrupt: 