In [19]:
import json
from pprint import pprint

# Sample record used to demonstrate a JSON round trip.
data = {"name": "ACME", "shares": 100, "price": 542.23}

# Encode the dict as JSON text, then decode that text straight back into a dict.
json_str = json.dumps(data)
l_data = json.loads(json_str)

# Show the JSON text first, then the decoded dict
# (pprint sorts dict keys, so the displayed order differs from the JSON text).
pprint(json_str)
pprint(l_data)

'{"name": "ACME", "shares": 100, "price": 542.23}'
{'name': 'ACME', 'price': 542.23, 'shares': 100}


In [20]:
# Persist the sample record as JSON text on disk.
# NOTE(review): open() does not create missing directories, so dataset/ must already exist.
with open("dataset/data.json", mode="w") as fp:
    json.dump(data, fp)

In [57]:
from urllib.parse import urlencode, urlparse, parse_qs

# One sample value per kind: str, list of str, int, float, bool, list of int, None, empty str.
query_params = {
    "key1": "hello world",
    "key2": ["value1", "value2"],
    "key3": 42,
    "key4": 123.45,
    "key5": True,
    "key6": [42, 88],
    "key7": None,
    "key8": "",
}

# doseq=True emits one key=value pair per list element.
query_string = urlencode(
    query=query_params,
    doseq=True,
)

In [58]:
# Inspect the encoded result: the space became "+", and list items repeat their key.
query_string

'key1=hello+world&key2=value1&key2=value2&key3=42&key4=123.45&key5=True&key6=42&key6=88&key7=None&key8='

In [66]:
# Decode the query string back into a dict: every value comes back as a list of
# strings, and the blank key8 entry is absent from the result.
parse_qs(query_string)

{'key1': ['hello world'],
 'key2': ['value1', 'value2'],
 'key3': ['42'],
 'key4': ['123.45'],
 'key5': ['True'],
 'key6': ['42', '88'],
 'key7': ['None']}

In [59]:
# Attach the encoded query to the page URL with a "?" separator, then display it.
url = "?".join(("https://example.com/page/query/", query_string))
url

'https://example.com/page/query/?key1=hello+world&key2=value1&key2=value2&key3=42&key4=123.45&key5=True&key6=42&key6=88&key7=None&key8='

In [60]:
# Split the URL into a ParseResult (scheme, netloc, path, params, query, fragment).
o = urlparse(url)
o

ParseResult(scheme='https', netloc='example.com', path='/page/query/', params='', query='key1=hello+world&key2=value1&key2=value2&key3=42&key4=123.45&key5=True&key6=42&key6=88&key7=None&key8=', fragment='')

In [61]:
# Build a copy of the parse result with the query cleared and reassemble it into a URL string.
o._replace(query="").geturl()

'https://example.com/page/query/'

In [62]:
# Re-display `o`: _replace() returned a new object, so the original still carries its query.
o

ParseResult(scheme='https', netloc='example.com', path='/page/query/', params='', query='key1=hello+world&key2=value1&key2=value2&key3=42&key4=123.45&key5=True&key6=42&key6=88&key7=None&key8=', fragment='')

In [63]:
# Parse the encoded query string back into a dict of string lists.
# NOTE(review): this rebinds query_params, which another cell uses for the raw input dict.
query_params = parse_qs(query_string)

In [64]:
# Inspect the parsed mapping: lists of strings, with no key8 entry.
query_params

{'key1': ['hello world'],
 'key2': ['value1', 'value2'],
 'key3': ['42'],
 'key4': ['123.45'],
 'key5': ['True'],
 'key6': ['42', '88'],
 'key7': ['None']}

In [65]:
from urllib.parse import unquote, ParseResult

In [46]:
# parse_qs has already percent-decoded every value, so print the lists as-is;
# running unquote() over them again would decode a literal "%xx" a second time.
for values in query_params.values():
    print(values)

['hello world']
['value1', 'value2']
['42']
['123.45']
['True']
['42', '88']
['None']


In [52]:
# IPython-only introspection: `??` prints the signature, docstring and source of ParseResult.
# NOTE(review): leftover exploration; consider removing it before sharing the notebook.
ParseResult??

Init signature: ParseResult(scheme, netloc, path, params, query, fragment)
Docstring:     
ParseResult(scheme, netloc, path, params, query, fragment)

A 6-tuple that contains components of a parsed URL.
Source:        
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    __slots__ = ()
    def geturl(self):
        return urlunparse(self)
File:           ~/miniconda3/envs/athena/lib/python3.12/urllib/parse.py
Type:           type
Subclasses:     

In [76]:
from typing import Union
from urllib.parse import parse_qs, unquote, urlparse


def parse_url_querystring(
    url: str,
    drop_na: bool = True,
    na_str: str = "None",
    keep_blank_values: bool = False,
) -> tuple[str, dict[str, Union[str, list[str]]]]:
    """Split a URL into its query-free form and a dict of decoded query parameters.

    Args:
        url: The URL to parse; it may or may not contain a query string.
        drop_na: When True, values equal to ``na_str`` are discarded.
        na_str: The textual marker treated as "missing" when ``drop_na`` is set.
        keep_blank_values: Forwarded to ``parse_qs``. When False (the default),
            parameters with an empty value (``key=``) are omitted.

    Returns:
        A ``(base_url, params)`` tuple. ``base_url`` is ``url`` with the query
        removed. ``params`` maps each key to a single string when exactly one
        value survives filtering, or to a list of strings when several do.
        Keys whose values are all filtered out are omitted.
    """
    parsed_url = urlparse(url)  # urllib.parse.ParseResult
    query_params: dict[str, Union[str, list[str]]] = {}
    # parse_qs already percent-decodes keys and values (and maps "+" to a space).
    # Decoding them again with unquote() would corrupt values that legitimately
    # contain "%xx" text, so the values are used as returned. An empty query
    # yields an empty dict, so no separate emptiness guard is needed.
    parsed_query = parse_qs(parsed_url.query, keep_blank_values=keep_blank_values)
    for key, values in parsed_query.items():
        kept = [value for value in values if not (drop_na and value == na_str)]
        if not kept:  # every value for this key was the NA marker
            continue
        # Collapse single-value parameters to a bare string.
        query_params[key] = kept[0] if len(kept) == 1 else kept
    return parsed_url._replace(query="").geturl(), query_params


In [77]:
# drop_na=False keeps the literal "None" value of key7 in the result.
parse_url_querystring(
    'https://example.com/page/query/?key1=hello+world&key2=value1&key2=value2&key3=42&key4=123.45&key5=True&key6=42&key6=88&key7=None&key8=',
    drop_na=False,
)

('https://example.com/page/query/',
 {'key1': 'hello world',
  'key2': ['value1', 'value2'],
  'key3': '42',
  'key4': '123.45',
  'key5': 'True',
  'key6': ['42', '88'],
  'key7': 'None'})