# Preprocess Input Data for `bwsample.sample`
The input data should be structured as `Dict[ID,DATA]` with unique IDs as keys.

In [1]:
# Target structure: unique IDs as keys, arbitrary payloads as values.
# The payloads in this example have mixed types (str, list, set).
examples = dict(
    id1="data...",
    id2=["other", "data"],
    id3={"key", "value"},
    id4="lorem",
    id5="ipsum",
    id6="blind",
    id7="text",
)
examples

{'id1': 'data...',
 'id2': ['other', 'data'],
 'id3': {'key', 'value'},
 'id4': 'lorem',
 'id5': 'ipsum',
 'id6': 'blind',
 'id7': 'text'}

## Convert `List[DATA]` 
You can convert a `List[DATA]` to `Dict[ID,DATA]` as follows:

In [2]:
# Raw input: a plain list of items that carry no IDs of their own
raw = [
    "data...",
    ["other", "data"],
    {"key", "value"},
    "lorem",
    "ipsum",
    "blind",
    "text",
]

# Use each item's position in the list, cast to str, as its ID
examples = dict(zip(map(str, range(len(raw))), raw))

examples

{'0': 'data...',
 '1': ['other', 'data'],
 '2': {'key', 'value'},
 '3': 'lorem',
 '4': 'ipsum',
 '5': 'blind',
 '6': 'text'}

## Convert other data models
Data can be organized in different ways, e.g. as a list of records that each carry their own `id` and `data` fields:

In [3]:
# our raw data: a list of records, each with its own "id" and "data" field
raw = [
    {"id": "id1", "data": "data..."},
    {"id": "id2", "data": ["other", "data"]},
    {"id": "id3", "data": {"key", "value"}},
    {"id": "id4", "data": "lorem"},
    {"id": "id5", "data": "ipsum"},
    {"id": "id6", "data": "blind"},
    {"id": "id7", "data": "text"}
]

# convert
# Index with [] rather than .get(): a record lacking "id" or "data" then
# raises KeyError instead of silently producing a None key or None value.
examples = {r["id"]: r["data"] for r in raw}

# Records sharing an ID would silently overwrite each other in the dict,
# so verify that no record was lost.
assert len(examples) == len(raw), "IDs in `raw` must be unique"

examples

{'id1': 'data...',
 'id2': ['other', 'data'],
 'id3': {'key', 'value'},
 'id4': 'lorem',
 'id5': 'ipsum',
 'id6': 'blind',
 'id7': 'text'}