In [77]:
from typing import Iterator, Any
import csv

In [78]:
# Location of the reviews CSV that the pipeline cell opens.
filepath = "steam_reviews.csv"

# Set the csv module's maximum field size to 2**25 (33,554,432).
# NOTE(review): presumably needed because some review texts exceed the
# default limit - confirm against the dataset.
# field_size_limit() returns the *previous* limit, which is what the
# notebook displays as this cell's output.
csv.field_size_limit(1 << 25)

33554432

In [None]:
def map_items(items : Iterator[list[str]]) -> Iterator[tuple[int, bool]]:
    """Map phase: convert raw CSV rows into ``(app_id, recommended)`` pairs.

    The first row of ``items`` is treated as the header and is used to locate
    the ``app_id`` and ``recommended`` columns; every following row is
    converted lazily.

    Args:
        items: Rows of string fields, header row first. Any iterable of rows
            is accepted (e.g. a ``csv.reader`` or a plain list).

    Yields:
        ``(app_id, recommended)`` where ``app_id`` is the integer value of the
        ``app_id`` column and ``recommended`` is ``True`` only when the
        ``recommended`` column is exactly the string ``"True"``.

    Raises:
        ValueError: If the header lacks an ``app_id`` or ``recommended``
            column, or if an ``app_id`` field is not a valid integer.
    """
    rows = iter(items)
    # Use next() with a default: a bare StopIteration escaping a generator is
    # converted to RuntimeError (PEP 479), so empty input must be handled here.
    header = next(rows, None)
    if header is None:
        return
    app_id_index = header.index('app_id')
    recommend_index = header.index('recommended')
    for row in rows:
        yield (int(row[app_id_index]), row[recommend_index] == "True")


def shuffle_items(items : Iterator[tuple[int, bool]]) -> Iterator[tuple[int, list[bool]]]:
    """Shuffle phase: group mapped values by key.

    All pairs are materialised and sorted (``sorted`` builds a full list in
    memory), then consecutive pairs sharing a key are collected into one list.

    Args:
        items: ``(key, value)`` pairs in any order.

    Yields:
        ``(key, values)`` once per distinct key, in ascending key order.
        Because whole tuples are sorted, ``values`` is itself sorted
        (``False`` before ``True``). Nothing is yielded for empty input.
    """
    ordered = iter(sorted(items))
    first = next(ordered, None)
    if first is None:
        # No input means no groups; do not emit a placeholder (None, []) entry.
        return
    current_index, first_value = first
    buffer = [first_value]
    for index, item in ordered:
        if index == current_index:
            buffer.append(item)
        else:
            # Key changed: the previous group is complete.
            yield (current_index, buffer)
            current_index, buffer = index, [item]
    # Flush the final group, which the loop never reaches.
    yield (current_index, buffer)

def reduce_items(items : Iterator[tuple[int, list[bool]]], rating_base : float = 5) -> Iterator[tuple[int, float]]:
    """Reduce phase: turn each app's recommendation flags into a rating.

    The rating is the share of truthy flags scaled to ``rating_base``:
    ``rating_base * positive / total``.

    Args:
        items: ``(app_id, recommendations)`` pairs, one per app.
        rating_base: Upper bound of the rating scale. Defaults to 5.

    Yields:
        ``(app_id, rating)`` for every group that has at least one flag.
        Groups with no flags are skipped, since no rating can be computed
        for them (dividing by zero would otherwise abort the whole run).
    """
    for (app_id, recommendations) in items:
        total_recommendations = 0
        positive_recommendations = 0
        for recommendation in recommendations:
            total_recommendations += 1
            if recommendation:
                positive_recommendations += 1
        if total_recommendations == 0:
            continue
        rating = rating_base * positive_recommendations / total_recommendations
        yield (app_id, rating)

In [None]:
# Run the map -> shuffle -> reduce pipeline over the reviews CSV and print
# one (app_id, rating) tuple per app.
# newline='' follows the csv module's guidance for file objects passed to
# csv.reader: without it, newlines embedded in quoted fields may not be
# interpreted correctly.
with open(filepath, 'r', encoding='utf-8', newline='') as file:
    csv_reader = csv.reader(file)
    mapped = map_items(csv_reader)
    shuffled = shuffle_items(mapped)
    reduced = reduce_items(shuffled)

    # The stages are lazy generators, so they must be consumed while the
    # file is still open.
    for item in reduced:
        print(item)
    
    

(70, 4.8248429867411025)
(240, 4.803058917183967)
(420, 4.830638297872341)
(620, 4.938470875353485)
(2870, 2.1211255534923583)
(4000, 4.824827161171826)
(7510, 3.0659536541889483)
(8870, 4.764301760862067)
(8930, 4.80726820844321)
(32470, 4.856225368559334)
(35140, 4.808930526044684)
(39210, 4.380782205003173)
(40800, 4.743189557321226)
(47890, 4.26770997720498)
(48700, 4.884790767119971)
(55230, 4.801197200910547)
(72850, 4.737071391278995)
(105600, 4.895409585101402)
(107410, 4.520230548182596)
(113200, 4.781727343462058)
(200900, 4.70763530927835)
(203160, 4.814392391220299)
(204360, 4.803112819691387)
(205100, 4.865807550987806)
(206190, 4.868217054263566)
(206440, 4.82479426599416)
(207610, 4.841821938320981)
(212680, 4.806059235025929)
(213670, 4.877143303083681)
(214560, 4.850009676795046)
(214950, 4.203135662916236)
(218620, 4.388914744734462)
(219150, 4.868162599460665)
(219740, 4.840811865362919)
(221380, 4.7750627073387815)
(221640, 4.846056895930861)
(225540, 3.970949761464