In [1]:
class FileReader:
    """Read a text file into memory and classify it by file extension.

    Attributes set by ``__init__``:
        file_path: the path passed by the caller, stored unchanged.
        lines: list of raw lines as returned by ``readlines()`` (line
            endings are kept).
        line_count: number of entries in ``lines``.
        file_format: ``"csv"``, ``"tsv"``, ``"json"`` or ``"unknown"``.
    """

    # Extensions (lower-case, without the leading dot) that are recognised.
    _KNOWN_FORMATS = ("csv", "tsv", "json")

    def __init__(self, file_path):
        """Load ``file_path`` immediately and detect its format.

        Raises:
            OSError: propagated from ``open`` when the file cannot be read.
        """
        self.file_path = file_path
        self.load()
        self.line_count = len(self.lines)
        self.check_file_format()

    def load(self):
        """Read every line of the file into ``self.lines``."""
        with open(self.file_path, 'r') as f:
            self.lines = f.readlines()

    def print(self, n=10):
        """Print the first ``n`` lines with surrounding whitespace stripped."""
        for line in self.lines[:n]:
            print(line.strip())

    def check_file_format(self):
        """Set ``self.file_format`` from the extension of ``self.file_path``.

        The extension is taken from the final path component only and is
        compared case-insensitively, so ``data.CSV`` is ``"csv"`` while a
        dot-less name such as ``csv`` is ``"unknown"``.
        """
        # Local import keeps this class self-contained within its cell.
        import os

        # splitext only looks at the last path component, so dots in
        # directory names (e.g. "../Data/file") never count as an extension.
        extension = os.path.splitext(self.file_path)[1]
        normalized = extension[1:].lower()
        if normalized in self._KNOWN_FORMATS:
            self.file_format = normalized
        else:
            self.file_format = "unknown"

In [2]:
# Build a reader for the countries TSV; the constructor loads the file at once.
file_reader = FileReader("../Data/countries.tsv")
# NOTE: only the last expression of a cell is echoed, so file_format on the
# next line is evaluated but not shown; the recorded output is line_count.
file_reader.file_format
file_reader.line_count

74

In [3]:
# Print the first 10 lines (the default value of n).
file_reader.print()

AD	Andorra	42.5	1.5
AE	United Arab Emirates	24	54
AF	Afghanistan	33	65
AG	Antigua and Barbuda	17.05	-61.8
AI	Anguilla	18.25	-63.17
AL	Albania	41	20
AM	Armenia	40	45
AN	Netherlands Antilles	12.25	-68.75
AO	Angola	-12.5	18.5
AP	Asia/Pacific Region	35	105


In [4]:
# Print only the first 5 lines.
file_reader.print(5)

AD	Andorra	42.5	1.5
AE	United Arab Emirates	24	54
AF	Afghanistan	33	65
AG	Antigua and Barbuda	17.05	-61.8
AI	Anguilla	18.25	-63.17


In [22]:
class CSVReader(FileReader):
    """FileReader specialisation that splits .csv / .tsv lines into fields.

    Attributes (in addition to those of FileReader):
        header: truthy when the first line holds column names.
        header_lines: the raw first line when ``header`` is truthy and the
            file is non-empty, otherwise ``None``.
        splitted_lines: list of rows (each a list of strings); created by
            ``split()``.
    """

    # Field delimiter for each supported file format.
    _DELIMITERS = {"csv": ",", "tsv": "\t"}

    def __init__(self, file_path, header=False):
        """Load the file and verify it is a .csv or .tsv file.

        Args:
            file_path: path of the delimited text file.
            header: pass a truthy value when the first line is a header row.

        Raises:
            TypeError: if the detected file format is neither csv nor tsv.
        """
        super().__init__(file_path)
        self.header = header
        if self.file_format not in self._DELIMITERS:
            raise TypeError(f"only .csv or .tsv supported : check {self.file_path} ")

        # Always define header_lines so reading it never raises
        # AttributeError, and guard against an empty file (no first line).
        # Truthiness is used here to agree with the check in print().
        if self.header and self.lines:
            self.header_lines = self.lines[0]
        else:
            self.header_lines = None

    def split(self):
        """Parse every line into a list of fields.

        Only line terminators are removed; leading/trailing tabs and spaces
        are kept so trailing empty fields are not lost. For csv files,
        double-quoted fields may contain commas. For tsv files quoting is
        disabled and lines are split on tabs only. A blank line yields an
        empty list.

        Returns:
            The list of parsed rows, also stored in ``self.splitted_lines``.
        """
        # Local import keeps this class self-contained within its cell.
        import csv

        delimiter = self._DELIMITERS[self.file_format]
        if self.file_format == "tsv":
            # QUOTE_NONE: treat quote characters in TSV data as plain text.
            rows = csv.reader(self.lines, delimiter=delimiter, quoting=csv.QUOTE_NONE)
        else:
            rows = csv.reader(self.lines, delimiter=delimiter)
        self.splitted_lines = list(rows)
        return self.splitted_lines

    def print(self, n=10):
        """Print the first ``n`` data rows, numbered from 1.

        The header row is skipped when ``self.header`` is truthy. Rows are
        parsed on first use if ``split()`` has not been called yet.
        """
        if not hasattr(self, "splitted_lines"):
            self.split()
        start_idx = 1 if self.header else 0
        lines_to_print = self.splitted_lines[start_idx : start_idx + n]
        for i, line in enumerate(lines_to_print, 1):
            print(f"{i}: {line}")


In [23]:
# header is left at its default (False), so every line is treated as data.
csv_file = CSVReader("../Data/countries.tsv")

In [24]:
# split() stores the parsed rows on the instance; its return value is not
# echoed here because it is not the last expression of the cell.
csv_file.split()
csv_file.line_count

74

In [25]:
# Print the first 2 parsed rows, numbered from 1.
csv_file.print(2)

1: ['AD', 'Andorra', '42.5', '1.5']
2: ['AE', 'United Arab Emirates', '24', '54']


In [21]:
# NOTE(review): this cell's execution count (21) precedes the CSVReader
# definition cell (22), and the recorded output below ("start_idx - 0",
# "1 : [...]") does not match what the print method shown above emits.
# Re-run after Restart & Run All to refresh it.
# NOTE(review): header=False is passed, so the column-name row is printed as
# row 1; header=True looks like the intended call - confirm.
csv_file = CSVReader("../Data/epa_air_quality.csv",header= False)
csv_file.print()

start_idx - 0
1 : ['date', 'site_id', 'daily_mean_pm10_conentration', 'daily_aqi_value']
2 : ['2020-01-01', '60070008', '27', '25']
3 : ['2020-01-02', '60070008', '22', '20']
4 : ['2020-01-03', '60070008', '30', '28']
5 : ['2020-01-04', '60070008', '17', '16']
6 : ['2020-01-05', '60070008', '18', '17']
7 : ['2020-01-06', '60070008', '13', '12']
8 : ['2020-01-07', '60070008', '26', '24']
9 : ['2020-01-08', '60070008', '24', '22']
10 : ['2020-01-09', '60070008', '9', '8']
