In [35]:
%%file find_max_col_min_row.py

import re
import json
import sys

from mrjob.job import MRJob

class MaxColMinRow(MRJob):
    
    def mapper(self, _, line):
        values = line.strip().split(",")
        # first value is the column
        # second value is the row
        # third value is cell value
        # return the column
        yield values[0], int(values[2])
        ## return the row 
        yield values[1], int(values[2])
        
    def reducer(self, key, values):
        ## the key is either the column or row 
        ## we have to check if its column or row
        if self.isRow(key):
            ## find the min 
            lowestValue = sys.maxsize
            for value in values:
                lowestValue = min(lowestValue, value)
            yield key, lowestValue    
        if  self.isColumn(key):
            ## find the max 
            highestValue = 0
            for value in values:
                highestValue = max(highestValue, value)
            yield key, highestValue       
        
   ## helper method to determine the colum 
    def isColumn(self, value):
        columnsRegex = re.compile(r"[A-J]")
        return re.match(columnsRegex, value)
    
    ## helper method to determine the row 
    def isRow(self, value):
        rowsRegex = re.compile(r"[K-T]")
        return re.match(rowsRegex, value)
    
if __name__ == '__main__':
    MaxColMinRow.run()

Overwriting find_max_col_min_row.py


In [36]:
!python find_max_col_min_row.py cs4650hw1.dat

"A"	994
"B"	999
"C"	987
"D"	995
"E"	998
"F"	997
"G"	997
"H"	997
"I"	995
"J"	992
"K"	0
"L"	3
"M"	0
"N"	13
"O"	1
"P"	12
"Q"	0
"R"	2
"S"	5
"T"	2


No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory C:\Users\moham\AppData\Local\Temp\find_max_col_min_row.moham.20231002.055029.374139
Running step 1 of 1...
job output is in C:\Users\moham\AppData\Local\Temp\find_max_col_min_row.moham.20231002.055029.374139\output
Streaming final output from C:\Users\moham\AppData\Local\Temp\find_max_col_min_row.moham.20231002.055029.374139\output...
Removing temp directory C:\Users\moham\AppData\Local\Temp\find_max_col_min_row.moham.20231002.055029.374139...


In [1]:
%%file find_max_col_min_row_part_2.py

import re
import json
import sys

from mrjob.job import MRJob

class MaxColMinRow2(MRJob):
    
    def mapper(self, _, line):
        values = line.strip().split(",")
        # first value is the column
        # second value is the row
        # third value is cell value
        # return the column as key , object of value, one row name
        yield values[0] , {"value" : int(values[2]), "example": values[1]}
        ## return the row as key , object of value, one row name
        yield values[1] , {"value" : int(values[2]), "example": values[0]}
        
    def reducer(self, key, values):
        ## the key is either the column or row 
        ## we have to check if its column or row
        if self.isRow(key):
            ## find the min 
            lowestValue = sys.maxsize
            exampleColumn = ''
            for row in values:
                lowestValue = min(lowestValue, row['value'])
                exampleColumn = row['example']
                    
            yield key, {"value": lowestValue, "example": exampleColumn}
           
        if  self.isColumn(key):
            ## find the max 
            highestValue = 0
            exampleRow = ''
            for column in values:
                highestValue = max(highestValue, column['value'])
                exampleRow = column['example']
                                
            yield key, {"value": highestValue, "example": exampleRow}
        
   ## helper method to determine the colum 
    def isColumn(self, value):
        columnsRegex = re.compile(r"[A-J]")
        return re.match(columnsRegex, value)
    
    ## helper method to determine the row 
    def isRow(self, value):
        rowsRegex = re.compile(r"[K-T]")
        return re.match(rowsRegex, value)
    
if __name__ == '__main__':
    MaxColMinRow2.run()

Overwriting find_max_col_min_row_part_2.py


In [2]:
!python find_max_col_min_row_part_2.py cs4650hw1.dat

"A"	{"value":994,"example":"T"}
"B"	{"value":999,"example":"T"}
"C"	{"value":987,"example":"T"}
"D"	{"value":995,"example":"T"}
"E"	{"value":998,"example":"T"}
"F"	{"value":997,"example":"T"}
"G"	{"value":997,"example":"T"}
"H"	{"value":997,"example":"T"}
"I"	{"value":995,"example":"T"}
"J"	{"value":992,"example":"T"}
"K"	{"value":0,"example":"J"}
"L"	{"value":3,"example":"J"}
"M"	{"value":0,"example":"J"}
"N"	{"value":13,"example":"J"}
"O"	{"value":1,"example":"J"}
"P"	{"value":12,"example":"J"}
"Q"	{"value":0,"example":"J"}
"R"	{"value":2,"example":"J"}
"S"	{"value":5,"example":"J"}
"T"	{"value":2,"example":"J"}


No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory C:\Users\moham\AppData\Local\Temp\find_max_col_min_row_part_2.moham.20231005.061958.178262
Running step 1 of 1...
job output is in C:\Users\moham\AppData\Local\Temp\find_max_col_min_row_part_2.moham.20231005.061958.178262\output
Streaming final output from C:\Users\moham\AppData\Local\Temp\find_max_col_min_row_part_2.moham.20231005.061958.178262\output...
Removing temp directory C:\Users\moham\AppData\Local\Temp\find_max_col_min_row_part_2.moham.20231005.061958.178262...


In [55]:
%%file find_max_col_min_row_part_3.py

import re
import json
import sys

from mrjob.job import MRJob

class MaxColMinRow3(MRJob):
    
    def mapper(self, _, line):
        values = line.strip().split(",")
        # first value is the column
        # second value is the row
        # third value is cell value
        # return the column as key , object of value, one row name
        yield values[0] , {"value" : int(values[2]), "example": values[1]}
        ## return the row as key , object of value, one row name
        yield values[1] , {"value" : int(values[2]), "example": values[0]}
        
    def reducer(self, key, values):
        ## the key is either the column or row 
        ## we have to check if its column or row
        if self.isRow(key):
            ## find the min 
            lowestValue = sys.maxsize
            exampleColumn = []
            for row in values:
                lowestValue = min(lowestValue, row['value'])
                exampleColumn.append(row['example'])
                    
            yield key, {"value": lowestValue, "examples": list(set(exampleColumn))}
           
        if  self.isColumn(key):
            ## find the max 
            highestValue = 0
            exampleRow = []
            for column in values:
                highestValue = max(highestValue, column['value'])
                exampleRow.append(column['example'])
                                
            yield key, {"value": highestValue, "examples": list(set(exampleRow))}
        
   ## helper method to determine the colum 
    def isColumn(self, value):
        columnsRegex = re.compile(r"[A-J]")
        return re.match(columnsRegex, value)
    
    ## helper method to determine the row 
    def isRow(self, value):
        rowsRegex = re.compile(r"[K-T]")
        return re.match(rowsRegex, value)
    
if __name__ == '__main__':
    MaxColMinRow3.run()

Overwriting find_max_col_min_row_part_3.py


In [56]:
!python find_max_col_min_row_part_3.py cs4650hw1.dat

"A"	{"value":994,"examples":["M","L","P","Q","R","N","T","K","O","S"]}

No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory C:\Users\moham\AppData\Local\Temp\find_max_col_min_row_part_3.moham.20231002.063742.750390
Running step 1 of 1...
job output is in C:\Users\moham\AppData\Local\Temp\find_max_col_min_row_part_3.moham.20231002.063742.750390\output
Streaming final output from C:\Users\moham\AppData\Local\Temp\find_max_col_min_row_part_3.moham.20231002.063742.750390\output...
Removing temp directory C:\Users\moham\AppData\Local\Temp\find_max_col_min_row_part_3.moham.20231002.063742.750390...



"B"	{"value":999,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"C"	{"value":987,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"D"	{"value":995,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"E"	{"value":998,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"F"	{"value":997,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"G"	{"value":997,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"H"	{"value":997,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"I"	{"value":995,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"J"	{"value":992,"examples":["M","L","P","Q","R","N","T","K","O","S"]}
"K"	{"value":0,"examples":["F","E","D","H","I","J","B","A","C","G"]}
"L"	{"value":3,"examples":["F","E","D","H","I","J","B","A","C","G"]}
"M"	{"value":0,"examples":["F","E","D","H","I","J","B","A","C","G"]}
"N"	{"value":13,"examples":["F","E","D","H","I","J","B","A","C","G"]}
"O"	{"value":1,"examples":["F","E","D","H","I","J","B","A","C","G"]}
"P"	{"value":1