In [None]:
import pytesseract
from PIL import Image, ImageOps
import requests
from bs4 import BeautifulSoup
import os
import time
import csv
import re
# Path to the Tesseract executable that pytesseract should use.
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"

# Directory polled for screenshots; its contents are deleted after each pass.
PATH = './Bluestacks_screenshots/'

# Column names, in the same order as the rows assembled by build_data().
DATA_HEADER = [
    'Timestamp',
    'Day of year',
    'Year',
    'Minutes of day (to nearest 10)',
    'guests',
    'laax',
    'flims',
    'falera',
    'mountain',
]

# Page scraped by record_slopes_info().
SLOPES_URL = "https://live.flimslaax.com/slopes"

def mins_to_nearest_10(lt=None):
    """Return the minutes since local midnight, rounded to the nearest 10.

    Args:
        lt: Optional ``time.struct_time`` (or any object exposing
            ``tm_hour``, ``tm_min`` and ``tm_sec``) to round. When omitted
            the current local time is used, so existing zero-argument
            callers behave exactly as before. Passing a value lets a caller
            round the same clock snapshot it uses for other fields.

    Returns:
        An int that is a multiple of 10, between 0 and 1440 inclusive.
        Because the value is rounded rather than floored, the last five
        minutes before midnight yield 1440, not 0.
    """
    if lt is None:
        lt = time.localtime()
    minutes_of_day = lt.tm_hour * 60 + lt.tm_min + lt.tm_sec / 60
    return 10 * round(minutes_of_day / 10)

def sleep_mode():
    """Return True during the hours when the lifts are definitely closed.

    The check uses the 10-minute-rounded local clock from
    mins_to_nearest_10(): the result is True when that value is below
    07:15 (435) or above 17:00 (1020), and False otherwise.
    """
    # Read the clock once so both bounds are tested against the same instant.
    minutes = mins_to_nearest_10()
    return minutes < 7*60 + 15 or minutes > 17*60

def should_record(prev_day):
    """Return True when the once-a-day slopes snapshot is due.

    Args:
        prev_day: Day of year (``tm_yday``) on which the snapshot was last
            recorded.

    Returns:
        True when today's day of year differs from ``prev_day`` and the
        10-minute-rounded local time lies strictly between 17:00 and 19:00;
        False otherwise.
    """
    # Compare with != rather than >: across New Year the day of year drops
    # from 365/366 back to 1, and a ">" test would then never be true again.
    is_new_day = time.localtime().tm_yday != prev_day
    # Read the rounded clock once so both bounds see the same value.
    minutes = mins_to_nearest_10()
    return is_new_day and 17*60 < minutes < 19*60

def build_data(counts):
    """Assemble one CSV row from the five OCR'd counts.

    Args:
        counts: Indexable holding at least five values; elements 0-4 are
            copied into the row in order.

    Returns:
        A list of nine items: Unix timestamp (int), day of year, year,
        minutes of day rounded to the nearest 10, then ``counts[0]``
        through ``counts[4]``.
    """
    now = time.localtime()
    row = [int(time.time()), now.tm_yday, now.tm_year, mins_to_nearest_10()]
    # Index explicitly so a short ``counts`` still raises IndexError.
    row.extend(counts[i] for i in range(5))
    return row

def OCR_to_int(s):
    """Convert OCR output such as ``" 1,234\\n"`` into an int.

    Only thousands-separator commas and surrounding whitespace are
    tolerated. Anything else means the image was bad, and the resulting
    ValueError from ``int`` is deliberately left for the caller to handle.
    """
    without_commas = "".join(s.split(","))
    digits = without_commas.strip()
    return int(digits)

def record_slopes_info():
    """Scrape the live slopes page and append one row to slopes_info.csv.

    The row follows the header already present in slopes_info.csv:
    column 0 is the day of year, column 1 the year, column 2 the kilometres
    of open piste (-1 if that figure is not found in the page), and each
    remaining column is looked up by slope name (1 if the slope widget
    contains an "indicator open" element, 0 if it does not, -1 if no widget
    with that name is on the page). Looking up by header name keeps the
    columns aligned even if the website adds, removes or reorders slopes.

    Raises:
        requests.RequestException: If the page cannot be fetched, including
            when the request times out.
        OSError: If slopes_info.csv cannot be read or appended to.
        ValueError: If slopes_info.csv has no header row, or the header has
            fewer than the three leading columns.
    """
    # A timeout keeps a stalled connection from blocking the caller forever.
    response_text = requests.get(SLOPES_URL, timeout=30).text
    soup = BeautifulSoup(response_text, 'html.parser')

    km_match = re.search(r"(\d+)(km Pisten offen)", response_text)
    kms_open = int(km_match.group(1)) if km_match is not None else -1

    slope_info = {}
    for slope in soup.find_all('div', attrs={"class": "widget slope"}):
        name = slope.text.replace("\n", "").strip()
        open_markers = slope.find_all('div', attrs={"class": "indicator open"})
        slope_info[name] = 1 if open_markers else 0

    # Read the existing header so the new row lines up with earlier rows.
    # newline='' is what the csv module expects for files it reads/writes.
    with open('slopes_info.csv', 'r', newline='') as f:
        header = csv.DictReader(f).fieldnames
    if not header or len(header) < 3:
        raise ValueError(
            "slopes_info.csv needs a header row with at least 3 columns")

    # One clock snapshot so day and year always belong to the same instant.
    now = time.localtime()
    row = [now.tm_yday, now.tm_year, kms_open]
    row.extend(slope_info.get(column, -1) for column in header[3:])

    with open('slopes_info.csv', 'a', newline='') as f:
        csv.writer(f).writerow(row)
    print("Wrote slopes data")


# Crop boxes (left, upper, right, lower) of the five counters, in the order
# guests, Laax, Flims, Falera, on the mountain. "Guests" is actually just the
# sum of the other numbers.
CROP_BOXES = [(5, 440, 299, 534), (550, 365, 715, 410), (550, 460, 715, 510),
              (550, 555, 715, 610), (550, 655, 715, 710)]


def _remove_screenshots(names):
    """Best-effort delete of the named files inside PATH."""
    for name in names:
        try:
            os.remove(os.path.join(PATH, name))
        except OSError:
            # E.g. the file is still locked or already gone; skip it.
            continue


def _load_thresholded(path):
    """Open the image at path and return a black/white grayscale copy."""
    # The context manager releases the file handle as soon as the pixels are
    # converted, so the screenshot can be deleted afterwards.
    with Image.open(path) as raw:
        gray = ImageOps.grayscale(raw)
    # Threshold: https://stackoverflow.com/a/68957790
    return gray.point(lambda p: 255 if p > 40 else 0)


def _read_counts(img):
    """OCR the five counters from img; raises if any of them is unreadable."""
    counts = []
    for box in CROP_BOXES:
        # psm 7 tells Tesseract that we're extracting just a line of text,
        # needed since otherwise get errors when the number to read is 0.
        text = pytesseract.image_to_string(img.crop(box), config='--psm 7')
        counts.append(OCR_to_int(text))
    return counts


# Rounded minute-of-day and (year, day of year) of the last row written to
# data.csv. The day is tracked as well so that the first reading of a new day
# is accepted even though its minute value is lower than yesterday's last one.
prev_rounded_minutes = -1
prev_data_day = None
prev_recording_day = time.localtime().tm_yday - 1
while True:
    if should_record(prev_recording_day):
        try:
            record_slopes_info()
            # record_lifts_info()
        except Exception as err:
            # Keep the polling loop alive; since prev_recording_day is left
            # unchanged the snapshot is retried on a later iteration.
            print("Failed to record slopes data: " + str(err))
        else:
            prev_recording_day = time.localtime().tm_yday
    if sleep_mode():
        _remove_screenshots(os.listdir(PATH))
        time.sleep(20*60)
        continue
    time.sleep(40)

    files = os.listdir(PATH)
    for file in files:
        try:
            img = _load_thresholded(os.path.join(PATH, file))
        except Exception:
            # Exception (not a bare except) so Ctrl-C still stops the script.
            print("Failed to open image, probably permissions issues")
            continue

        # Get the numbers of people.
        # TODO: validate what's read? numbers should not decrease (except when they reset at end of day)
        try:
            counts = _read_counts(img)
        except Exception:
            print("Issue with processing images, probably wrong screenshot: " + str(file))
            continue

        # Write to csv if the time slot is new
        data = build_data(counts)
        data_day = (data[2], data[1])
        rounded_minutes = data[3]
        if data_day == prev_data_day and rounded_minutes <= prev_rounded_minutes:
            print("Good, but too recent")
            continue

        try:
            with open('data.csv', 'a', newline='') as f:
                csv.writer(f).writerow(data)
        except OSError as err:
            print("Failed to write data.csv: " + str(err))
            continue
        prev_data_day = data_day
        prev_rounded_minutes = rounded_minutes
        print(data)
        break  # We found a good screenshot to save, so stop searching

    # Delete all images after either finding a good screenshot, or processing them all
    _remove_screenshots(files)