In [1]:
import datetime
import getpass
import json
import os
import time
import urllib
import urllib.request

import numpy as np
import pandas as pd

import mysql.connector
from mysql.connector import Error

from bokeh.plotting import *
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource, LogColorMapper, ColorMapper, LogTicker, ColorBar, BasicTicker, ColorBar, LinearColorMapper, PrintfTickFormatter
# from .chart_constants import (PLOT_FORMATS, ORANGE, BLUE, DARK_GRAY, AXIS_FORMATS, ORANGE_SHADOW, 
#                              FONT_PROPS_SM, FONT_PROPS_MD, FONT_PROPS_LG, GREEN)
from bokeh.io import show, output_notebook
import bokeh.palettes as bp

import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter, AutoMinorLocator)

In [2]:
# Download the Helioviewer data-source catalogue and parse it into nested
# dicts; it is indexed later as hv_keys['SDO']['AIA']['1600']['sourceId'].
# The ``with`` block closes the HTTP response as soon as the payload is read,
# and the timeout keeps an unreachable server from hanging the notebook.
with urllib.request.urlopen('https://api.helioviewer.org/?action=getDataSources',
                            timeout=30) as json_url:
    hv_keys = json.loads(json_url.read())

In [3]:
# Fetch (filepath, date, sourceid) for every AIA 1600 row of the ``data`` table.
#
# Connection settings are read from the environment so that no secret lives in
# the notebook:
#   HV_DB_HOST (default 'localhost'), HV_DB_NAME (default 'hv'),
#   HV_DB_USER (default 'hv_varun'), HV_DB_PASSWORD (prompted for when unset).
start_time = time.time()

db_user = os.environ.get('HV_DB_USER', 'hv_varun')
db_password = os.environ.get('HV_DB_PASSWORD') or getpass.getpass('MySQL password for %s: ' % db_user)

# Pre-bind both handles so the ``finally`` clause is safe even when
# ``connect()`` itself raises.
connection = None
cursor = None
try:
    connection = mysql.connector.connect(host=os.environ.get('HV_DB_HOST', 'localhost'),
                                         database=os.environ.get('HV_DB_NAME', 'hv'),
                                         user=db_user,
                                         password=db_password)

    # The source id comes from a remote API response, so it is passed as a
    # bound parameter instead of being formatted into the SQL text.
    sql_select_Query = "SELECT filepath, date, sourceid FROM data WHERE sourceId = %s;"
    source_id = hv_keys['SDO']['AIA']['1600']['sourceId']

    cursor = connection.cursor()
    cursor.execute(sql_select_Query, (source_id,))
    records = cursor.fetchall()
    print("Total number of rows in data is: ", cursor.rowcount)

except Error as e:
    print("Error reading data from MySQL table", e)
    # Re-raise: the following cells need ``records``; continuing would either
    # fail with a NameError or silently reuse rows from an earlier run.
    raise
finally:
    # Release the cursor before the connection that owns it.
    if cursor is not None:
        cursor.close()
    if connection is not None and connection.is_connected():
        connection.close()
        print("MySQL connection is closed", time.time()-start_time )

Total number of rows in data is:  7056352

Printing each laptop record
MySQL connection is closed 107.39623022079468


In [4]:
# Load the fetched rows into a DataFrame whose columns are named after the
# cursor's result columns, order it chronologically, and renumber the index.
hv = (
    pd.DataFrame(records, columns=cursor.column_names)
    .sort_values('date')
    .reset_index(drop=True)
)
hv

Unnamed: 0,filepath,date,sourceid
0,/AIA/1600/2010/06/02,2010-06-02 00:05:30,15
1,/AIA/1600/2010/06/02,2010-06-02 00:05:54,15
2,/AIA/1600/2010/06/02,2010-06-02 00:06:18,15
3,/AIA/1600/2010/06/23,2010-06-23 00:00:17,15
4,/AIA/1600/2010/06/23,2010-06-23 00:00:41,15
...,...,...,...
7056347,/AIA/2020/08/03/1600,2020-08-03 13:59:26,15
7056348,/AIA/2020/08/03/1600,2020-08-03 14:00:14,15
7056349,/AIA/2020/08/03/1600,2020-08-03 14:01:02,15
7056350,/AIA/2020/08/03/1600,2020-08-03 14:01:50,15


In [5]:
# Encode the first and last observation months as fractional years
# (year + month/12); the extremes are looked up once each.
first_obs = hv['date'].min()
last_obs = hv['date'].max()

ym_min = first_obs.year + first_obs.month / 12
ym_max = last_obs.year + last_obs.month / 12

In [6]:
# Grid axes: candidate day numbers 1..32 along the columns, and one fractional
# year value per month (step 1/12) from ym_min to just past ym_max along the rows.
day_axis = range(1, 33)
month_axis = np.arange(ym_min, ym_max + 1.1/12, 1/12)

# day[i, j] / ym[i, j] give the day number and month value of grid cell (i, j).
day, ym = np.meshgrid(day_axis, month_axis)

# Per-cell file counts, filled in afterwards; same shape as the grid.
coverage = np.zeros(ym.shape)

In [None]:
# Fill coverage[i, j] with the number of files observed on the calendar day
# described by (ym[i, j], day[i, j]); cells that are not real dates
# (e.g. 30 February, day 32) are set to NaN.

# Count the files of every calendar day in a single pass instead of
# re-filtering the whole table once per grid cell.
daily_counts = pd.to_datetime(hv['date']).dt.normalize().value_counts()

for i in range(coverage.shape[0]):
    for j in range(coverage.shape[1]):
        # ym stores a month as year + month/12, so December (12/12) lands on
        # the next whole number and cannot be recovered with // and %.
        # Decode via the rounded count of whole months instead, which also
        # absorbs floating-point error accumulated in the 1/12 steps.
        whole_months = int(round(ym[i, j] * 12))
        cell_year, month_index = divmod(whole_months - 1, 12)
        try:
            edge = datetime.datetime(cell_year, month_index + 1, int(day[i, j]))
        except ValueError:
            # Not a valid calendar date.
            coverage[i, j] = np.nan
            continue
        coverage[i, j] = daily_counts.get(pd.Timestamp(edge), 0)

In [None]:
# Scratch check: render a fixed sample date with the "%b %d, %Y" format spec.
sample_date = datetime.datetime(2003, 1, 1)
print("{:%b %d, %Y}".format(sample_date))

In [None]:
# Build the month x day coverage table.
#
# Row labels: one "<year> <month name>" string per calendar month from the
# first to the last observation, inclusive. period_range (unlike a month-end
# date_range) keeps the final, partially observed month.
year = pd.period_range(start=hv['date'].min(), end=hv['date'].max(), freq='M').to_timestamp()
year = year.year.astype(str) + ' ' + year.month_name()
# Column labels: day of month, '1' .. '31'.
date = np.arange(1,32).astype(str)

# Row i of ``coverage`` is the i-th month counted from the first observation
# month, so slicing by the label counts keeps rows and labels aligned without
# hard-coding the number of months in the data set.
hv_cov = pd.DataFrame(data=coverage[:len(year), :len(date)], index=year, columns=date)
hv_cov.index.name='Year'
hv_cov.columns.name = 'Date'
hv_cov

In [None]:
# Reshape the month x day table into long form: one row per (Year, Date)
# pair with its file count in the 'coverage' column.
stacked_coverage = hv_cov.stack()
df = pd.DataFrame(stacked_coverage, columns=['coverage']).reset_index()
df['Year'] = df['Year'].astype(str)

# Categorical axis values: month labels for x, day labels for y.
years = hv_cov.index.values
dates = hv_cov.columns.values.astype(str)

# Linear colour scale spanning the observed range of counts.
colors = bp.Viridis[256]
coverage_low = np.nanmin(df.coverage)
coverage_high = np.nanmax(df.coverage)
mapper = LinearColorMapper(palette=colors, low=coverage_low, high=coverage_high)

TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"

plot_title = "AIA 1600 Coverage ({0} - {1})".format(years[0], years[-1])
hover_rows = [('Date', '@Year @Date'), ('#Data Files', '@coverage')]

# Months run left to right; the day axis is reversed so that day 1 is at the top.
p = figure(
    title=plot_title,
    x_range=years,
    y_range=list(reversed(dates)),
    x_axis_location="above",
    plot_width=1400,
    plot_height=800,
    x_axis_label="Year Month",
    y_axis_label="Date",
    tooltips=hover_rows,
    tools=TOOLS,
    toolbar_location='below',
)

# Styling shared by both axes.
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "7px"
p.axis.major_label_standoff = 0

# Per-axis styling; the tick-label sizes set here take precedence over the
# shared "7px" value assigned above.
p.xaxis.major_label_orientation = np.pi / 3
p.xaxis.axis_label_text_font_size = "12pt"
p.yaxis.axis_label_text_font_size = "12pt"
p.xaxis.visible = True
p.xgrid.visible = True
p.xaxis.major_label_text_font_size = "7pt"
p.yaxis.major_label_text_font_size = "8pt"

# One unit square per (month, day) row of df, coloured by its file count.
cell_fill = {'field': 'coverage', 'transform': mapper}
p.rect(x="Year", y="Date", width=1, height=1,
       source=df,
       fill_color=cell_fill,
       line_color=None)

# Colour legend placed to the right of the heat map.
color_bar = ColorBar(
    color_mapper=mapper,
    major_label_text_font_size="10px",
    ticker=BasicTicker(desired_num_ticks=10),
    formatter=PrintfTickFormatter(format="%d"),
    label_standoff=6,
    border_line_color=None,
    location=(0, 0),
)
p.add_layout(color_bar, 'right')


In [None]:
# Direct Bokeh output to the notebook, then render the coverage figure inline.
output_notebook()
show(p)