In [1]:
# Imports used by this notebook to list, download, and convert GONG .fits.fz images to JPG.
import calendar
import logging
import os
from datetime import datetime, timedelta
from urllib.parse import urljoin

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from astropy.io import fits
from astropy.utils.data import get_pkg_data_filename
from bs4 import BeautifulSoup
from PIL import Image


In [2]:
#Download one image from URL, you can plot it before save it
def save_image_as_jpg (image_url, file_dir):
    
    try:
        original_image_data = fits.open(image_url)[1].data
        im_resized = cv2.resize(original_image_data, (512, 512), interpolation=cv2.INTER_LINEAR)  
        fits_file_name = image_url.rsplit('/', 1)[-1]
        jpg_file_name = file_dir +fits_file_name[0:12] + '.jpg'
        plt.imsave(jpg_file_name, im_resized, cmap='gray')
    except:
        print('Error During Saving Image File: ', image_url)
        #save to a log file??? 
        pass



In [3]:
#Test save one file to my directory
data_dir = "/jzhang/NoteBookProjects/Projects/Convert_Gong_Fits_To_JPG/Data/"
save_image_as_jpg ("https://gong2.nso.edu/HA/haf/201006/20100601/20100601000054Bh.fits.fz", data_dir)

In [4]:
#Download only one image per date time. 
def download_one_day_images (year, month, day, image_dir, frenqucy_hours=6):
    
    date_time = datetime(year, month, day, 0, 0, 0)
    year_string = str(year)
    month_string = str(month).zfill(2)
    day_string = str(day).zfill(2) 
    
    #day combined string
    day_combined_string = year_string + month_string + day_string
     
    day_url = 'https://gong2.nso.edu/HA/haf/' + year_string + month_string + '/' + day_combined_string +'/'
    
    #check all URLs under base_url directory
    response = requests.get(day_url)
    soup = BeautifulSoup(response.content, "html.parser")

    # find all the links on the webpage
    links = soup.find_all("a") 
    
    #Download one day's data at one time
    while (date_time.day == day):
            
        combined_string = day_combined_string + str(date_time.hour).zfill(2) + '00'
        #print(combined_string)
        
        for link in links:
        
            #link text on the page
            hrefText = link.text
            if (hrefText.endswith('.fits.fz') and combined_string in hrefText):
                image_url = os.path.join(day_url, hrefText) 
            
                #There are too many files per minute, save the first one on the page
                #Should we care about the location? 
                #download the image from the image_url
                save_image_as_jpg (image_url, image_dir)  
                print(image_url) 
                break
         
        date_time = date_time + timedelta(hours=frenqucy_hours)
            
            

In [5]:
data_dir = "/jzhang/NoteBookProjects/Projects/Convert_Gong_Fits_To_JPG/Data/"
download_one_day_images (2010, 7, 1, data_dir, 6)

https://gong2.nso.edu/HA/haf/201007/20100701/20100701000034Lh.fits.fz
https://gong2.nso.edu/HA/haf/201007/20100701/20100701060034Lh.fits.fz
https://gong2.nso.edu/HA/haf/201007/20100701/20100701120014Th.fits.fz
https://gong2.nso.edu/HA/haf/201007/20100701/20100701180014Th.fits.fz


In [None]:
data_dir = "/jzhang/NoteBookProjects/Projects/Convert_Gong_Fits_To_JPG/Data/"
start_year = 2010
total_years = 1 
for year in (start_year + n for n in range(total_years)):
    for month in range(1, 13): 
        days = (calendar.monthcalendar(year, month))
        for week in days:
            for day in week:
                if day > 0: 
                    download_one_day_images (year, month, day, data_dir, frenqucy_hours=6)

https://gong2.nso.edu/HA/haf/201006/20100601/20100601000054Bh.fits.fz
