In [1]:
import pandas as pd
import requests
import os

# A run of equally spaced images must be longer than this to count as a movie.
min_images_in_movie = 100
# Directory under which the per-movie directories and create_movies.sh are written.
base_directory = "movies/"
# Base URL for image downloads; images are fetched from url_base + mission + "/" + filename.
url_base = "https://windows-on-earth.sdsc.osn.xsede.org/"

# When True, each movie's frame rate is derived from its capture timestamps;
# when False, the fixed frame_rate below is used.
movies_real_time = False
# Frame rate passed to ffmpeg's -framerate option.
frame_rate = 30

In [2]:
# Load the image metadata CSV and parse the capture timestamps into datetimes
# in a single chained expression.
winearth_images = pd.read_csv("winearth-20240514.csv").assign(
    date_time_captured=lambda frame: pd.to_datetime(frame["date_time_captured"])
)

In [3]:
# Print a summary of the frame: index range, column dtypes and memory usage.
winearth_images.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4273091 entries, 0 to 4273090
Data columns (total 6 columns):
 #   Column              Dtype         
---  ------              -----         
 0   nasa_filename       object        
 1   date_time_captured  datetime64[ns]
 2   iss_latitude        float64       
 3   iss_longitude       float64       
 4   iss_altitude        float64       
 5   iss_velocity        float64       
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 195.6+ MB


In [4]:
# Preview the first rows of the frame.
winearth_images.head()

Unnamed: 0,nasa_filename,date_time_captured,iss_latitude,iss_longitude,iss_altitude,iss_velocity
0,ISS011-E-9887.JPG,2005-05-19 12:20:35,46.24433,40.622379,356996.7912,27725.7541
1,ISS011-E-9888.JPG,2005-05-19 12:20:37,46.30285,40.783422,357001.4705,27725.78342
2,ISS011-E-9889.JPG,2005-05-19 12:20:40,46.39016,41.025656,357008.4026,27725.82735
3,ISS011-E-9890.JPG,2005-05-19 12:20:42,46.44804,41.187596,357012.9656,27725.85662
4,ISS011-E-9891.JPG,2005-05-19 12:20:45,46.53439,41.431184,357019.722,27725.90047


In [5]:
# Build time_diff: for every image, the whole number of seconds between its
# capture time and the capture time of the previous row (rows are compared in
# file order, so the result is only meaningful if the CSV is in capture order).
# This is vectorized because a Python-level loop over millions of rows is
# needlessly slow.
captured = winearth_images["date_time_captured"]

# The first row has no predecessor, so it is compared against the earliest
# capture time in the column.
previous = captured.shift(1, fill_value=captured.min())

time_diff = (
    (captured - previous)
    .dt.total_seconds()
    # Truncate toward zero, matching int() on the float number of seconds.
    .astype("int64")
    # A difference of 0 seconds is treated as 1 second.
    .replace(0, 1)
    # Downstream cells index into a plain list of Python ints.
    .tolist()
)

In [6]:
# Group consecutive images that share the same capture interval into "movies".
#
# movies is a list of lists; each inner list holds the positional indexes of
# the images that make up one movie.
movies = []
# Indexes of the images in the run currently being accumulated.
index_history = []

for i, diff in enumerate(time_diff):
    # A change in the interval ends the current run.
    if index_history and diff != time_diff[index_history[-1]]:
        # Keep the finished run only if it is longer than the minimum size.
        if len(index_history) > min_images_in_movie:
            movies.append(index_history)
        index_history = []

    # The current image always belongs to the (possibly new) run. Dropping it
    # on an interval change would lose the first frame of the next run.
    index_history.append(i)

# Flush the final run, which has no following interval change to close it.
if len(index_history) > min_images_in_movie:
    movies.append(index_history)


In [7]:
# Download the images of the detected movies into one directory per movie,
# together with a movie.csv holding the movie's metadata rows.
count = 0

# Seconds to wait on a single image request before giving up, so one stalled
# connection cannot hang the whole cell.
download_timeout = 60

for i in range(len(movies)):
    # get the rows of the movie from the winearth_images dataframe
    movie = winearth_images.iloc[movies[i]]

    # get the first and last images of the movie
    first_image = movie["nasa_filename"].iloc[0]
    last_image = movie["nasa_filename"].iloc[-1]

    # get the mission of the movie from the filename (the part before the
    # first "-", parsed the same way as the image numbers below)
    iis_mission = first_image.split("-")[0]

    # image numbers of the first and last frames, without the file extension
    first_number = first_image.split("-")[2].split(".")[0]
    last_number = last_image.split("-")[2].split(".")[0]

    # assign a directory to save the movie
    movie_directory = (
        base_directory
        + iis_mission
        + "-E-"
        + first_number
        + "-"
        + last_number
        + "/"
    )

    # create the directory to save the movie if it does not exist yet
    os.makedirs(movie_directory, exist_ok=True)

    # if the movie csv file does not exist, create it
    if not os.path.exists(movie_directory + "movie.csv"):
        movie.to_csv(movie_directory + "movie.csv", index=False)

    # download the images of the movie
    for nasa_filename in movie["nasa_filename"]:
        image_path = movie_directory + nasa_filename

        # skip images that are already on disk so the cell can be re-run
        # cheaply; delete a file to force it to be downloaded again
        if os.path.exists(image_path):
            continue

        # construct the url of the image
        url = url_base + iis_mission + "/" + nasa_filename

        # download the image; on a network error or an HTTP error status,
        # report it and move on instead of saving the error page as a .JPG
        try:
            r = requests.get(url, timeout=download_timeout)
            r.raise_for_status()
        except requests.RequestException as error:
            print("Skipping %s: %s" % (url, error))
            continue

        # save the image
        with open(image_path, "wb") as f:
            f.write(r.content)

    count += 1

    # stop once more than 20 movies have been processed
    if count > 20:
        break

In [8]:
# Write base_directory/create_movies.sh with one ffmpeg command per movie
# directory. The commands refer to the movie directories by name only, so the
# script has to be run from inside base_directory.

# get the directories in the base directory, sorted so the script is
# generated in a stable order
movie_directories = sorted(
    name
    for name in os.listdir(base_directory)
    if os.path.isdir(os.path.join(base_directory, name))
)

ffmpeg_commands = []

# loop through the movie directories
for movie_directory in movie_directories:
    # start from the configured default; a per-movie variable keeps the
    # frame_rate setting itself untouched
    movie_frame_rate = frame_rate

    # if the movies are in real time, calculate the frame rate from the timestamps
    if movies_real_time:
        # read the movie csv file
        movie = pd.read_csv(base_directory + movie_directory + "/movie.csv")
        # convert the date_time_captured to datetime
        movie["date_time_captured"] = pd.to_datetime(movie["date_time_captured"])

        # get the start and end time of the movie
        start_time = movie["date_time_captured"].iloc[0]
        end_time = movie["date_time_captured"].iloc[-1]

        # calculate the duration of the movie in seconds
        movie_seconds = int((end_time - start_time).total_seconds())
        # get the number of images in the movie
        number_of_images = len(movie)

        # calculate the frame rate and round it to 3 decimal places; a movie
        # with no measurable duration keeps the default frame rate instead of
        # dividing by zero
        if movie_seconds > 0:
            movie_frame_rate = round(number_of_images / movie_seconds, 3)

    # construct the ffmpeg command
    ffmpeg_command = "ffmpeg -y -framerate %s -pattern_type glob -i \"%s/*.JPG\" -s 2k -c:v libx265 -crf 18 -tag:v hvc1 %s.mp4" % (movie_frame_rate, movie_directory, movie_directory)
    ffmpeg_commands.append(ffmpeg_command)

# write all commands in one go, replacing any previous script, so re-running
# this cell does not duplicate commands
with open(base_directory + "create_movies.sh", "w") as f:
    for ffmpeg_command in ffmpeg_commands:
        f.write(ffmpeg_command + "\n")


