# In [None]:
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.operators.dummy import DummyOperator
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.utils.dates import days_ago
from airflow.exceptions import AirflowException
import requests
import tempfile
import json


# function to retrieve rocket launches data & store in s3
def download_launches(api_url, s3_conn_id, bucket_name, object_key, timeout=60):
    """Fetch rocket launch data over HTTP and store the raw response in S3.

    Args:
        api_url: URL of the launches API endpoint to GET.
        s3_conn_id: Airflow connection id used to build the ``S3Hook``.
        bucket_name: Target bucket for the upload.
        object_key: Target object key; an existing object is replaced.
        timeout: Seconds passed to ``requests.get`` so a stalled API cannot
            block the task forever.

    Raises:
        AirflowException: If the API answers with a non-200 status, or if the
            upload to S3 fails. Exceptions raised by ``requests.get`` itself
            (connection errors, timeouts) propagate unchanged.
    """
    resp = requests.get(api_url, timeout=timeout)
    if resp.status_code != 200:
        raise AirflowException(f"Rocket API call fail, the resp staus[{resp.status_code}]")

    # Obtain the S3Hook instance through the Airflow connection.
    s3_hook = S3Hook(aws_conn_id=s3_conn_id)

    # load_file() takes a path, so stage the payload in a temporary file.
    with tempfile.NamedTemporaryFile('wb+') as fp:
        fp.write(resp.content)
        # Flush so the bytes are on disk before the hook reads the file by name.
        fp.flush()
        try:
            s3_hook.load_file(filename=fp.name,
                              bucket_name=bucket_name,
                              key=object_key,
                              replace=True)
        except Exception as e:
            # Fail the task: downstream tasks read this object, so a swallowed
            # upload error would only surface later as a confusing failure.
            raise AirflowException(f"Upload Rocket launches data to s3 fail:{e}") from e
    print(f'Upload Rocket launches data to s3: [{bucket_name}] -> {object_key}, success!')


# function to download the rocket launch images & store them in s3
def get_pictures(s3_conn_id, bucket_name, launch_data_object_key, image_object_key_prefix,
                 timeout=60):
    """Download the image of every launch listed in S3 and upload each to S3.

    The launch data object is read as JSON; each entry of its ``"results"``
    list is expected to carry the image URL under the ``"image"`` key.

    Args:
        s3_conn_id: Airflow connection id used to build the ``S3Hook``.
        bucket_name: Bucket holding the launch data and receiving the images.
        launch_data_object_key: Object key of the launch data JSON.
        image_object_key_prefix: Key prefix for the uploaded images; the
            object key is ``<prefix>/<last path segment of the image URL>``.
        timeout: Seconds passed to ``requests.get`` for each image download.

    Raises:
        AirflowException: If the launch data cannot be downloaded from S3, an
            image request returns a non-200 status, or an image upload fails.
            Exceptions raised by ``requests.get`` itself propagate unchanged.
    """
    import os  # local import: only needed to remove the downloaded temp file

    s3_hook = S3Hook(aws_conn_id=s3_conn_id)

    # Download the rocket launch data from S3; the hook returns a local path.
    try:
        temp_filename = s3_hook.download_file(
            bucket_name=bucket_name,
            key=launch_data_object_key)
    except Exception as e:
        # Without the file nothing below can work, so fail with the real cause
        # instead of continuing into a NameError on temp_filename.
        raise AirflowException(f"Download rocket launch data from s3 fail:{e}") from e
    print(f'Download rocket launch data to local: {launch_data_object_key}, success!')

    # Load the content as JSON, then drop the local copy.
    try:
        with open(temp_filename, encoding="utf-8") as json_file:
            launches = json.load(json_file)
    finally:
        # NOTE(review): download_file appears to leave the file on disk for
        # the caller to clean up - confirm against the provider version in use.
        try:
            os.remove(temp_filename)
        except OSError:
            pass  # best-effort cleanup; a leftover temp file is not fatal

    for launch in launches["results"]:
        image_url = launch.get("image")
        if not image_url:
            # Presumably some launches have no image (null/missing) - skip them
            # rather than crash on None.split().
            print('rocket launch has no image, skipped')
            continue
        print(f'rocket image to download: {image_url}')

        # The image file name is the last path segment of the URL.
        image_filename = image_url.split("/")[-1]
        target_obj_key = f'{image_object_key_prefix}/{image_filename}'

        # Download the image, refusing to store an error page as a picture.
        resp = requests.get(image_url, timeout=timeout)
        if resp.status_code != 200:
            raise AirflowException(
                f"Rocket image download fail, url[{image_url}] resp status[{resp.status_code}]")

        # load_file() takes a path, so stage the image in a temporary file.
        with tempfile.NamedTemporaryFile('wb+') as img_file:
            img_file.write(resp.content)
            # Flush so the bytes are on disk before the hook reads the file.
            img_file.flush()
            try:
                s3_hook.load_file(filename=img_file.name,
                                  bucket_name=bucket_name,
                                  key=target_obj_key,
                                  replace=True)
            except Exception as e:
                raise AirflowException(e) from e


# Arguments handed to the DAG as its task defaults.
default_args = dict(
    owner='EMPLOYEE_ID',  # owner is the DAG's developer, e.g. employee 8703147
)

# DAG definition; schedule_interval is None, so no schedule is configured here.
dag = DAG(
    dag_id="deXX_collect_rocket_launch",  # the prefix must be the tenant id, e.g. de00
    default_args=default_args,
    description="dag to collect rocket launch images",
    schedule_interval=None,
    start_date=days_ago(2),
    catchup=False,
    # Declares which team [tenant id] owns the DAG and the permissions it gets.
    access_control={'deXX': {'can_edit', 'can_read'}},
)

# Task 1: fetch the launch data from the API and save it to S3.
task_download_launches = PythonOperator(
    dag=dag,
    task_id='download_launches',
    python_callable=download_launches,
    op_kwargs=dict(
        api_url='http://10.34.124.114:7706/2.0.0/launch/upcoming',
        s3_conn_id='deXX_minio',
        bucket_name='EMPLOYEE_ID',  # use lowercase for any English letters
        object_key='de07/data/rocket_launches.json',
    ),
)

# Task 2: read the stored launch data and copy each rocket picture to S3.
task_get_pictures = PythonOperator(
    dag=dag,
    task_id='get_pictures',
    python_callable=get_pictures,
    op_kwargs=dict(
        s3_conn_id='deXX_minio',
        bucket_name='EMPLOYEE_ID',
        launch_data_object_key='de07/data/rocket_launches.json',
        image_object_key_prefix='de07/images',
    ),
)

# Task 3: placeholder for notifying someone once the pictures are stored.
task_notify = DummyOperator(dag=dag, task_id='notify')

# Dependencies: download_launches -> get_pictures -> notify.
task_download_launches >> task_get_pictures
task_get_pictures >> task_notify

