# In [None]:
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.operators.dummy import DummyOperator
from pendulum.datetime import DateTime
import datetime as dt


# Default arguments handed to the DAG below.
# `owner` is the DAG's developer, e.g. employee 8703147.
default_args = dict(owner='EMPLOYEE_ID')

# Build (and print) the Wikipedia pageviews dump URL for one run; nothing is
# downloaded here.
def get_data(execution_date: DateTime):
    """Print the Wikimedia hourly pageviews dump URL for ``execution_date``.

    Args:
        execution_date: Timestamp whose ``timetuple()`` supplies the year,
            month, day and hour used in the URL; the remaining fields are
            ignored.

    Returns:
        None. The URL is only written to standard output.
    """
    # The first four timetuple fields are year, month, day and hour.
    year, month, day, hour = execution_date.timetuple()[:4]

    month_dir = f"{year}/{year}-{month:02d}"
    file_name = f"pageviews-{year}{month:02d}{day:02d}-{hour:02d}0000.gz"
    url = f"https://dumps.wikimedia.org/other/pageviews/{month_dir}/{file_name}"

    print(url)  # print the URL to confirm it matches expectations

# Hourly DAG bounded to the window 2019-07-01 .. 2019-07-02, with catchup
# enabled.
dag = DAG(
    # The dag_id prefix must be the tenant id, e.g. de00.
    dag_id="deXX_pageviews_url",
    description="dag to verify wikipedia pageviews url",
    default_args=default_args,
    schedule_interval="@hourly",
    start_date=dt.datetime(year=2019, month=7, day=1),
    end_date=dt.datetime(year=2019, month=7, day=2),
    catchup=True,
    # Declares which team [tenant id] the DAG belongs to and its permissions.
    access_control={"deXX": {"can_read", "can_edit"}},
    # NOTE(review): the tag is 'de08' while dag_id / access_control use the
    # 'deXX' placeholder -- confirm they are meant to name the same tenant.
    tags=["de08"],
)

# Runs get_data as a Python task of the DAG.
task_get_data = PythonOperator(task_id="get_data", python_callable=get_data, dag=dag)

# No-op operators standing in for the extract and load steps.
task_extract_data = DummyOperator(task_id="extract_data", dag=dag)
task_load_data = DummyOperator(task_id="load_data", dag=dag)

# Wire the tasks into a linear chain: get_data -> extract_data -> load_data.
task_get_data.set_downstream(task_extract_data)
task_extract_data.set_downstream(task_load_data)