Skip to content

Commit

Permalink
feat(pipeline): use gsutil cp instead of rsync
Browse files: browse the repository at this point in the history
  • Loading branch information
Andreas Helms authored and committed on Mar 14, 2024
1 parent bf97d69 commit ee4e576
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions pipeline/dags/task_factories.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -90,10 +90,10 @@ def fn(filename: str):
def gcloud_upload_dir(layer_id: str, layer_variable: str, directory: str):
return BashOperator(
task_id='gcloud_upload',
- bash_command='gcloud auth activate-service-account --key-file $KEY_FILE && gsutil -m rsync -d -r $UPLOAD_DIR $BUCKET',
+ bash_command='gcloud auth activate-service-account --key-file $KEY_FILE && gsutil -q -m cp -r $UPLOAD_DIR/* $BUCKET',
env={
"UPLOAD_DIR": directory,
- "BUCKET": 'gs://{{ dag_run.conf["output_bucket"] }}/{{ dag_run.conf["layer_version"] }}/' + f'{layer_id}.{layer_variable}',
+ "BUCKET": 'gs://{{ dag_run.conf["output_bucket"] }}/{{ dag_run.conf["layer_version"] }}/' + f'{layer_id}.{layer_variable}/',
"KEY_FILE": '/opt/airflow/plugins/service-account.json',
"CLOUDSDK_PYTHON": '/usr/local/bin/python'
}
Expand Down

0 comments on commit ee4e576

Please sign in to comment.