diff --git a/cob_datapipeline/dspace_harvest_dag.py b/cob_datapipeline/dspace_harvest_dag.py index 87c71b13..86243e01 100644 --- a/cob_datapipeline/dspace_harvest_dag.py +++ b/cob_datapipeline/dspace_harvest_dag.py @@ -125,7 +125,7 @@ task_id="s3_to_sftp", provide_context=True, sftp_conn_id="DSPACESFTP", - xcom_id="{{ ti.xcom_pull(task_ids='list_s3_files') }}", + xcom_id="list_s3_files", sftp_base_path="production/", s3_conn_id="AIRFLOW_S3", s3_bucket=AIRFLOW_DATA_BUCKET, diff --git a/cob_datapipeline/scripts/transform.sh b/cob_datapipeline/scripts/transform.sh index 5fbcc3af..74b9775f 100755 --- a/cob_datapipeline/scripts/transform.sh +++ b/cob_datapipeline/scripts/transform.sh @@ -34,10 +34,8 @@ do java -jar $SAXON_CP -xsl:$XSL -s:$SOURCE_URL -o:$SOURCE_XML-1.xml -t - sed -e "s|||g" $SOURCE_XML-1.xml > $SOURCE_XML-2.xml - echo "" >> $SOURCE_XML-2.xml + java -jar $SAXON_CP -xsl:$BATCH_TRANSFORM -s:$SOURCE_XML-1.xml -o:$SOURCE_XML-transformed.xml -t - java -jar $SAXON_CP -xsl:$BATCH_TRANSFORM -s:$SOURCE_XML-2.xml -o:$SOURCE_XML-transformed.xml -t COUNT=$(cat $SOURCE_XML-transformed.xml | grep -o "" | wc -l) TOTAL_TRANSFORMED=$(expr $TOTAL_TRANSFORMED + $COUNT) aws s3 cp $SOURCE_XML-transformed.xml s3://$BUCKET/$TRANSFORM_XML