From 0f04c6c1c0f58878cb633c3454366324949ad2a2 Mon Sep 17 00:00:00 2001
From: Vikas Gupta
Date: Wed, 24 Apr 2024 11:19:58 +0530
Subject: [PATCH] aws-s3 coming blank on site and amazons3 not coming , so combining the docs

---
 docs/connectors/amazons3.md | 25 -------------------------
 docs/connectors/aws-s3.md   | 23 +++++++++++++++++++++++
 2 files changed, 23 insertions(+), 25 deletions(-)
 delete mode 100644 docs/connectors/amazons3.md

diff --git a/docs/connectors/amazons3.md b/docs/connectors/amazons3.md
deleted file mode 100644
index 5ee47236f..000000000
--- a/docs/connectors/amazons3.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# S3
-
-1. Set a bucket e.g. zingg28032023 and a folder inside it e.g. zingg
-
-2. Create aws access key and export via env vars (ensure that the user with below keys has read/write access to above):
-
-export AWS_ACCESS_KEY_ID=
-export AWS_SECRET_ACCESS_KEY=
-
-(if mfa is enabled AWS_SESSION_TOKEN env var would also be needed )
-
-3. Download hadoop-aws-3.1.0.jar and aws-java-sdk-bundle-1.11.271.jar via maven
-
-4. Set above in zingg.conf :
-spark.jars=//hadoop-aws-3.1.0.jar,//aws-java-sdk-bundle-1.11.271.jar
-
-5. Run using:
-
- ./scripts/zingg.sh --phase findTrainingData --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
- ./scripts/zingg.sh --phase label --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
- ./scripts/zingg.sh --phase train --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
- ./scripts/zingg.sh --phase match --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
-
-6. Models etc. would get saved in
-Amazon S3 > Buckets > zingg28032023 >zingg > 100
diff --git a/docs/connectors/aws-s3.md b/docs/connectors/aws-s3.md
index f4be12b78..b263139bc 100644
--- a/docs/connectors/aws-s3.md
+++ b/docs/connectors/aws-s3.md
@@ -1,2 +1,25 @@
 # AWS S3
 
+1. Set a bucket e.g. zingg28032023 and a folder inside it e.g. zingg
+
+2. Create aws access key and export via env vars (ensure that the user with below keys has read/write access to above):
+
+export AWS_ACCESS_KEY_ID=
+export AWS_SECRET_ACCESS_KEY=
+
+(if mfa is enabled AWS_SESSION_TOKEN env var would also be needed )
+
+3. Download hadoop-aws-3.1.0.jar and aws-java-sdk-bundle-1.11.271.jar via maven
+
+4. Set above in zingg.conf :
+spark.jars=//hadoop-aws-3.1.0.jar,//aws-java-sdk-bundle-1.11.271.jar
+
+5. Run using:
+
+ ./scripts/zingg.sh --phase findTrainingData --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
+ ./scripts/zingg.sh --phase label --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
+ ./scripts/zingg.sh --phase train --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
+ ./scripts/zingg.sh --phase match --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
+
+6. Models etc. would get saved in
+Amazon S3 > Buckets > zingg28032023 >zingg > 100