From 88ee80581b9ee4a7abab0d0dde1198f0ee2f1efd Mon Sep 17 00:00:00 2001 From: cooper-lzy <78672629+cooper-lzy@users.noreply.github.com> Date: Wed, 15 Feb 2023 10:28:21 +0800 Subject: [PATCH] exchange update export --- .../about-exchange/ex-ug-what-is-exchange.md | 2 +- .../use-exchange/ex-ug-export-from-nebula.md | 253 ++++++++++++++---- 2 files changed, 205 insertions(+), 50 deletions(-) diff --git a/docs-2.0/nebula-exchange/about-exchange/ex-ug-what-is-exchange.md b/docs-2.0/nebula-exchange/about-exchange/ex-ug-what-is-exchange.md index 797acfefde6..52752caaccd 100644 --- a/docs-2.0/nebula-exchange/about-exchange/ex-ug-what-is-exchange.md +++ b/docs-2.0/nebula-exchange/about-exchange/ex-ug-what-is-exchange.md @@ -113,7 +113,7 @@ Exchange {{exchange.release}} supports converting data from the following format In addition to importing data as nGQL statements, Exchange supports generating SST files for data sources and then [importing SST](../use-exchange/ex-ug-import-from-sst.md) files via Console. -In addition, Exchange Enterprise Edition also supports [exporting data to a CSV file](../use-exchange/ex-ug-export-from-nebula.md) using NebulaGraph as data sources. +In addition, Exchange Enterprise Edition also supports [exporting data to a CSV file or another graph space](../use-exchange/ex-ug-export-from-nebula.md) using NebulaGraph as data sources. ## Release note diff --git a/docs-2.0/nebula-exchange/use-exchange/ex-ug-export-from-nebula.md b/docs-2.0/nebula-exchange/use-exchange/ex-ug-export-from-nebula.md index b6bbd818784..3266b123991 100644 --- a/docs-2.0/nebula-exchange/use-exchange/ex-ug-export-from-nebula.md +++ b/docs-2.0/nebula-exchange/use-exchange/ex-ug-export-from-nebula.md @@ -1,14 +1,10 @@ # Export data from NebulaGraph -This topic uses an example to illustrate how to use Exchange to export data from NebulaGraph to a CSV file. 
+Exchange allows you to export data from NebulaGraph to a CSV file or another NebulaGraph space (supporting different NebulaGraph clusters). This topic describes the specific procedure. !!! enterpriseonly - Only Exchange Enterprise Edition supports exporting data from NebulaGraph to a CSV file. - -!!! note - - SSL encryption is not supported when exporting data from NebulaGraph. + Only Exchange Enterprise Edition supports exporting data from NebulaGraph. ## Preparation @@ -53,81 +49,238 @@ As the data source, NebulaGraph stores the [basketballplayer dataset](https://do 2. Modify the configuration file. - Exchange Enterprise Edition provides the configuration template `export_application.conf` for exporting NebulaGraph data. For details, see [Exchange parameters](../parameter-reference/ex-ug-parameter.md). The core content of the configuration file used in this example is as follows: + Exchange Enterprise Edition provides the configuration templates `export_to_csv.conf` and `export_to_nebula.conf` for exporting NebulaGraph data. For details, see [Exchange parameters](../parameter-reference/ex-ug-parameter.md). The core content of the configuration file used in this example is as follows: + - Export to a CSV file: + ```conf - ... 
+ # Use the command to submit the exchange job: + + # spark-submit \ + # --master "spark://master_ip:7077" \ + # --driver-memory=2G --executor-memory=30G \ + # --total-executor-cores=60 --executor-cores=20 \ + # --class com.vesoft.nebula.exchange.Exchange \ + # nebula-exchange-3.0-SNAPSHOT.jar -c export_to_csv.conf + + { + # Spark config + spark: { + app: { + name: NebulaGraph Exchange + } + } + + # Nebula Graph config + # if you export nebula data to csv, please ignore these nebula config + nebula: { + address:{ + graph:["127.0.0.1:9669"] + + # the address of any of the meta services + meta:["127.0.0.1:9559"] + } + user: root + pswd: nebula + space: test + + # nebula client connection parameters + connection { + # socket connect & execute timeout, unit: millisecond + timeout: 30000 + } + + error: { + # max number of failures, if the number of failures is bigger than max, then exit the application. + max: 32 + # failed data will be recorded in output path, format with ngql + output: /tmp/errors + } + + # use google's RateLimiter to limit the requests send to NebulaGraph + rate: { + # the stable throughput of RateLimiter + limit: 1024 + # Acquires a permit from RateLimiter, unit: MILLISECONDS + # if it can't be obtained within the specified timeout, then give up the request. + timeout: 1000 + } + } # Processing tags - # There are tag config examples for different dataSources. tags: [ - # export NebulaGraph tag data to csv, only support export to CSV for now. { - name: player + # you can ignore the tag name when export nebula data to csv + name: tag-name-1 type: { - source: Nebula - sink: CSV + source: nebula + sink: csv } - # the path to save the NebulaGrpah data, make sure the path doesn't exist. 
- path:"hdfs://192.168.8.177:9000/vertex/player" - # if no need to export any properties when export NebulaGraph tag data - # if noField is configured true, just export vertexId - noField:false - # define properties to export from NebulaGraph tag data - # if return.fields is configured as empty list, then export all properties - return.fields:[] - # nebula space partition number - partition:10 - } - - ... + # config the fields you want to export from nebula + fields: [nebula-field-0, nebula-field-1, nebula-field-2] + noFields:false # default false, if true, just export id + partition: 60 + # config the path to save your csv file. if your file is not in hdfs, config "file:///path/test.csv" + path: "hdfs://ip:port/path/person" + separator: "," + header: true + } ] - # Processing edges - # There are edge config examples for different dataSources. + # process edges edges: [ - # export NebulaGraph tag data to csv, only support export to CSV for now. { - name: follow + # you can ignore the edge name when export nebula data to csv + name: edge-name-1 type: { - source: Nebula - sink: CSV + source: nebula + sink: csv } - # the path to save the NebulaGrpah data, make sure the path doesn't exist. - path:"hdfs://192.168.8.177:9000/edge/follow" - # if no need to export any properties when export NebulaGraph edge data - # if noField is configured true, just export src,dst,rank - noField:false - # define properties to export from NebulaGraph edge data - # if return.fields is configured as empty list, then export all properties - return.fields:[] - # nebula space partition number - partition:10 + # config the fields you want to export from nebula + fields: [nebula-field-0, nebula-field-1, nebula-field-2] + noFields:false # default false, if true, just export id + partition: 60 + # config the path to save your csv file. 
if your file is not in hdfs, config "file:///path/test.csv" + path: "hdfs://ip:port/path/friend" + separator: "," + header: true } + ] + } + ``` + + - Export to another graph space: + + ```conf + # Use the command to submit the exchange job: + + # spark-submit \ + # --master "spark://master_ip:7077" \ + # --driver-memory=2G --executor-memory=30G \ + # --total-executor-cores=60 --executor-cores=20 \ + # --class com.vesoft.nebula.exchange.Exchange \ + # nebula-exchange-3.0-SNAPSHOT.jar -c export_to_nebula.conf - ... + { + # Spark config + spark: { + app: { + name: NebulaGraph Exchange + } + } + # Nebula Graph config, just config the sink nebula information + nebula: { + address:{ + graph:["127.0.0.1:9669"] + + # the address of any of the meta services + meta:["127.0.0.1:9559"] + } + user: root + pswd: nebula + space: test + + # nebula client connection parameters + connection { + # socket connect & execute timeout, unit: millisecond + timeout: 30000 + } + + error: { + # max number of failures, if the number of failures is bigger than max, then exit the application. + max: 32 + # failed data will be recorded in output path, format with ngql + output: /tmp/errors + } + + # use google's RateLimiter to limit the requests sent to NebulaGraph + rate: { + # the stable throughput of RateLimiter + limit: 1024 + # Acquires a permit from RateLimiter, unit: MILLISECONDS + # if it can't be obtained within the specified timeout, then give up the request. + timeout: 1000 + } + } + + # Processing tags + tags: [ + { + name: tag-name-1 + type: { + source: nebula + sink: client + } + # data source nebula config + metaAddress:"127.0.0.1:9559" + space:"test" + label:"person" + # mapping the fields of the original NebulaGraph to the fields of the target NebulaGraph. 
+ fields: [source_nebula-field-0, source_nebula-field-1, source_nebula-field-2] + nebula.fields: [target_nebula-field-0, target_nebula-field-1, target_nebula-field-2] + limit:10000 + vertex: _vertexId # must be `_vertexId` + batch: 2000 + partition: 60 + } ] + + # process edges + edges: [ + { + name: edge-name-1 + type: { + source: nebula + sink: client + } + # data source nebula config + metaAddress:"127.0.0.1:9559" + space:"test" + label:"friend" + fields: [source_nebula-field-0, source_nebula-field-1, source_nebula-field-2] + nebula.fields: [target_nebula-field-0, target_nebula-field-1, target_nebula-field-2] + limit:1000 + source: _srcId # must be `_srcId` + target: _dstId # must be `_dstId` + ranking: source_nebula-field-2 + batch: 2000 + partition: 60 + } + ] } ``` 3. Export data from NebulaGraph with the following command. + !!! note + + The parameters of the Driver and Executor processes can be modified based on your own machine configuration. + ```bash - /bin/spark-submit --master "local" --class com.vesoft.nebula.exchange.Exchange nebula-exchange-x.y.z.jar_path> -c + /bin/spark-submit --master "spark://<master_ip>:7077" \ + --driver-memory=2G --executor-memory=30G \ + --total-executor-cores=60 --executor-cores=20 \ + --class com.vesoft.nebula.exchange.Exchange <nebula-exchange-x.y.z.jar_path> \ + -c <conf_file_path> ``` - The command used in this example is as follows. + The following is an example command to export the data to a CSV file. ```bash - $ ./spark-submit --master "local" --class com.vesoft.nebula.exchange.Exchange \ - ~/exchange-ent/nebula-exchange-ent-{{exchange.release}}.jar -c ~/exchange-ent/export_application.conf + $ ./spark-submit --master "spark://192.168.10.100:7077" \ + --driver-memory=2G --executor-memory=30G \ + --total-executor-cores=60 --executor-cores=20 \ + --class com.vesoft.nebula.exchange.Exchange ~/exchange-ent/nebula-exchange-ent-{{exchange.release}}.jar \ + -c ~/exchange-ent/export_to_csv.conf ``` 4. Check the exported data. - 1. 
Check whether the CSV file is successfully generated under the target path. + - Export to a CSV file: + + Check whether the CSV file is successfully generated under the target path, and check the contents of the CSV file to ensure that the data export is successful. ```bash $ hadoop fs -ls /vertex/player @@ -145,4 +298,6 @@ As the data source, NebulaGraph stores the [basketballplayer dataset](https://do -rw-r--r-- 3 nebula supergroup 119 2021-11-05 07:36 /vertex/player/ part-00009-17293020-ba2e-4243-b834-34495c0536b3-c000.csv ``` - 2. Check the contents of the CSV file to ensure that the data export is successful. + - Export to another graph space: + + Log in to the new graph space and check the statistics through `SUBMIT JOB STATS` and `SHOW STATS` commands to ensure the data export is successful.