Copying Files from SFTP to GCS using SFTP Action Plugins
Below is a sample pipeline that copies data from an SFTP server to a GCS bucket using the SFTP Copy Action plugin:
{
  "name": "sftp-poc_151",
  "description": "Data Pipeline Application",
  "artifact": {
    "name": "cdap-data-pipeline",
    "version": "6.4.0",
    "scope": "SYSTEM"
  },
  "config": {
    "resources": {
      "memoryMB": 2048,
      "virtualCores": 1
    },
    "driverResources": {
      "memoryMB": 2048,
      "virtualCores": 1
    },
    "connections": [],
    "comments": [],
    "postActions": [],
    "properties": {},
    "processTimingEnabled": true,
    "stageLoggingEnabled": false,
    "stages": [
      {
        "name": "SFTPCopy2",
        "plugin": {
          "name": "SFTPCopy",
          "type": "action",
          "label": "SFTPCopy2",
          "artifact": {
            "name": "sftp-actions",
            "version": "1.5.1",
            "scope": "USER"
          },
          "properties": {
            "host": "<sftp_server_host>",
            "port": "<sftp_server_port>",
            "userName": "<username>",
            "Authentication": "password-select",
            "srcDirectory": ".",
            "destDirectory": "gs://<my_bucket>/<path_to_bucket_directory>",
            "variableNameHoldingFileList": "sftp.copied.file.names",
            "extractZipFiles": "false",
            "password": "<password>",
            "fileSystemProperties": "fs.gs.impl=>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem,fs.AbstractFileSystem.gs.impl=>com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS,fs.gs.project.id=><gcp_project_id>,fs.gs.system.bucket=><gcs_bucket>,fs.gs.path.encoding=>uri-path,fs.gs.working.dir=>/,fs.gs.impl.disable.cache=>true"
          }
        },
        "outputSchema": [
          {
            "name": "etlSchemaBody",
            "schema": ""
          }
        ],
        "id": "SFTPCopy2",
        "type": "action",
        "label": "SFTPCopy2",
        "icon": "icon-FTPcopy"
      }
    ],
    "schedule": "0 * * * *",
    "engine": "spark",
    "numOfRecordsPreview": 100,
    "description": "Data Pipeline Application",
    "maxConcurrentRuns": 1
  }
}
Created in 2020 by Google Inc.