...
Row Key | Column Key | Value | Note |
---|---|---|---|
MyNamespace:HRFile | Properties | inputDir=/data/2017/hr regex=*.csv failOnError=false | One Row per namespace per dataset |
MyNamespace:PersonFile | Properties | inputDir=/data/2017/person regex=*.csv failOnError=false | One Row per namespace per dataset |
MyNamespace:EmployeeData | Properties | rowid=ID /* Should we store the schema too? What if it changes per run? */ | One Row per namespace per dataset |
MyNamespace:EmployeeData:AllFields | ID | /* We are not necessarily required to store any value */ created_time:12345678 updated_time:12345678 last_updated_by:runid_X | One Row per namespace per dataset |
MyNamespace:EmployeeData:AllFields | Name | ||
MyNamespace:EmployeeData:AllFields | Department | ||
MyNamespace:EmployeeData:AllFields | ContactDetails | ||
MyNamespace:EmployeeData:AllFields | JoiningDate | ||
MyNamespace:EmployeeData:ID:<runidX-inverted-start-time>:runidX | Lineage | Please see the full JSON below. | |
Code Block |
---|
```json
{
  "sources": [
    {
      "name": "PersonFile",
      "properties": {
        "inputPath": "/data/2017/persons",
        "regex": "*.csv"
      }
    },
    {
      "name": "HRFile",
      "properties": {
        "inputPath": "/data/2017/hr",
        "regex": "*.csv"
      }
    }
  ],
  "targets": [
    {
      "name": "Employee Data"
    }
  ],
  "operations": [
    {
      "inputs": [
        {
          "name": "PersonRecord",
          "source": "PersonFile"
        }
      ],
      "outputs": [
        {
          "name": "PersonRecord.body"
        }
      ],
      "name": "READ",
      "description": "Read Person file."
    },
    {
      "inputs": [
        {
          "name": "PersonRecord.body"
        }
      ],
      "outputs": [
        {
          "name": "SSN"
        }
      ],
      "name": "PARSE",
      "description": "Parse the body field"
    },
    {
      "inputs": [
        {
          "name": "HRRecord",
          "source": "HRFile"
        }
      ],
      "outputs": [
        {
          "name": "HRRecord.body"
        }
      ],
      "name": "READ",
      "description": "Read HR file."
    },
    {
      "inputs": [
        {
          "name": "HRRecord.body"
        }
      ],
      "outputs": [
        {
          "name": "Employee_Name"
        },
        {
          "name": "Dept_Name"
        }
      ],
      "name": "PARSE",
      "description": "Parse the body field"
    },
    {
      "inputs": [
        {
          "name": "Employee_Name"
        },
        {
          "name": "Dept_Name"
        },
        {
          "name": "SSN"
        }
      ],
      "outputs": [
        {
          "name": "ID",
          "target": "Employee Data"
        }
      ],
      "name": "GenerateID",
      "description": "Generate unique Employee Id"
    }
  ]
}
```
...