Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  1. Get the list of fields in the dataset.

    Code Block
    GET /v3/namespaces/<namespace-id>/datasets/<dataset-id>/fields?start=<start-ts>&end=<end-ts>
     
    Where:
    namespace-id: namespace name
    dataset-id: dataset name
    start-ts: starting timestamp (inclusive) in seconds
    end-ts: ending timestamp (exclusive) in seconds for lineage
     
    Sample Response:
    [
      {
        "name": "ID",
        "properties": {
          "creation_time": 12345678,
          "last_update_time": 12345688,
          "last_modified_run": "runid_x"
        }
      },
      {
        "name": "name",
        "properties": {
          "creation_time": 12345678,
          "last_update_time": 12345688,
          "last_modified_run": "runid_x"
        }
      },
      {
        "name": "Department",
        "properties": {
          "creation_time": 12345678,
          "last_update_time": 12345688,
          "last_modified_run": "runid_x"
        }
      },
      {
        "name": "ContactDetails",
        "properties": {
          "creation_time": 12345678,
          "last_update_time": 12345688,
          "last_modified_run": "runid_x"
        }
      },
      {
        "name": "JoiningDate",
        "properties": {
          "creation_time": 12345678,
          "last_update_time": 12345688,
          "last_modified_run": "runid_x"
        }
      }
    ]
  2. Get the properties associated with the dataset.

    Code Block
    GET /v3/namespaces/<namespace-id>/datasets/<dataset-id>/properties?start=<start-ts>&end=<end-ts>
    
    Where:
    namespace-id: namespace name
    dataset-id: dataset name
    start-ts: starting timestamp (inclusive) in seconds
    end-ts: ending timestamp (exclusive) in seconds for lineage
    Sample Response:
    [
       {
          "programRun": "run1",
          "properties": {
            "inputPath": "/data/2017/hr",
            "regex": "*.csv"
          } 
       },
       {
          "programRun": "run2",  
          "properties": {
            "inputPath": "/data/2017/anotherhrdata",
            "regex": "*.csv"
          }
       }
    ]
  3. Get the lineage associated with the particular field in a dataset.

    Code Block
    GET /v3/namespaces/<namespace-id>/datasets/<dataset-id>/fields/<field-name>/lineage?start=<start-ts>&end=<end-ts>
     
    Where:
    namespace-id: namespace name
    dataset-id: dataset name
    field-name: name of the field for which lineage information is to be retrieved
    start-ts: starting timestamp (inclusive) in seconds
    end-ts: ending timestamp (exclusive) in seconds for lineage

    Sample response:

    Code Block
    {
      "startTimeInSeconds": 1442863938,
      "endTimeInSeconds": 1442881938,
      "paths": [
       ....
           list of paths which represent the different ways the field is created
       ....
      ] 
    }
     
    Each path will look as follows:
     {
      "sources": [
        {
          "name": "PersonFile",
          "properties": {
            "inputPath": "/data/2017/persons",
            "regex": "*.csv"
          }
        },
        {
          "name": "HRFile",
          "properties": {
            "inputPath": "/data/2017/hr",
            "regex": "*.csv"
          }
        }
      ],
      "targets": [
        {
          "name": "Employee Data"
        }
      ],
      "operations": [
        {
          "inputs": [
            {
              "name": "PersonRecord",
              "properties": {
                "source": "PersonFile"
              }
            }
          ],
          "outputs": [
            {
              "name": "body"
            }
          ],
          "name": "READ",
          "description": "Read Person file.",
          "properties": {
            "stage": "Person File Reader"
          }
        },
        {
          "inputs": [
            {
              "name": "body"
            }
          ],
          "outputs": [
            {
              "name": "SSN"
            }
          ],
          "name": "PARSE",
          "description": "Parse the body field",
          "properties": {
            "stage": "Person File Parser"
          }
        },
        {
          "inputs": [
            {
              "name": "HRRecord",
              "properties": {
                "source": "HRFile"
              }
            }
          ],
          "outputs": [
            {
              "name": "body"
            }
          ],
          "name": "READ",
          "description": "Read HR file.",
          "properties": {
            "stage": "HR File Reader"
          }
        },
        {
          "inputs": [
            {
              "name": "body"
            }
          ],
          "outputs": [
            {
              "name": "Employee_Name"
            },
            {
              "name": "Dept_Name"
            }
          ],
          "name": "PARSE",
          "description": "Parse the body field",
          "properties": {
            "stage": "HR File Parser"
          }
        },
        {
          "inputs": [
            {
              "name": "Employee_Name"
            },
            {
              "name": "Dept_Name"
            },
            {
              "name": "SSN"
            }
          ],
          "outputs": [
            {
              "name": "ID",
              "properties": {
                "target": "Employee Data"
              }
            }
          ],
          "name": "GenerateID",
          "description": "Generate unique Employee Id",
          "properties": {
            "stage": "Field Normalizer"
          }
        }
      ],
      "runs": [
        "runidX",
        "runidY",
        "runidZ"
      ]
    }

...