Trying to set up a basic pipeline in Data Fusion: I read data from MySQL, massage it with a JavaScript transform, and sink it to both Google Cloud Storage and BigQuery.
The BigQuery sink fails with an NPE. If I remove the BigQuery sink and write only to GCS, the pipeline runs fine.
Stack trace:
2021-02-17 00:55:33,219 - ERROR [SparkRunnerphase-1:i.c.c.i.a.r.ProgramControllerServiceAdapter@92] - Spark Program 'phase-1' failed.
org.apache.tephra.TransactionFailureException: Exception raised from TxRunnable.run() io.cdap.cdap.internal.app.runtime.AbstractContext$$Lambda$318/1727018556@d3baf83
at io.cdap.cdap.data2.transaction.Transactions$CacheBasedTransactional.finishExecute(Transactions.java:226) ~[na:na]
at io.cdap.cdap.data2.transaction.Transactions$CacheBasedTransactional.execute(Transactions.java:211) ~[na:na]
at io.cdap.cdap.internal.app.runtime.AbstractContext.execute(AbstractContext.java:536) ~[na:na]
at io.cdap.cdap.internal.app.runtime.AbstractContext.execute(AbstractContext.java:524) ~[na:na]
at io.cdap.cdap.app.runtime.spark.BasicSparkClientContext.execute(BasicSparkClientContext.java:333) ~[io.cdap.cdap.cdap-spark-core2_2.11-6.3.0.jar:na]
at io.cdap.cdap.etl.common.submit.SubmitterPlugin.prepareRun(SubmitterPlugin.java:69) ~[na:na]
at io.cdap.cdap.etl.common.submit.PipelinePhasePreparer.prepare(PipelinePhasePreparer.java:149) ~[na:na]
at io.cdap.cdap.etl.spark.AbstractSparkPreparer.prepare(AbstractSparkPreparer.java:87) ~[na:na]
at io.cdap.cdap.etl.spark.batch.SparkPreparer.prepare(SparkPreparer.java:88) ~[na:na]
at io.cdap.cdap.etl.spark.batch.ETLSpark.initialize(ETLSpark.java:120) ~[na:na]
at io.cdap.cdap.api.spark.AbstractSpark.initialize(AbstractSpark.java:131) ~[na:na]
at io.cdap.cdap.api.spark.AbstractSpark.initialize(AbstractSpark.java:33) ~[na:na]
at io.cdap.cdap.app.runtime.spark.SparkRuntimeService$2.initialize(SparkRuntimeService.java:167) ~[io.cdap.cdap.cdap-spark-core2_2.11-6.3.0.jar:na]
at io.cdap.cdap.app.runtime.spark.SparkRuntimeService$2.initialize(SparkRuntimeService.java:162) ~[io.cdap.cdap.cdap-spark-core2_2.11-6.3.0.jar:na]
at io.cdap.cdap.internal.app.runtime.AbstractContext.lambda$initializeProgram$6(AbstractContext.java:624) ~[na:na]
at io.cdap.cdap.internal.app.runtime.AbstractContext.execute(AbstractContext.java:584) ~[na:na]
at io.cdap.cdap.internal.app.runtime.AbstractContext.initializeProgram(AbstractContext.java:621) ~[na:na]
at io.cdap.cdap.app.runtime.spark.SparkRuntimeService.initialize(SparkRuntimeService.java:433) ~[io.cdap.cdap.cdap-spark-core2_2.11-6.3.0.jar:na]
at io.cdap.cdap.app.runtime.spark.SparkRuntimeService.startUp(SparkRuntimeService.java:208) ~[io.cdap.cdap.cdap-spark-core2_2.11-6.3.0.jar:na]
at com.google.common.util.concurrent.AbstractExecutionThreadService$1$1.run(AbstractExecutionThreadService.java:47) ~[com.google.guava.guava-13.0.1.jar:na]
at io.cdap.cdap.app.runtime.spark.SparkRuntimeService$5$1.run(SparkRuntimeService.java:404) [io.cdap.cdap.cdap-spark-core2_2.11-6.3.0.jar:na]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_275]
Caused by: java.lang.NullPointerException: null
at java.util.Objects.requireNonNull(Objects.java:203) ~[na:1.8.0_275]
at io.cdap.plugin.gcp.bigquery.sink.AbstractBigQuerySink.generateTableFieldSchema(AbstractBigQuerySink.java:521) ~[na:na]
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193) ~[na:1.8.0_275]
at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384) ~[na:1.8.0_275]
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482) ~[na:1.8.0_275]
at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) ~[na:1.8.0_275]
at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708) ~[na:1.8.0_275]
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[na:1.8.0_275]
at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:566) ~[na:1.8.0_275]
at io.cdap.plugin.gcp.bigquery.sink.AbstractBigQuerySink.generateTableFieldSchema(AbstractBigQuerySink.java:523) ~[na:na]
at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193) ~[na:1.8.0_275]
at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384) ~[na:1.8.0_275]
at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482) ~[na:1.8.0_275]
at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472) ~[na:1.8.0_275]
at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708) ~[na:1.8.0_275]
at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234) ~[na:1.8.0_275]
at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:566) ~[na:1.8.0_275]
at io.cdap.plugin.gcp.bigquery.sink.AbstractBigQuerySink.getBigQueryTableFields(AbstractBigQuerySink.java:503) ~[na:na]
at io.cdap.plugin.gcp.bigquery.sink.AbstractBigQuerySink.initOutput(AbstractBigQuerySink.java:163) ~[na:na]
at io.cdap.plugin.gcp.bigquery.sink.BigQuerySink.prepareRunInternal(BigQuerySink.java:121) ~[na:na]
at io.cdap.plugin.gcp.bigquery.sink.AbstractBigQuerySink.prepareRun(AbstractBigQuerySink.java:113) ~[na:na]
at io.cdap.plugin.gcp.bigquery.sink.AbstractBigQuerySink.prepareRun(AbstractBigQuerySink.java:72) ~[na:na]
at io.cdap.cdap.etl.common.plugin.WrappedBatchSink.lambda$prepareRun$0(WrappedBatchSink.java:52) ~[na:na]
at io.cdap.cdap.etl.common.plugin.Caller$1.call(Caller.java:30) ~[na:na]
at io.cdap.cdap.etl.common.plugin.StageLoggingCaller.call(StageLoggingCaller.java:40) ~[na:na]
at io.cdap.cdap.etl.common.plugin.WrappedBatchSink.prepareRun(WrappedBatchSink.java:51) ~[na:na]
at io.cdap.cdap.etl.common.plugin.WrappedBatchSink.prepareRun(WrappedBatchSink.java:37) ~[na:na]
at io.cdap.cdap.etl.common.submit.SubmitterPlugin.lambda$prepareRun$2(SubmitterPlugin.java:71) ~[na:na]
at io.cdap.cdap.internal.app.runtime.AbstractContext.lambda$execute$3(AbstractContext.java:539) ~[na:na]
at io.cdap.cdap.data2.transaction.Transactions$CacheBasedTransactional.finishExecute(Transactions.java:224) ~[na:na]
... 21 common frames omitted
My source and sink schemas are identical:
Pastebin password: KWHsYQVb7R
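
Reading the trace, the NPE is raised by Objects.requireNonNull inside AbstractBigQuerySink.generateTableFieldSchema, and that frame appears twice (lines 521 and 523 of AbstractBigQuerySink.java), so the sink seems to recurse into a nested record field and then fail to map one of the inner field types to a BigQuery type. Below is a minimal sketch of that failure mode as I understand it; this is my reconstruction from the stack trace, not the actual plugin source, and every name except generateTableFieldSchema is a placeholder:

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

class SchemaMappingSketch {

  // Placeholder stand-in for io.cdap.cdap.api.data.schema.Schema.Field.
  static class Field {
    final String name;
    final String cdapType;      // e.g. "string", "long", "record"
    final List<Field> children; // only set for nested records
    Field(String name, String cdapType, List<Field> children) {
      this.name = name;
      this.cdapType = cdapType;
      this.children = children;
    }
  }

  // Placeholder type mapping; returns null for anything it doesn't know,
  // which is exactly the condition that would trip requireNonNull below.
  static String toBigQueryType(String cdapType) {
    switch (cdapType) {
      case "string": return "STRING";
      case "int":
      case "long":   return "INTEGER";
      case "record": return "RECORD";
      default:       return null;
    }
  }

  // Mirrors the recursive shape visible in the trace: map each field,
  // recurse into nested records via a stream, and throw an NPE when the
  // type mapping comes back null.
  static String generateTableFieldSchema(Field field) {
    String bqType = Objects.requireNonNull(toBigQueryType(field.cdapType)); // <- NPE origin
    if ("RECORD".equals(bqType)) {
      return field.name + " RECORD<" + field.children.stream()
          .map(SchemaMappingSketch::generateTableFieldSchema)
          .collect(Collectors.joining(", ")) + ">";
    }
    return field.name + " " + bqType;
  }

  public static void main(String[] args) {
    // A nested record containing a type the mapping above doesn't know
    // reproduces the same two-deep generateTableFieldSchema frames before the NPE.
    Field record = new Field("payload", "record", Arrays.asList(
        new Field("id", "long", null),
        new Field("created_at", "timestamp", null))); // unmapped in this sketch -> NPE
    System.out.println(generateTableFieldSchema(record));
  }
}

If that reading is right, the culprit would be some field in my schema whose type (or logical type) the BigQuery sink can't map, which would also explain why the GCS sink, which never builds a table schema, works fine.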