Javaで、以下のコードを使用してデータセットの結果を単一のCSVに書き込もうとしています。
dataset.write().mode(SaveMode.Overwrite).option("header",true).csv("C:\\tmp\\csvs");
しかし、ジョブがタイムアウトになり、ファイルは書き込まれません。
次の例外がスローされます: org.apache.spark.SparkException: Job aborted.
エラー:
org.apache.spark.SparkException: Job aborted due to stage failure:
Task 0 in stage 13.0 failed 1 times, most recent failure: Lost task 0.0 in stage 13.0 (TID 16, localhost): java.io.IOException: (null) entry in command string: null chmod 0644 C:\tmp\12333333testSpark\_temporary\0\_temporary\attempt_201712282255_0013_m_000000_0\part-r-00000-229fd1b6-ffb9-4ba1-9dc9-89dfdbd0be43.csv
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:770)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:866)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:849)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:733)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:225)
at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileOutputStream.<init>(RawLocalFileSystem.java:209)
at org.apache.hadoop.fs.RawLocalFileSystem.createOutputStreamWithMode(RawLocalFileSystem.java:307)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:296)
at org.apache.hadoop.fs.RawLocalFileSystem.create(RawLocalFileSystem.java:328)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSOutputSummer.<init>(ChecksumFileSystem.java:398)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:461)
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:440)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:892)
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:789)
at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:132)
at org.apache.spark.sql.execution.datasources.csv.CsvOutputWriter.<init>(CSVRelation.scala:200)
at org.apache.spark.sql.execution.datasources.csv.CSVOutputWriterFactory.newInstance(CSVRelation.scala:170)
at org.apache.spark.sql.execution.datasources.BaseWriterContainer.newOutputWriter(WriterContainer.scala:131)
at org.apache.spark.sql.execution.datasources.DefaultWriterContainer.writeRows(WriterContainer.scala:247)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(InsertIntoHadoopFsRelationCommand.scala:143)
at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(InsertIntoHadoopFsRelationCommand.scala:143)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
at org.apache.spark.scheduler.Task.run(Task.scala:86)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)