
I am using standalone Spark. When I run a program that writes a pair RDD to a sequence file, I get the following error while creating the SequenceFile (nativeio.NativeIO: Unable to initialize NativeIO libraries):

ERROR nativeio.NativeIO: Unable to initialize NativeIO libraries 
java.lang.NoSuchFieldError: workaroundNonThreadSafePasswdCalls 
at org.apache.hadoop.io.nativeio.NativeIO.initNative(Native Method) 
at org.apache.hadoop.io.nativeio.NativeIO.<clinit>(NativeIO.java:58) 
at org.apache.hadoop.fs.FileUtil.setPermission(FileUtil.java:653) 
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:509) 
at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:286) 
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:385) 
at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:364) 
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:555) 
at org.apache.hadoop.io.SequenceFile$Writer.<init>(SequenceFile.java:892) 
at org.apache.hadoop.io.SequenceFile.createWriter(SequenceFile.java:393) 
at org.apache.hadoop.io.SequenceFile.createWriter(SequenceFile.java:354) 
at org.apache.hadoop.io.SequenceFile.createWriter(SequenceFile.java:476) 
at org.apache.hadoop.mapred.SequenceFileOutputFormat.getRecordWriter(SequenceFileOutputFormat.java:58) 
at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:89) 
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:980) 
at org.apache.spark.rdd.PairRDDFunctions$$anonfun$13.apply(PairRDDFunctions.scala:974) 
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62) 
at org.apache.spark.scheduler.Task.run(Task.scala:54) 
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:177) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) 
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) 
at java.lang.Thread.run(Unknown Source) 

Here is the code I am using. I have already added all the required dependencies in pom.xml:

import java.util.ArrayList;
import java.util.List;

import scala.Tuple2;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;

public class BasicSaveSequenceFile {

    // Converts (String, Integer) pairs into the Hadoop Writable types
    // required by SequenceFileOutputFormat.
    public static class ConvertToWritableTypes
            implements PairFunction<Tuple2<String, Integer>, Text, IntWritable> {
        @Override
        public Tuple2<Text, IntWritable> call(Tuple2<String, Integer> record) {
            return new Tuple2<>(new Text(record._1()), new IntWritable(record._2()));
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new Exception("Usage: BasicSaveSequenceFile [sparkMaster] [output]");
        }
        String master = args[0];
        String fileName = args[1];

        JavaSparkContext sc = new JavaSparkContext(
                master, "basicloadsequencefile", System.getenv("SPARK_HOME"), System.getenv("JARS"));

        // Build a small in-memory pair RDD.
        List<Tuple2<String, Integer>> input = new ArrayList<>();
        input.add(new Tuple2<>("coffee", 1));
        input.add(new Tuple2<>("coffee", 2));
        input.add(new Tuple2<>("pandas", 3));
        JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(input);

        // Convert to Writable key/value types and write out a Hadoop sequence file.
        JavaPairRDD<Text, IntWritable> result = rdd.mapToPair(new ConvertToWritableTypes());
        result.saveAsHadoopFile(fileName, Text.class, IntWritable.class, SequenceFileOutputFormat.class);
    }
}

Can you help?

Answer


I upgraded the spark-core jar in pom.xml from version 1.1 to 1.3.1, and also removed the dependency on the hadoop-common jar from pom.xml. That worked for me!
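For reference, a minimal sketch of what the relevant pom.xml dependency block could look like after that change (the _2.10 Scala suffix is an assumption; match it to your own Scala/Spark build):

    <!-- spark-core upgraded from 1.1.x to 1.3.1; the explicit hadoop-common
         dependency is removed so Spark's bundled Hadoop classes are used. -->
    <dependencies>
      <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.10</artifactId>
        <version>1.3.1</version>
      </dependency>
    </dependencies>

The NoSuchFieldError on workaroundNonThreadSafePasswdCalls typically indicates mismatched Hadoop classes on the classpath, which is consistent with removing the conflicting hadoop-common dependency resolving the problem.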
