
take() function error - link analysis study using Spark MLlib

I am working on a link analysis project in the retail industry using Spark MLlib. My schema is:

ID - Long, Chain - Int, Dept - Int, Category - Int, Company - Long, Brand - Long, Date - Date, ProductSize - Int, ProductMeasure - Chararray, PurchaseQuantity - Int, PurchaseAmount - Double

And the code I am using is:

scala> import org.apache.spark._ 
scala> import org.apache.spark.rdd.RDD 
scala> import org.apache.spark.util.IntParam 
scala> import org.apache.spark.graphx._ 
scala> import org.apache.spark.graphx.util.GraphGenerators 

scala> case class Transactions(ID:Long,Chain:Int,Dept:Int,Category:Int,Company:Long,Brand:Long,Date:String,ProductSize:Int,ProductMeasure:String,PurchaseQuantity:Int,PurchaseAmount:Double) 
defined class Transactions 

scala> def parseTransactions(str:String): Transactions = { 
    | val line = str.split(",") 
    | Transactions(line(0).toLong,line(1).toInt,line(2).toInt,line(3).toInt,line(4).toInt,line(5).toInt,line(6),line(7).toInt,line(8),line(9).toInt,line(10).toInt) 
    | } 

scala> val textRDD = sc.textFile("/user/cloudera/transactions.csv") 

scala> val transactionsRDD = textRDD.map(parseTransactions).cache() 

scala> val products = transactionsRDD.map(Transactions => (Transactions.ID,Transactions.Chain,Transactions.Dept,Transactions.Category,Transactions.Company,Transactions.Brand,Transactions.Date)).distinct 

scala> products.take(1) 

But when I submit the last line, I get the following error:

16/08/24 04:56:13 ERROR executor.Executor: Exception in task 0.0 in stage 0.0 (TID 0)
java.lang.NumberFormatException: For input string: "id" 
    at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
    at java.lang.Long.parseLong(Long.java:441) 
    at java.lang.Long.parseLong(Long.java:483) 
    at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230) 
    at scala.collection.immutable.StringOps.toLong(StringOps.scala:31)
    at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
    at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
    at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
    at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
    at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
    at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
16/08/24 04:56:13 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.NumberFormatException: For input string: "id" 

16/08/24 04:56:13 ERROR scheduler.TaskSetManager: Task 0 in stage 0.0 failed 1 times; aborting job 
16/08/24 04:56:13 ERROR executor.Executor: Exception in task 1.0 in stage 0.0 (TID 1) 
java.lang.NumberFormatException: For input string: "6.67" 
    at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
    at java.lang.Integer.parseInt(Integer.java:492) 
    at java.lang.Integer.parseInt(Integer.java:527) 
    at scala.collection.immutable.StringLike$class.toInt(StringLike.scala:229) 
    at scala.collection.immutable.StringOps.toInt(StringOps.scala:31)
    at $line65.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
    at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
    at $line67.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
    at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
    at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
    at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
16/08/24 04:56:13 WARN scheduler.TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1, localhost): java.lang.NumberFormatException: For input string: "6.67" 

org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost): java.lang.NumberFormatException: For input string: "id" 
    at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) 
    at java.lang.Long.parseLong(Long.java:441) 
    at java.lang.Long.parseLong(Long.java:483) 
    at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230) 
    at scala.collection.immutable.StringOps.toLong(StringOps.scala:31)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.parseTransactions(<console>:38)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:42)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
    at org.apache.spark.storage.MemoryStore.unrollSafely(MemoryStore.scala:285)
    at org.apache.spark.CacheManager.putInBlockManager(CacheManager.scala:171)
    at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:78)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:268)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
    at java.lang.Thread.run(Thread.java:745) 

Driver stacktrace: 
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1843)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1856)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1869)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1328)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1302)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:47)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:52)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:54)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:56)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:58)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:60)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:62)
    at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:64)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:66)
    at $iwC$$iwC$$iwC.<init>(<console>:68)
    at $iwC$$iwC.<init>(<console>:70)
    at $iwC.<init>(<console>:72)
    at <init>(<console>:74)
    at .<init>(<console>:78)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1045)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1326)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:821)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:852)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:800)
    at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
    at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
    at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
    at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
    at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
    at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
    at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
    at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1064)
    at org.apache.spark.repl.Main$.main(Main.scala:31)
    at org.apache.spark.repl.Main.main(Main.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NumberFormatException: For input string: "id" 

Does anyone know why I am getting this error? It should have returned the array I created...

Thanks very much!

Answer


It looks like your toInt calls are trying to convert strings that contain non-numeric characters.
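The two failures in the log point at exactly such strings: "id" (the first column of a header row being fed through parseTransactions) and "6.67" (a decimal PurchaseAmount being given to toInt). A minimal sketch of one possible fix, assuming the first line of transactions.csv is a header row and matching each conversion to the schema above (Company and Brand as Long, PurchaseAmount as Double):

scala> // Drop the header row before parsing; "id" is not a number.
scala> val header = textRDD.first()
scala> val dataRDD = textRDD.filter(_ != header)

scala> // Convert each column to the type declared in the Transactions case class.
scala> def parseTransactions(str: String): Transactions = {
    |   val line = str.split(",")
    |   Transactions(line(0).toLong, line(1).toInt, line(2).toInt, line(3).toInt,
    |     line(4).toLong, line(5).toLong, line(6), line(7).toInt,
    |     line(8), line(9).toInt, line(10).toDouble)
    | }

scala> val transactionsRDD = dataRDD.map(parseTransactions).cache()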


I was thinking the error might be the int on "line(6)", where I have the Date field, which I am passing as a String.


Do you ever parse it into a Date? Just keep it as a String. I don't know any Scala, so I may be off base, but I do know Spark. It is a NumberFormatException, so something is trying to parse a string into a number. Is anything else being attempted besides the construction in Scala? Before doing the distinct, try logging a few lines of the transactionsRDD. Remove some of the toInt calls and see whether the problem persists (maybe remove them all).
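Along those lines, a small diagnostic sketch (it assumes only the textRDD defined above; badRows is a hypothetical name): print a few raw lines before any conversion, and use scala.util.Try to collect rows whose numeric columns do not parse.

scala> import scala.util.Try

scala> // Inspect the raw input before any conversion is attempted.
scala> textRDD.take(5).foreach(println)

scala> // Keep a sample of rows whose ID (column 0) or PurchaseAmount (column 10)
scala> // fails to parse as a number; the header row would show up here too.
scala> val badRows = textRDD.filter { s =>
    |   val f = s.split(",")
    |   f.length < 11 || Try(f(0).toLong).isFailure || Try(f(10).toDouble).isFailure
    | }
scala> badRows.take(5).foreach(println)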


Jan, when I remove all the .toInt calls I get this error: def parseTransactions(str:String): Transactions = { | val line = str.split(",") | Transactions(line(0).toLong, line(1), line(2), line(3), line(4), line(5), line(6), line(7), line(8), line(9), line(10)) | } <console>:47: error: type mismatch; found: String, required: Int, at Transactions(line(0).toLong, line(1), line(2), line(3), line(4), line(5), line(6), line(7), line(8), line(9), line(10)). Do you know why?
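That mismatch is expected: the Transactions case class declares Chain, Dept, Category, ProductSize and PurchaseQuantity as Int, so raw String values cannot be passed to its constructor. A debug-only alternative (RawTransaction is a hypothetical name, not part of the original code) is to keep every column as a String until the right types are settled:

scala> // Hypothetical all-String variant, useful only for eyeballing the data.
scala> case class RawTransaction(fields: Array[String])

scala> val rawRDD = textRDD.map(s => RawTransaction(s.split(",")))
scala> rawRDD.take(3).foreach(r => println(r.fields.mkString(" | ")))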