spark程序异常:Exception in thread "main" java.io.IOException: No FileSystem for scheme: hdfs,hadoophdfs


命令:

java -jar myspark-1.0-SNAPSHOT.jar myspark-1.0-SNAPSHOT.jar hdfs://single:9000/input/word.txt hdfs://single:9000/output/out1


错误信息:

..........

14/11/23 06:14:18 INFO SparkDeploySchedulerBackend: Granted executor ID app-20141123061418-0011/0 on hostPort single:8091 with 8 cores, 200.0 MB RAM
14/11/23 06:14:18 INFO AppClient$ClientActor: Executor updated: app-20141123061418-0011/0 is now RUNNING
Exception in thread "main" java.io.IOException: No FileSystem for scheme: hdfs
at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2421)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2428)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:88)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2467)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2449)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:287)
at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:221)
at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:140)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:898)
at org.apache.spark.rdd.RDD.count(RDD.scala:726)
at youling.studio.Main$.main(Main.scala:33)
at youling.studio.Main.main(Main.scala)



scala 代码:

package youling.studio


import org.apache.spark.SparkContext._
import org.apache.spark.{SparkConf, SparkContext}


import scala.collection.mutable.ListBuffer


/**
 * Created by Administrator on 2014/11/23.
 */
object Main {
  def main (args: Array[String]) {
    if(args.length!=3) {
      println("CMD:java -jar *.jar input output")
      System.exit(0)
    }


    val jars = ListBuffer[String]()
    args(0).split(',').map(jars += _)


    val conf = new SparkConf()
    conf.setMaster("spark://single:8081")
      .setSparkHome("/cloud/spark-0.9.1-bin-hadoop2")
      .setAppName("word count")
      .setJars(jars)
      .set("spark.executor.memory","200m")


    val sc = new SparkContext(conf)
    val data = sc.textFile(args(1))


    data.cache


    println(data.count)


    data.flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_).map(x=>(x._2,x._1)).sortByKey(false).map(x=>(x._2,x._1)).saveAsTextFile(args(2))


  }
}


错误原因:intellij idea打成的jar运行的时候没有找到hdfs类型的文件系统


解决:修改maven项目的pom文件,手动指定下面红色的部分

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>youling.studio.spark</groupId>
  <artifactId>myspark</artifactId>
  <version>1.0-SNAPSHOT</version>
  <inceptionYear>2008</inceptionYear>
  <properties>
    <scala.version>2.10.3</scala.version>
  </properties>


  <repositories>
    <repository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </repository>
  </repositories>


  <pluginRepositories>
    <pluginRepository>
      <id>scala-tools.org</id>
      <name>Scala-Tools Maven2 Repository</name>
      <url>http://scala-tools.org/repo-releases</url>
    </pluginRepository>
  </pluginRepositories>


  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.4</version>
      <scope>test</scope>
    </dependency>


    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>0.9.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.10</artifactId>
      <version>0.9.1</version>
    </dependency>
      <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.2.0</version>
    </dependency>
      <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>2.2.0</version>
      </dependency>




  </dependencies>


<build>
    <sourceDirectory>src/main/</sourceDirectory>
    <testSourceDirectory>src/test/</testSourceDirectory>


    <plugins>
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
            <configuration>
                <scalaVersion>2.10.3</scalaVersion>
            </configuration>
        </plugin>


        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.2</version>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                        <transformers>


                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>reference.conf</resource>
                            </transformer>


                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                <manifestEntries>
                                    <Main-Class>youling.studio.Main</Main-Class>
                                </manifestEntries>
                            </transformer>


                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                            <resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource>
                            </transformer>

                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>


<reporting>
        <plugins>
          <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <configuration>
              <scalaVersion>${scala.version}</scalaVersion>
            </configuration>
          </plugin>
        </plugins>
</reporting>
</project>

相关内容

    暂无相关文章