hadoop Invalid byte 1 of 1-byte UTF-8 sequence


在MyEclipse中直接调用Hadoop集群提交作业时,出现了下面的错误:

14/03/03 11:19:31 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/03/03 11:19:31 WARN snappy.LoadSnappy: Snappy native library not loaded
14/03/03 11:19:31 INFO mapred.JobClient: Cleaning up the staging area hdfs://hbase:9000/home/hadoop/hadooptmp/mapred/staging/Administrator/.staging/job_201403031114_0003
14/03/03 11:19:31 ERROR security.UserGroupInformation: PriviledgedActionException as:Administrator cause:org.apache.hadoop.ipc.RemoteException: java.io.IOException: java.lang.RuntimeException: com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: Invalid byte 1 of 1-byte UTF-8 sequence.
	at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3615)
	at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3561)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:587)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1432)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1428)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:415)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1426)
Caused by: java.lang.RuntimeException: com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: Invalid byte 1 of 1-byte UTF-8 sequence.
	at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1243)
	at org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:1117)
	at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:1053)
	at org.apache.hadoop.conf.Configuration.get(Configuration.java:397)
	at org.apache.hadoop.mapred.JobConf.checkAndWarnDeprecation(JobConf.java:1899)
	at org.apache.hadoop.mapred.JobConf.<init>(JobConf.java:399)
	at org.apache.hadoop.mapred.JobInProgress.<init>(JobInProgress.java:412)
	at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3613)
	... 12 more
Caused by: com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: Invalid byte 1 of 1-byte UTF-8 sequence.
	at com.sun.org.apache.xerces.internal.impl.io.UTF8Reader.invalidByte(UTF8Reader.java:687)
	at com.sun.org.apache.xerces.internal.impl.io.UTF8Reader.read(UTF8Reader.java:557)
	at com.sun.org.apache.xerces.internal.impl.XMLEntityScanner.load(XMLEntityScanner.java:1753)
	at com.sun.org.apache.xerces.internal.impl.XMLEntityScanner.peekChar(XMLEntityScanner.java:497)
	at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2649)
	at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:607)
	at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:116)
	at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:489)
	at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:835)
	at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:764)
	at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:123)
	at com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(DOMParser.java:237)
	at com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:300)
	at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:121)
	at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1169)
	... 19 more

Exception in thread "main" org.apache.hadoop.ipc.RemoteException: java.io.IOException: java.lang.RuntimeException: com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: Invalid byte 1 of 1-byte UTF-8 sequence.
	at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3615)
	at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3561)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:587)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1432)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1428)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:415)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1426)
Caused by: java.lang.RuntimeException: com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: Invalid byte 1 of 1-byte UTF-8 sequence.
	at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1243)
	at org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:1117)
	at org.apache.hadoop.conf.Configuration.getProps(Configuration.java:1053)
	at org.apache.hadoop.conf.Configuration.get(Configuration.java:397)
	at org.apache.hadoop.mapred.JobConf.checkAndWarnDeprecation(JobConf.java:1899)
	at org.apache.hadoop.mapred.JobConf.<init>(JobConf.java:399)
	at org.apache.hadoop.mapred.JobInProgress.<init>(JobInProgress.java:412)
	at org.apache.hadoop.mapred.JobTracker.submitJob(JobTracker.java:3613)
	... 12 more
Caused by: com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException: Invalid byte 1 of 1-byte UTF-8 sequence.
	at com.sun.org.apache.xerces.internal.impl.io.UTF8Reader.invalidByte(UTF8Reader.java:687)
	at com.sun.org.apache.xerces.internal.impl.io.UTF8Reader.read(UTF8Reader.java:557)
	at com.sun.org.apache.xerces.internal.impl.XMLEntityScanner.load(XMLEntityScanner.java:1753)
	at com.sun.org.apache.xerces.internal.impl.XMLEntityScanner.peekChar(XMLEntityScanner.java:497)
	at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2649)
	at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:607)
	at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:116)
	at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:489)
	at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:835)
	at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:764)
	at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:123)
	at com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(DOMParser.java:237)
	at com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:300)
	at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:121)
	at org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:1169)
	... 19 more

	at org.apache.hadoop.ipc.Client.call(Client.java:1113)
	at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:229)
	at org.apache.hadoop.mapred.$Proxy2.submitJob(Unknown Source)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:85)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:62)
	at org.apache.hadoop.mapred.$Proxy2.submitJob(Unknown Source)
	at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:1013)
	at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:936)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:415)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
	at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:936)
	at org.apache.hadoop.mapreduce.Job.submit(Job.java:550)
	at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:580)
	at hadoop.jfreechar.WordCountCopy1.main(WordCountCopy1.java:96)

这个错误是在向Configuration中设置含有中文的值时引起的,比如下面的配置:

String input ="/user/hadoop/input/picdata";
		String output="/user/hadoop/output/003";
		String chartFilePath="/user/hadoop/output/pic.png";
		String title ="第一列数据直方图";
	//	String title ="first column chart";
		String xStr="列号";
	//	String xStr = "column number";
		String yStr="数值";
	//	String yStr = "value number";
		String splitter = ",";
		Configuration conf = HadoopUtils.getConf();
		conf.set("chart", chartFilePath);
		conf.set("title", title);
		conf.set("xStr", xStr);
		conf.set("yStr", yStr);
		conf.set("splitter", splitter);
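这样配置后,含中文的值可能会导致上面的错误。这和MyEclipse中文件的编码有关:不同环境下MyEclipse的默认编码不尽相同(中文Windows下通常是GBK),所以此处需要把编码设置为UTF-8(Window → Preferences → General → Workspace → Text file encoding,或者在项目的 Properties → Resource 中设置),如下图: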



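直接用报错信息去Google搜索,可以知道这是xml的编码问题。这和Hadoop的job提交步骤有关:代码中通过conf.set设置的变量,在提交作业时会随整个配置一起写入job的xml配置文件并上传到staging目录;JobTracker端在初始化作业时会按UTF-8解析这个xml文件(见上面堆栈中的Configuration.loadResource和UTF8Reader),如果其中的中文不是以UTF-8编码写入的,解析时就会报Invalid byte 1 of 1-byte UTF-8 sequence。所以在使用IDE的时候,最好在开始编码之前就把编码设置为UTF-8,避免一些不必要的麻烦。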


分享,成长,快乐

转载请注明blog地址:http://blog.csdn.net/fansy1990


相关内容

    暂无相关文章