【甘道夫】Win7环境下Eclipse连接Hadoop2.2.0

文章由LinuxBoy分享于2019-03-27 03:03:11热评（308）

【甘道夫】Win7环境下Eclipse连接Hadoop2.2.0

准备： 确保hadoop2.2.0集群正常运行

1.eclipse中建立java工程，导入hadoop2.2.0相关jar包

2.在src根目录下拷入log4j.properties，通过log4j查看详细日志 log4j.rootLogger=debug, stdout, R log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n log4j.appender.R=org.apache.log4j.RollingFileAppender log4j.appender.R.File=firestorm.log log4j.appender.R.MaxFileSize=100KB log4j.appender.R.MaxBackupIndex=1 log4j.appender.R.layout=org.apache.log4j.PatternLayout log4j.appender.R.layout.ConversionPattern=%p %t %c - %m%n log4j.logger.com.codefutures=DEBUG

3.拷入一个可执行的hadoop程序，我用的是一个HdfsDAO，可以先保证HDFS操作能执行

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;

/**
 * Minimal helper for exercising basic HDFS operations (mkdir, delete, list,
 * create, upload, download, cat) from a client machine.
 *
 * <p>Every operation obtains a {@link FileSystem} for {@code hdfsPath}, does its
 * work, and closes the handle in a {@code finally} block so it is released even
 * when the operation throws.
 */
public class HdfsDAO {

    /** HDFS URI used by the single-argument constructor. */
    private static final String HDFS = "hdfs://192.168.0.160:9000/";

    /** URI of the file system that every operation connects to. */
    private final String hdfsPath;

    /** Configuration handed to {@code FileSystem.get} for every operation. */
    private final Configuration conf;

    /**
     * Creates a DAO that talks to the built-in default URI.
     *
     * @param conf Hadoop configuration to use for all operations
     */
    public HdfsDAO(Configuration conf) {
        this(HDFS, conf);
    }

    /**
     * Creates a DAO that talks to the given file system.
     *
     * @param hdfs URI of the file system, e.g. {@code hdfs://host:port/}
     * @param conf Hadoop configuration to use for all operations
     */
    public HdfsDAO(String hdfs, Configuration conf) {
        this.hdfsPath = hdfs;
        this.conf = conf;
    }

    /**
     * Smoke test: lists the root directory of the default file system.
     *
     * @param args unused
     * @throws IOException if the file system cannot be reached or listed
     */
    public static void main(String[] args) throws IOException {
        JobConf conf = config();
        HdfsDAO hdfs = new HdfsDAO(conf);
        // hdfs.copyFile("datafile/item.csv", "/tmp/new");
        // hdfs.ls("/tmp/new");
        hdfs.ls("/");
    }

    /**
     * Builds the job configuration used by {@link #main(String[])}.
     *
     * @return a {@code JobConf} named "HdfsDAO" with the three site files registered
     */
    public static JobConf config() {
        JobConf conf = new JobConf(HdfsDAO.class);
        conf.setJobName("HdfsDAO");
        // Configuration.addResource(String) takes a plain classpath resource name;
        // a Spring-style "classpath:/" prefix never matches a resource, so the
        // files would not be loaded.
        conf.addResource("hadoop/core-site.xml");
        conf.addResource("hadoop/hdfs-site.xml");
        conf.addResource("hadoop/mapred-site.xml");
        return conf;
    }

    /**
     * Creates {@code folder} (and missing parents) if it does not exist yet.
     *
     * @param folder path of the directory to create
     * @throws IOException if the file system operation fails
     */
    public void mkdirs(String folder) throws IOException {
        Path path = new Path(folder);
        FileSystem fs = openFileSystem();
        try {
            if (!fs.exists(path)) {
                fs.mkdirs(path);
                System.out.println("Create: " + folder);
            }
        } finally {
            fs.close();
        }
    }

    /**
     * Recursively deletes {@code folder}.
     *
     * @param folder path of the file or directory to delete
     * @throws IOException if the file system operation fails
     */
    public void rmr(String folder) throws IOException {
        Path path = new Path(folder);
        FileSystem fs = openFileSystem();
        try {
            // Delete right away instead of registering the path with
            // deleteOnExit and relying on close() to perform the removal.
            fs.delete(path, true);
            System.out.println("Delete: " + folder);
        } finally {
            fs.close();
        }
    }

    /**
     * Prints name, directory flag and length of every entry in {@code folder}.
     *
     * @param folder path of the directory to list
     * @throws IOException if the file system operation fails
     */
    public void ls(String folder) throws IOException {
        Path path = new Path(folder);
        FileSystem fs = openFileSystem();
        try {
            FileStatus[] list = fs.listStatus(path);
            System.out.println("ls: " + folder);
            System.out.println("==========================================================");
            for (FileStatus f : list) {
                // isDirectory() replaces the deprecated isDir().
                System.out.printf("name: %s, folder: %s, size: %d\n",
                        f.getPath(), f.isDirectory(), f.getLen());
            }
            System.out.println("==========================================================");
        } finally {
            fs.close();
        }
    }

    /**
     * Creates {@code file} and writes {@code content} into it.
     *
     * @param file    path of the file to create
     * @param content text to write, encoded as UTF-8
     * @throws IOException if the file cannot be created or written
     */
    public void createFile(String file, String content) throws IOException {
        FileSystem fs = openFileSystem();
        try {
            // Fixed charset so the bytes do not depend on the client's platform default.
            byte[] buff = content.getBytes("UTF-8");
            FSDataOutputStream os = null;
            try {
                os = fs.create(new Path(file));
                os.write(buff, 0, buff.length);
                System.out.println("Create: " + file);
            } finally {
                if (os != null) {
                    os.close();
                }
            }
        } finally {
            fs.close();
        }
    }

    /**
     * Uploads a local file to the file system.
     *
     * @param local  path of the local source file
     * @param remote destination path on the file system
     * @throws IOException if the copy fails
     */
    public void copyFile(String local, String remote) throws IOException {
        FileSystem fs = openFileSystem();
        try {
            fs.copyFromLocalFile(new Path(local), new Path(remote));
            System.out.println("copy from: " + local + " to " + remote);
        } finally {
            fs.close();
        }
    }

    /**
     * Downloads a file from the file system to the local disk.
     *
     * @param remote source path on the file system
     * @param local  destination path on the local disk
     * @throws IOException if the copy fails
     */
    public void download(String remote, String local) throws IOException {
        Path path = new Path(remote);
        FileSystem fs = openFileSystem();
        try {
            fs.copyToLocalFile(path, new Path(local));
            System.out.println("download: from" + remote + " to " + local);
        } finally {
            fs.close();
        }
    }

    /**
     * Copies the content of {@code remoteFile} to standard output.
     *
     * @param remoteFile path of the file to print
     * @throws IOException if the file cannot be opened or read
     */
    public void cat(String remoteFile) throws IOException {
        Path path = new Path(remoteFile);
        FileSystem fs = openFileSystem();
        FSDataInputStream fsdis = null;
        System.out.println("cat: " + remoteFile);
        try {
            fsdis = fs.open(path);
            // Last argument false: copyBytes must not close System.out.
            IOUtils.copyBytes(fsdis, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(fsdis);
            fs.close();
        }
    }

    /**
     * Currently a no-op: the block-location sample below is disabled.
     *
     * @throws IOException never thrown while the sample stays commented out
     */
    public void location() throws IOException {
        // String folder = hdfsPath + "create/";
        // String file = "t2.txt";
        // FileSystem fs = FileSystem.get(URI.create(hdfsPath), new Configuration());
        // FileStatus f = fs.getFileStatus(new Path(folder + file));
        // BlockLocation[] list = fs.getFileBlockLocations(f, 0, f.getLen());
        //
        // System.out.println("File Location: " + folder + file);
        // for (BlockLocation bl : list) {
        //     String[] hosts = bl.getHosts();
        //     for (String host : hosts) {
        //         System.out.println("host:" + host);
        //     }
        // }
        // fs.close();
    }

    /** Obtains the FileSystem for {@code hdfsPath}; the caller is responsible for closing it. */
    private FileSystem openFileSystem() throws IOException {
        return FileSystem.get(URI.create(hdfsPath), conf);
    }
}

4.运行HdfsDAO 报错： java.io.IOException: HADOOP_HOME or hadoop.home.dir are not set. at org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:225) at org.apache.hadoop.util.Shell.<clinit>(Shell.java:250) at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76) at org.apache.hadoop.conf.Configuration.getTrimmedStrings(Configuration.java:1546) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:519) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:453) at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:136) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2433) at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:88) at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2467) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2449) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367) at HdfsDAO.copyFile(HdfsDAO.java:94) at HdfsDAO.main(HdfsDAO.java:34) ERROR - Failed to locate the winutils binary in the hadoop binary path java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries. 
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:278) at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:300) at org.apache.hadoop.util.Shell.<clinit>(Shell.java:293) at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76) at org.apache.hadoop.conf.Configuration.getTrimmedStrings(Configuration.java:1546) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:519) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:453) at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:136) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2433) at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:88) at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2467) at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2449) at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:367) at HdfsDAO.copyFile(HdfsDAO.java:94) at HdfsDAO.main(HdfsDAO.java:34)
解决： 首先，在win7中设置环境变量HADOOP_HOME，指向win7中的hadoop2.2.0根目录。然后，到 https://github.com/srccodes/hadoop-common-2.2.0-bin 去下载hadoop2.2.0的bin，里面有winutils.exe，将其拷贝到 %HADOOP_HOME%\bin 下。

5.重新运行，报错 Exception in thread "main" java.net.ConnectException: Call From WIN-CMM62V9I3VG/192.168.0.40 to singlehadoop:9000 failed on connection exception: java.net.ConnectException: Connection refused: no further information; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source) at java.lang.reflect.Constructor.newInstance(Unknown Source) at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:783) at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:730) at org.apache.hadoop.ipc.Client.call(Client.java:1351) at org.apache.hadoop.ipc.Client.call(Client.java:1300) at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206) at com.sun.proxy.$Proxy9.getListing(Unknown Source) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) at java.lang.reflect.Method.invoke(Unknown Source) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102) at com.sun.proxy.$Proxy9.getListing(Unknown Source) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getListing(ClientNamenodeProtocolTranslatorPB.java:482) at org.apache.hadoop.hdfs.DFSClient.listPaths(DFSClient.java:1660) at org.apache.hadoop.hdfs.DFSClient.listPaths(DFSClient.java:1643) at org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:640) at org.apache.hadoop.hdfs.DistributedFileSystem.access$600(DistributedFileSystem.java:92) at org.apache.hadoop.hdfs.DistributedFileSystem$14.doCall(DistributedFileSystem.java:702) at 
org.apache.hadoop.hdfs.DistributedFileSystem$14.doCall(DistributedFileSystem.java:698) at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) at org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:698) at HdfsDAO.ls(HdfsDAO.java:69) at HdfsDAO.main(HdfsDAO.java:36) Caused by: java.net.ConnectException: Connection refused: no further information at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method) at sun.nio.ch.SocketChannelImpl.finishConnect(Unknown Source) at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:529) at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:493) at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:547) at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:642) at org.apache.hadoop.ipc.Client$Connection.access$2600(Client.java:314) at org.apache.hadoop.ipc.Client.getConnection(Client.java:1399) at org.apache.hadoop.ipc.Client.call(Client.java:1318) ... 21 more DEBUG - Stopping client
解决： 发现core-site.xml中 <property> <name>fs.default.name</name> <value>hdfs://singlehadoop:8020</value> </property> 端口是8020，不是9000，所以修改程序中以下语句中的端口为8020 private static final String HDFS = "hdfs://192.168.0.160:8020/";

6.重新启动，顺利执行 DEBUG - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginSuccess with annotation @org.apache.hadoop.metrics2.annotation.Metric(valueName=Time, about=, value=[Rate of successful kerberos logins and latency (milliseconds)], always=false, type=DEFAULT, sampleName=Ops) DEBUG - field org.apache.hadoop.metrics2.lib.MutableRate org.apache.hadoop.security.UserGroupInformation$UgiMetrics.loginFailure with annotation @org.apache.hadoop.metrics2.annotation.Metric(valueName=Time, about=, value=[Rate of failed kerberos logins and latency (milliseconds)], always=false, type=DEFAULT, sampleName=Ops) DEBUG - UgiMetrics, User and group related metrics DEBUG - Kerberos krb5 configuration not found, setting default realm to empty DEBUG - Creating new Groups object DEBUG - Trying to load the custom-built native-hadoop library... DEBUG - Failed to load native-hadoop with error: java.lang.UnsatisfiedLinkError: no hadoop in java.library.path DEBUG - java.library.path=D:\Program Files\Java\jre7\bin;C:\Windows\Sun\Java\bin;C:\Windows\system32;C:\Windows;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files (x86)\Intel\iCLS Client\;C:\Program Files\Intel\iCLS Client\;C:\Windows\system32;C:\Windows;C:\Windows\System32\Wbem;C:\Windows\System32\WindowsPowerShell\v1.0\;C:\Program Files\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\DAL;C:\Program Files (x86)\Intel\Intel(R) Management Engine Components\IPT;C:\Program Files (x86)\Intel\OpenCL SDK\3.0\bin\x86;C:\Program Files (x86)\Intel\OpenCL SDK\3.0\bin\x64;D:\Program Files\Java\jdk1.7.0_40\bin;D:\Program Files\Java\jdk1.7.0_40\jre\bin;D:\Program Files\TortoiseSVN\bin;D:\Program Files (x86)\ant\bin;D:\Program Files\maven3\bin;. WARN - Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable DEBUG - Falling back to shell based DEBUG - Group mapping impl=org.apache.hadoop.security.ShellBasedUnixGroupsMapping DEBUG - Group mapping impl=org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback; cacheTimeout=300000 DEBUG - hadoop login DEBUG - hadoop login commit DEBUG - using local user:NTUserPrincipal: Administrator DEBUG - UGI loginUser:Administrator (auth:SIMPLE) DEBUG - dfs.client.use.legacy.blockreader.local = false DEBUG - dfs.client.read.shortcircuit = false DEBUG - dfs.client.domain.socket.data.traffic = false DEBUG - dfs.domain.socket.path = DEBUG - StartupProgress, NameNode startup progress DEBUG - multipleLinearRandomRetry = null DEBUG - rpcKind=RPC_PROTOCOL_BUFFER, rpcRequestWrapperClass=class org.apache.hadoop.ipc.ProtobufRpcEngine$RpcRequestWrapper, rpcInvoker=org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker@1afde4a3 DEBUG - Both short-circuit local reads and UNIX domain socket are disabled. DEBUG - The ping interval is 60000 ms. 
DEBUG - Connecting to /192.168.0.160:8020 DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: starting, having connections 1 DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator sending #0 DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator got value #0 DEBUG - Call: getListing took 136ms ls: / ========================================================== name: hdfs://192.168.0.160:8020/data, folder: true, size: 0 name: hdfs://192.168.0.160:8020/fulong, folder: true, size: 0 name: hdfs://192.168.0.160:8020/test, folder: true, size: 0 name: hdfs://192.168.0.160:8020/tmp, folder: true, size: 0 name: hdfs://192.168.0.160:8020/user, folder: true, size: 0 name: hdfs://192.168.0.160:8020/workspace, folder: true, size: 0 ========================================================== DEBUG - Stopping client DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: closed DEBUG - IPC Client (60133785) connection to /192.168.0.160:8020 from Administrator: stopped, remaining connections 0

下面尝试跑一个hadoop2.2.0自带的wordcount程序

1.首先还是将wordcount的源码拷入工程的src 运行报错： ERROR - PriviledgedActionException as:Administrator (auth:SIMPLE) cause:java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses. Exception in thread "main" java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses. at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:120) at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:82) at org.apache.hadoop.mapreduce.Cluster.<init>(Cluster.java:75) at org.apache.hadoop.mapreduce.Job$9.run(Job.java:1238) at org.apache.hadoop.mapreduce.Job$9.run(Job.java:1234) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Unknown Source) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491) at org.apache.hadoop.mapreduce.Job.connect(Job.java:1233) at org.apache.hadoop.mapreduce.Job.submit(Job.java:1262) at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286) at org.apache.hadoop.examples.WordCount.main(WordCount.java:84) DEBUG - Stopping client
解决： 拷入两个jar包 hadoop-mapreduce-client-common-2.2.0.jar hadoop-mapreduce-client-jobclient-2.2.0.jar

2.再次运行，以上错误解决，出现新的报错 Exception in thread "main" DEBUG - Stopping client DEBUG - IPC Client (152472387) connection to singlehadoop/192.168.0.160:8020 from Administrator: closed DEBUG - IPC Client (152472387) connection to singlehadoop/192.168.0.160:8020 from Administrator: stopped, remaining connections 0 java.lang.NoClassDefFoundError: org/apache/hadoop/yarn/util/Apps at java.lang.ClassLoader.defineClass1(Native Method) at java.lang.ClassLoader.defineClass(Unknown Source) at java.security.SecureClassLoader.defineClass(Unknown Source) at java.net.URLClassLoader.defineClass(Unknown Source) at java.net.URLClassLoader.access$100(Unknown Source) at java.net.URLClassLoader$1.run(Unknown Source) at java.net.URLClassLoader$1.run(Unknown Source) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(Unknown Source) at java.lang.ClassLoader.loadClass(Unknown Source) at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source) at java.lang.ClassLoader.loadClass(Unknown Source) at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:93) at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:157) at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:636) at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:430) at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268) at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Unknown Source) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491) at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265) at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286) at org.apache.hadoop.examples.WordCount.main(WordCount.java:84) Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.yarn.util.Apps at java.net.URLClassLoader$1.run(Unknown 
Source) at java.net.URLClassLoader$1.run(Unknown Source) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(Unknown Source) at java.lang.ClassLoader.loadClass(Unknown Source) at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source) at java.lang.ClassLoader.loadClass(Unknown Source) ... 24 more
解决： 把share\hadoop\yarn下的jar包全引入工程

3.再次运行，以上错误解决，出现新的报错 Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method) at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:435) at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:977) at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:177) at org.apache.hadoop.util.DiskChecker.checkDirAccess(DiskChecker.java:164) at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:98) at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:285) at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:344) at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:150) at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:131) at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:115) at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:131) at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:157) at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:636) at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:430) at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1268) at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1265) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Unknown Source) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1491) at org.apache.hadoop.mapreduce.Job.submit(Job.java:1265) at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1286) at org.apache.hadoop.examples.WordCount.main(WordCount.java:84)
解决： 找到我们刚才下载过的hadoop-common-2.2.0-bin，这次直接来个彻底的，用整个下载的bin目录（其中包含hadoop.dll、winutils.exe等本地文件）覆盖%HADOOP_HOME%\bin，并且在环境变量PATH中加上%HADOOP_HOME%\bin

4.再次运行，以上错误解决，出现新的报错 Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=Administrator, access=WRITE, inode="/workspace/wordcount":casliyang:supergroup:drwxr-xr-x
解决： 在hadoop集群的hdfs-site.xml中加入以下属性并重启HDFS，关闭权限校验（仅建议在开发/测试环境使用；hadoop2.x中该属性的新名称为dfs.permissions.enabled，旧名称dfs.permissions仍然有效） <property> <name>dfs.permissions</name> <value>false</value> </property>

5.再次运行，程序顺利运行

总结： 1.将hadoop-2.2.0.tar.gz解压一份放到win7的程序目录下，注意hadoop版本一定要和集群的版本一致，然后拷贝集群中的以下几个配置文件覆盖到win7本地的对应目录： core-site.xml hdfs-site.xml mapred-site.xml yarn-site.xml
2.在eclipse中新建java工程后，最好直接引入所有hadoop2.2.0相关的jar包，包括以下几个目录下的jar包： share\hadoop\common share\hadoop\hdfs share\hadoop\mapreduce share\hadoop\yarn
注：如果使用hadoop的eclipse插件，就无需该步骤，但2.2.0的插件需自行编译，编译过程参见我的另一篇博客： http://blog.csdn.net/fulongfbi/article/details/23850575
3.需要在win7中设置环境变量HADOOP_HOME（指向本地hadoop2.2.0根目录），并把%HADOOP_HOME%\bin加入PATH环境变量中
4.需要下载https://github.com/srccodes/hadoop-common-2.2.0-bin，解压后把下载的bin目录覆盖%HADOOP_HOME%\bin
5.注意参考hadoop集群的配置，Eclipse中的程序配置“hadoop地址：端口”的代码需和hadoop集群的配置一致 <property> <name>fs.default.name</name> <value>hdfs://singlehadoop:8020</value> </property>
6.在hadoop集群的hdfs-site.xml中加入如下属性，关闭权限校验。 <property> <name>dfs.permissions</name> <value>false</value> </property>

推荐文章：

【甘道夫】Win7环境下Eclipse连接Hadoop2.2.0