调试加安装折腾了半天,怎么也没有配置好通过Eclipse直接连接hdfs的方法,最后我还是打成一个jar包放到Linux虚拟机中运行的。
运行命令 java -jar XXX.jar。
当中对hdfs的操作比较简单,主要就是FileSystem这一个类,这个东西搞懂了,你通过程序对hdfs进行操作自然而然也就很熟练了。
以下我举一个简单的从hdfs上读取文件内容的例子,分享给大家。
package com.pzoom.hdfs;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Demonstrates reading a file from HDFS with the Hadoop {@link FileSystem} API
 * and streaming its contents to standard output.
 *
 * <p>Run as: {@code java -jar XXX.jar} on a host that can reach the
 * NameNode at {@code hdfs://ubuntu:9000/}.
 */
public class PutFileToHdfs {

    /**
     * Reads {@code /README.txt} from HDFS and copies its bytes to
     * {@code System.out}.
     *
     * <p>The {@link FileSystem} is opened in a try-with-resources block so the
     * handle is always released — the original code leaked it, since
     * {@code IOUtils.copyBytes(..., true)} closes only the input stream, not
     * the FileSystem itself.
     *
     * @throws IOException if the NameNode is unreachable or the file cannot
     *                     be opened or read
     */
    private static void readFromHdfs() throws IOException {
        String dst = "hdfs://ubuntu:9000/";
        Configuration conf = new Configuration();
        // try-with-resources guarantees fs.close() even when copyBytes throws
        try (FileSystem fs = FileSystem.get(URI.create(dst), conf)) {
            String path = "/README.txt";
            FSDataInputStream hdfsInStream = fs.open(new Path(path));
            // 'true' tells copyBytes to close hdfsInStream when done
            IOUtils.copyBytes(hdfsInStream, System.out, conf, true);
        }
    }

    /**
     * Entry point: reads the sample file from HDFS and prints it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            readFromHdfs();
        } catch (IOException e) {
            // Best-effort demo: report the failure rather than crash silently.
            e.printStackTrace();
        }
    }
}