从HDFS上读取带lzo压缩的SequenceFile文件

使用java代码从HDFS上读取带lzo压缩的SequenceFile文件

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.*;

import java.io.*;
import java.util.*;

/**
 * Created by lcc on 17-7-31.
 */
/**
 * Reads LZO-compressed SequenceFile records from HDFS. For every file under
 * /weibo/01 it scans records ({@link BytesWritable} key, {@link MapWritable}
 * value) and prints the key set of the first record whose value map contains
 * exactly 29 entries, then moves on to the next file.
 *
 * <p>Created by lcc on 17-7-31.
 */
public class readweibo {

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        Path seqFile = new Path("/weibo/01");
        // Register the LZO codec so the compressed SequenceFile blocks can be decoded.
        conf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzoCodec");
        // "fs.default.name" is deprecated since Hadoop 2.x; "fs.defaultFS" is the
        // current key (the old one is still honored via the deprecation mapping).
        conf.set("fs.defaultFS", "hdfs://lcc-desktop:9000");

        FileSystem fs = FileSystem.get(conf);
        FileStatus[] fileStatuses = fs.listStatus(seqFile);

        // Distinct value-map sizes observed across all records (debug statistic).
        Set<Integer> distinctSizes = new LinkedHashSet<Integer>();
        int recordCount = 0;

        for (FileStatus status : fileStatuses) {
            // try-with-resources: the original never closed the reader, leaking it
            // once per file and on any exception thrown while iterating records.
            try (SequenceFile.Reader reader =
                    new SequenceFile.Reader(conf, SequenceFile.Reader.file(status.getPath()))) {

                // Reused across reader.next() calls; MapWritable.readFields()
                // repopulates the instance for each record.
                BytesWritable key = new BytesWritable();
                MapWritable value = new MapWritable();

                while (reader.next(key, value)) {
                    List<String> mapKeys = new LinkedList<String>();
                    for (Writable mapKey : value.keySet()) {
                        mapKeys.add(mapKey.toString());
                    }

                    distinctSizes.add(mapKeys.size());
                    recordCount++;

                    // Dump the key set of the first record with 29 entries and
                    // stop scanning this file (break exits the record loop only,
                    // matching the original behavior).
                    if (mapKeys.size() == 29) {
                        for (String mapKey : mapKeys) {
                            System.out.println(mapKey);
                        }
                        break;
                    }
                }
            }
        }

        // Debug output left disabled, as in the original:
        //System.out.println(recordCount);
        //for (Integer size : distinctSizes) System.out.println(size);
    }
}
HDFS数据用MapReduce导入Hbase

HDFS数据用MapReduce导入Hbase

断网环境下利用pip安装Python离线安装包

断网环境下利用pip安装Python离线安装包