-- Create an external Hive table that holds the region split keys for the
-- HBase bulk load. BinarySortableSerDe plus HiveNullValueSequenceFileOutputFormat
-- write the binary sequence file format that TotalOrderPartitioner reads.
create external table test.hb_range_keys(rowkey_range_start string)
row format serde 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe'
stored as inputformat 'org.apache.hadoop.mapred.TextInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat'
location '/tmp/hbase_splits/region5';-- storage directory for the split keys; referenced again in a later step
-- Register the row_sequence() UDF from hive-contrib, then sample the source
-- table to list candidate region split keys: take a 1/10000 bucket sample,
-- number the sampled rows, and keep every 910000th one (at most 11 keys).
add jar hdfs://hdfs_url/user/hive/jar/hive-contrib-2.1.1.jar;

create temporary function row_sequence
    as 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';

select row_key
from (
    select
        row_key,
        row_sequence() as seq
    from test_db.test_data_table
        tablesample(bucket 1 out of 10000 on row_key) s
    order by row_key
    limit 10000000
) x
where (seq % 910000) = 0
order by row_key
limit 11;
-- Populate the split-key table with hand-picked keys ('2','4','6','8').
-- BUG FIX: the original read ") tlateral view" (missing space); Hive parses
-- "tlateral" as the subquery alias and then fails on the stray "view" token.
-- The alias and the LATERAL VIEW clause must be separated: ") t lateral view".
insert overwrite table test.hb_range_keys
select c2
from (select concat_ws(",", '2', '4', '6', '8') c1) t
lateral view explode(split(c1, ',')) num as c2;
-- Add the HBase and Hive-HBase-handler jars required by the HFile output job.
add jar hdfs://hdfs_url/user/hive/jar/hbase-common-1.2.4.jar;
add jar hdfs://hdfs_url/user/hive/jar/hive-hbase-handler-2.1.1.jar;
add jar hdfs://hdfs_url/user/hive/jar/hbase-server-1.2.4.jar;
add jar hdfs://hdfs_url/user/hive/jar/hbase-client-1.2.4.jar;
add jar hdfs://hdfs_url/user/hive/jar/hbase-protocol-1.2.4.jar;
-- The TotalOrderPartitioner-based bulk load runs on the MapReduce engine.
set hive.execution.engine=mr;
-- Number of reducers = number of split keys + 1.
-- NOTE(review): the split-key table above is loaded with 4 keys, which would
-- imply 5 reducers, not 6 — confirm against the actual key count before running.
set mapred.reduce.tasks=6;
set hive.mapred.partitioner=org.apache.hadoop.mapred.lib.TotalOrderPartitioner;
-- Path of the partition-key file generated in the previous step.
set mapreduce.totalorderpartitioner.path=hdfs://hdfs_url/tmp/hbase_splits/region5/000000_0;
set hfile.compression=snappy;-- compress the generated HFiles with snappy
-- Staging table whose writes are emitted as HBase HFiles under /tmp/hbsort,
-- ready to be picked up by completebulkload in a later step.
create table test.hbsort(
row_key string,
column1 string,
column2 string,
column3 string,
column4 string )
stored as INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
-- hfile.family.path: HDFS directory receiving the HFiles; the last path
-- segment ("info") is presumably the HBase column family name — confirm
-- it matches the target table's family before loading.
TBLPROPERTIES ('hfile.family.path' = '/tmp/hbsort/info');
-- Write the source rows into the HFile-producing staging table; CLUSTER BY
-- row_key distributes and sorts by key so each reducer emits an ordered slice.
-- NOTE(review): "test_db.test_date" may be a typo for "test_db.test_data_table"
-- used in the sampling query earlier — confirm before running.
insert overwrite table test.hbsort
select
    row_key,
    column1,
    column2,
    column3,
    column4
from test_db.test_date
cluster by row_key;
## Bulk-load shell steps (run outside Hive).
## BUG FIX: the original exported the classpath *after* running the hadoop jar
## command that needs it, misspelled the variable as HADOOP_CLASSESPATH (which
## Hadoop ignores), and pointed it at /user/loacal/...; the export must come
## first, be named HADOOP_CLASSPATH, and use /usr/local/... (matching the jar
## path used in the hadoop command below).
## Put the HBase jars on the Hadoop classpath.
export HADOOP_CLASSPATH=/usr/local/service/hbase/lib/*
## Load the HFiles generated under /tmp/hbsort into HBase table "test:bulk_test".
hadoop jar /usr/local/service/hbase/lib/hbase-server-1.2.4.jar completebulkload hdfs://hdfs_url/tmp/hbsort/ test:bulk_test
## Finally, copy HBase's hbase-site.xml into Hadoop's etc configuration
## directory so the hadoop command can reach the HBase cluster.
-- Source: 168大数据 forum (http://www.bi168.cn/), Discuz! X3.2 page footer.