t.order_id | t.order_time | t.user_id | t.order_status | t.start_date | t.end_date | t.dt | |
1 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 已完成 | 20180104 | 99991231 | 20180104 |
2 | C3 | 2018-01-02 17:15:01 | 5FECEB66FF | 待收货 | 20180104 | 99991231 | 20180104 |
3 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 已完成 | 20180104 | 99991231 | 20180104 |
t.order_id | t.order_time | t.user_id | t.order_status | t.start_date | t.end_date | t.dt | |
1 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 待收货 | 20180103 | 20180103 | 20180104 |
2 | C3 | 2018-01-02 17:15:01 | 5FECEB66FF | 已支付 | 20180102 | 20180103 | 20180104 |
3 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 待收货 | 20180103 | 20180103 | 20180104 |
CREATE EXTERNAL TABLE `tmp_order`(
`order_id` string COMMENT '订单ID',
`order_time` string COMMENT '下单时间',
`user_id` string COMMENT '用户ID',
`order_status` string COMMENT '订单状态')
PARTITIONED BY (
`dt` string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION
'hdfs://ns1/tmp/order';
ALTER TABLE tmp_order ADD IF NOT EXISTS partition(dt='20180101') LOCATION '20180101';
ALTER TABLE tmp_order ADD IF NOT EXISTS partition(dt='20180102') LOCATION '20180102';
ALTER TABLE tmp_order ADD IF NOT EXISTS partition(dt='20180103') LOCATION '20180103';
[mapred@gateway2 ~]$ hdfs dfs -cat /tmp/order/20180101/20180101.txt
052018-01-01 07:10:3186A08095B2待支付20180101
EF2018-01-01 18:41:00BB89D486A3已支付20180101
[mapred@gateway2 ~]$ hdfs dfs -cat /tmp/order/20180102/20180102.txt
052018-01-01 07:10:3186A08095B2已支付20180102
C32018-01-02 17:15:015FECEB66FF已支付20180102
[mapred@gateway2 ~]$ hdfs dfs -cat /tmp/order/20180103/20180103.txt
052018-01-01 07:10:3186A08095B2待收货20180103
EF2018-01-01 18:41:00BB89D486A3待收货20180103
t.order_id | t.order_time | t.user_id | t.order_status | t.dt | |
1 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 待支付 | 20180101 |
2 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 已支付 | 20180101 |
3 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 已支付 | 20180102 |
4 | C3 | 2018-01-02 17:15:01 | 5FECEB66FF | 已支付 | 20180102 |
5 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 待收货 | 20180103 |
6 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 待收货 | 20180103 |
订单ID | 下单时间 | 用户ID | 订单状态 | 分区key |
order_id | order_time | user_id | order_status | dt |
CREATE EXTERNAL TABLE `tmp_order_detail`(
`order_id` string COMMENT '订单ID',
`order_time` string COMMENT '下单时间',
`user_id` string COMMENT '用户ID',
`order_status` string COMMENT '订单状态',
`start_date` string COMMENT '开始生效日期',
`end_date` string COMMENT '失效日期')
PARTITIONED BY (
`dt` string)
STORED AS PARQUET
LOCATION
'hdfs://ns1/tmp/order_detail';
INSERT OVERWRITE TABLE tmp_order_detail PARTITION (dt='20180103')
SELECT
order_id,order_time,user_id,order_status,dt AS start_date,
regexp_replace (
split (
LAG (
date_add(
from_unixtime(
unix_timestamp(dt, 'yyyyMMdd')
),
- 1
),
1,
'9999-12-31 00:00:00'
) OVER (
PARTITION BY order_id
ORDER BY
dt DESC
),
' '
)[0], '-',
''
) AS end_date
FROM
tmp_order
WHERE dt BETWEEN '20180101' AND '20180103'
t.order_id | t.order_time | t.user_id | t.order_status | t.start_date | t.end_date | t.dt | |
1 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 待收货 | 20180103 | 99991231 | 20180103 |
2 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 已支付 | 20180102 | 20180102 | 20180103 |
3 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 待支付 | 20180101 | 20180101 | 20180103 |
4 | C3 | 2018-01-02 17:15:01 | 5FECEB66FF | 已支付 | 20180102 | 99991231 | 20180103 |
5 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 待收货 | 20180103 | 99991231 | 20180103 |
6 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 已支付 | 20180101 | 20180102 | 20180103 |
[mapred@gateway2 ~]$ hdfs dfs -cat /tmp/order/20180104/20180104.txt
052018-01-01 07:10:3186A08095B2已完成20180104
C32018-01-02 17:15:015FECEB66FF待收货20180104
EF2018-01-01 18:41:00BB89D486A3已完成20180104
INSERT OVERWRITE TABLE tmp_order_detail PARTITION (dt='20180104')
SELECT
order_id,order_time,user_id,order_status,start_date,
regexp_replace (
split (
LAG (
date_add(
from_unixtime(
unix_timestamp(start_date, 'yyyyMMdd')
),
- 1
),
1,
'9999-12-31 00:00:00'
) OVER (
PARTITION BY order_id
ORDER BY
start_date DESC
),
' '
)[0], '-',
''
) AS end_date
FROM
(
SELECT
order_id,order_time,user_id,order_status,dt AS start_date
FROM
tmp_order
WHERE dt = '20180104'
UNION ALL
SELECT
order_id,order_time,user_id,order_status,start_date
FROM
tmp_order_detail
WHERE dt = '20180103'
) tmp
t.order_id | t.order_time | t.user_id | t.order_status | t.start_date | t.end_date | t.dt | |
1 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 已完成 | 20180104 | 99991231 | 20180104 |
2 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 待收货 | 20180103 | 20180103 | 20180104 |
3 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 已支付 | 20180102 | 20180102 | 20180104 |
4 | 05 | 2018-01-01 07:10:31 | 86A08095B2 | 待支付 | 20180101 | 20180101 | 20180104 |
5 | C3 | 2018-01-02 17:15:01 | 5FECEB66FF | 待收货 | 20180104 | 99991231 | 20180104 |
6 | C3 | 2018-01-02 17:15:01 | 5FECEB66FF | 已支付 | 20180102 | 20180103 | 20180104 |
7 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 已完成 | 20180104 | 99991231 | 20180104 |
8 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 待收货 | 20180103 | 20180103 | 20180104 |
9 | EF | 2018-01-01 18:41:00 | BB89D486A3 | 已支付 | 20180101 | 20180102 | 20180104 |
欢迎光临 168大数据 (http://www.bi168.cn/) | Powered by Discuz! X3.2 |