razhangwei
3/3/2015 - 2:25 PM

Hive: get mention

Hive: get mention

-- @param: s_reg, p_reg, o_reg
-- @param: target_date


set delta_day=15;
set start_time=date_sub(${hiveconf:target_date}, ${delta_day});
set end_time=date_add(${hiveconf:target_date}, ${delta_day});
set start_year=year(${start_time});
set start_month=month(${start_time});
set start_day=day(${start_time});
set end_year=year(${end_time});
set end_month=month(${end_time});
set end_day=day(${end_time});

DROP TABLE IF EXISTS tt_spo;

CREATE TABLE tt_spo
as SELECT  id_str, text, created_at
from gh_rc2
where (year>${hiveconf:start_year} OR (year=${hiveconf:start_year} AND (month> ${hiveconf:start_month} OR month=${hiveconf:start_month} AND day >= ${hiveconf:start_day})))
	AND (year<${hiveconf:end_year} OR (year=${hiveconf:end_year} AND (month< ${hiveconf:end_month} OR month=${hiveconf:end_month} AND day <= ${hiveconf:end_day})))	 
	AND (lower(text) RLIKE ${hiveconf:s_reg} OR lower(text) RLIKE ${hiveconf:p_reg} OR lower(text) RLIKE ${hiveconf:o_reg});


DROP VIEW IF EXISTS view_s;
DROP VIEW IF EXISTS view_p;
DROP VIEW IF EXISTS view_o;

CREATE VIEW view_s
AS SELECT *
FROM tt_spo
WHERE lower(text) RLIKE ${hiveconf:s_reg} AND created_at IS NOT NULL;

CREATE VIEW view_p
AS SELECT *
FROM tt_spo
WHERE lower(text) RLIKE ${hiveconf:p_reg} AND created_at IS NOT NULL;

CREATE VIEW view_o
AS SELECT *
FROM tt_spo
WHERE lower(text) RLIKE ${hiveconf:o_reg} AND created_at IS NOT NULL;

-- Save the tabls

set hive.cli.print.header=true;

SELECT *
FROM view_s;

SELECT *
FROM view_p;


SELECT *
FROM view_o;


-- Join the table and save
SELECT *
FROM view_s 
where lower(text) RLIKE ${hiveconf:p_reg};

SELECT *
FROM view_s 
where lower(text) RLIKE ${hiveconf:o_reg};

SELECT *
FROM view_p
where lower(text) RLIKE ${hiveconf:o_reg};

SELECT *
FROM view_s 
where lower(text) RLIKE ${hiveconf:p_reg} AND lower(text) RLIKE ${hiveconf:o_reg};