fabsta
3/6/2017 - 12:22 PM

hive cheat sheet

hive cheat sheet

Metadata

#Selecting a database   
USE database;   USE database;

#Listing databases  
SHOW DATABASES; SHOW DATABASES;

#Listing tables in a database   
SHOW TABLES;    SHOW TABLES;

#Describing the format of a table   
DESCRIBE table; 
DESCRIBE (FORMATTED|EXTENDED) table;

#Creating a database    
CREATE DATABASE db_name;    
CREATE external DATABASE db_name location '/user/demo/stats';

# Partition
CREATE external DATABASE 
(transdate Date,
transid Date)
db_name location '/user/demo/stats' Partitioned by (store String);
  ## column 'store' doesn't need to exist in create statement
show partitions TABLENAME;

# ORC
Create table states_orc STORED AS ORC TBLPROPERTIES("ORC.COMPRESS"="SNAPPY") as SELECT * FROM STATES;

#Dropping a database    
DROP DATABASE db_name;  DROP DATABASE db_name (CASCADE);

Table properties


Select

SELECT from_columns FROM table WHERE conditions;

Command line

#Run Query  
hive -e 'select a.col from tab1 a'

#Run Query Silent Mode  
hive -S -e 'select a.col from tab1 a'

#Set Hive Config Variables  
hive -e 'select a.col from tab1 a' -hiveconf hive.root.logger=DEBUG,console

#Use Initialization Script  
hive -i initialize.sql

#Run Non-Interactive Script 
hive -f script.sql


# Run script inside shell
source file_name

# Run ls (dfs) commands 
dfs –ls /user

# Run ls (bash command) from shell 
!ls

# Set configuration variables 
set mapred.reduce.tasks=32

# TAB auto completion 
set hive.<TAB>

# Show all variables starting with hive 
set

# Revert all variables 
reset

# Add jar to distributed cache 
add jar jar_path

# Show all jars in distributed cache 
list jars

# Delete jar from distributed cache 
delete jar jar_name