1. First 10 rows
SELECT * FROM recent_grads LIMIT 10
2. Only where Woman is majority
SELECT Major, ShareWomen FROM recent_grads
WHERE ShareWomen >= 0.5
LIMIT 10
3. Operators
Less than: <
Less than or equal to: <=
Greater than: >
Greater than or equal to: >=
Equal to: =
Not equal to: !=
4. First what majors in engineering women is majority and then check all info about them.
SELECT Major FROM recent_grads
WHERE Major_category = 'Engineering' AND ShareWomen > 0.5
SELECT * FROM recent_grads
WHERE Major_category = 'Engineering' AND ShareWomen > 0.5
5. Parenthesis use.
SELECT Major, Major_category, ShareWomen, Unemployment_rate FROM recent_grads
WHERE Major_category = 'Engineering' AND (ShareWomen > 0.5 OR Unemployment_rate < 0.051)
SELECT Major_category, Major, Unemployment_rate FROM recent_grads
WHERE (Major_category = 'Engineering' OR Major_category = 'Physical Sciences') AND Unemployment_rate < 0.5
ORDER BY Unemployment_rate
6. Order by (ascending vs descending order)
ORDER BY Unemployment_rate
ORDER BY Unemployment_rate DESC
7. Using Functions (aggregate functions)
Count, Min, Max, Sum, Total, AVG (when there's floating numbers)
SELECT COUNT(Major) FROM recent_grads
WHERE ShareWomen < 0.5
SELECT MIN(Median) FROM recent_grads
WHERE Major_category = 'Engineering'
8. Multiple Functions
SELECT MIN(Median), MAX(Median), SUM(Total)
FROM recent_grads
9. List of Aggregate Functions
APPROX_COUNT_DISTINCT
AVG
MIN MAX
SUM
COUNT
VAR
VARP
10. AS
SELECT COUNT(*) AS "Total Students" FROM recent_grads
*The column won't be renamed, just the SQL result table
<p>**You don't need to write AS and if a_b_c_ no need for ""
11. DISTINCT
SELECT DISTINCT Major_category FROM recent_grads
SELECT DISTINCT Major, Major_category FROM recent_grads
12. Counting Unique Values
SELECT COUNT(DISTINCT(Major_category)) unique_major_categories FROM recent_grads
13. Counting Unique Values Multiple Columns
SELECT COUNT(DISTINCT(Major)) unique_majors,
COUNT(DISTINCT(Major_category)) unique_major_categories,
COUNT(DISTINCT(Major_code)) unique_major_codes
FROM recent_grads
14. Mixing up til now
SELECT Major, Major_category, (P75th - P25th) quartile_spread
FROM recent_grads
ORDER BY quartile_spread DESC
LIMIT 20
15. GROUP BY
SELECT Major_category, SUM(Employed)
FROM recent_grads
GROUP BY Major_category;
16. HAVING
SELECT Major_category, AVG(Employed) / AVG(Total) AS share_employed
FROM recent_grads
GROUP BY Major_category
HAVING share_employed > .8;
*When using GROUP BY, you can't use WHERE to filter (because it isn't a column) so you have to use HAVING instead.
17. ROUND
SELECT Major_category, ROUND(ShareWomen, 2) AS rounded_share_women
FROM recent_grads;
*Much better to see the numbers
18. CAST
*SQL might round down automatically results, if for example A/B = 0.123, it might show 0. To avoid this, use CAST to conver to FLOAT
SELECT CAST(Women as Float) / CAST(Total as Float) FROM recent_grads
19. Subqueries
SELECT Major, Unemployment_rate FROM recent_grads
WHERE Unemployment_rate < (SELECT AVG(Unemployment_rate) FROM recent_grads)
ORDER BY Unemployment_rate
SELECT CAST(COUNT(*) as float)/CAST((SELECT COUNT(*) from recent_grads) as float) proportion_abv_avg from recent_grads
WHERE ShareWomen >
(SELECT AVG(ShareWomen) from recent_grads)
*Subquery above is a challenging one.
20. IN
SELECT Major, Major_category FROM recent_grads
WHERE Major_category IN ('Business', 'Engineering')
*IN means = A OR B
21. Não entendi o query abaixo
SELECT Major, Major_category FROM recent_grads
WHERE Major_category IN (select Major_category from recent_grads
group by Major_category
order by SUM(Total) DESC
limit 5)
JOINS
SELECT * FROM facts<p>
INNER JOIN cities ON cities.facts_id = facts.id<p>
LIMIT 10<p>
2. One more
SELECT c.*, f.name country_name FROM facts f<p>
INNER JOIN cities c ON c.facts_id = f.id<p>
LIMIT 5;<p>
#Note that FROM when working with INNER JOIN might be one or the other, it doesn't matter
#INNER JOIN only include mutual matches (if a table country and a table cities,
#only countries that have a city there and only cities that find a country)
LEFT JOIN
SELECT f.name country, f.population FROM facts f<p>
LEFT JOIN cities ON cities.facts_id = f.id<p>
WHERE cities.facts_id IS NULL<p>
JOINING 3 OR MORE DATABASES
# SELECT t.track_id, t.name track_name, m.name track_type, il.unit_price, il.quantity FROM invoice_line il
# INNER JOIN track t ON t.track_id = il.track_id
# INNER JOIN media_type m ON t.media_type_id = m.media_type_id
# WHERE il.invoice_id = 4
Example 1 - Top 10 most sold artists
# SELECT
# ta.artist_name artist,
# COUNT(*) tracks_purchased
# FROM invoice_line il
# INNER JOIN (
# SELECT
# t.track_id,
# ar.name artist_name
# FROM track t
# INNER JOIN album al ON al.album_id = t.album_id
# INNER JOIN artist ar ON ar.artist_id = al.artist_id
# ) ta
# ON ta.track_id = il.track_id
# GROUP BY 1
# ORDER BY 2 DESC LIMIT 10;
Example 2 - Top 5 most sold albums
# SELECT
# ta.album_title album,
# ta.artist_name artist,
# COUNT(*) tracks_purchased
# FROM invoice_line il
# INNER JOIN (
# SELECT
# t.track_id,
# al.title album_title,
# ar.name artist_name
# FROM track t
# INNER JOIN album al ON al.album_id = t.album_id
# INNER JOIN artist ar ON ar.artist_id = al.artist_id
# ) ta
# ON ta.track_id = il.track_id
# GROUP BY 1, 2
# ORDER BY 3 DESC LIMIT 5;
Recursive Join and Concatatenation
# SELECT
# e1.first_name || " " || e1.last_name employee_name,
# e1.title employee_title,
# e2.first_name || " " || e2.last_name supervisor_name,
# e2.title supervisor_title
# FROM employee e1
# LEFT JOIN employee e2 ON e1.reports_to = e2.employee_id
# ORDER BY 1;
WITH (you create a temporary subquery)
# WITH playlist_info AS
# (
# SELECT
# p.playlist_id,
# p.name playlist_name,
# t.name track_name,
# (t.milliseconds / 1000) length_seconds
# FROM playlist p
# LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id
# LEFT JOIN track t ON t.track_id = pt.track_id
# )
# SELECT
# playlist_id,
# playlist_name,
# COUNT(track_name) number_of_tracks,
# SUM(length_seconds) length_seconds
# FROM playlist_info
# GROUP BY 1, 2
# ORDER BY 1;
VIEW (You create a permanent subquery and you can reuse it)
- Once a view is created it acts exactly like a table
# CREATE VIEW chinook.customer_2 AS
# SELECT * FROM chinook.customer;
# #Then if you want to edit it, you have to delete and create a new one
# DROP VIEW chinook.customer_2;
Remember!<p>
- you will use HAVING instead of WHERE when you create a virtual table (group by, sum), that is, when you use functions
# CREATE VIEW chinook.customer_gt_90_dollars AS
# SELECT c.*
# FROM chinook.invoice i
# INNER JOIN chinook.customer c ON i.customer_id = c.customer_id
# GROUP BY 1
# HAVING SUM(i.total) > 90;
# SELECT * FROM chinook.customer_gt_90_dollars;
UNION (different type of JOIN)
# SELECT * FROM customer_usa
# UNION
# SELECT * FROM customer_gt_90_dollars