laika222
4/21/2017 - 4:37 PM

Shows how order of operations can make a query much more efficient. This example shows how to complete a query that identifies people with a

Shows how order of operations can make a query much more efficient. This example shows how to complete a query that identifies people with a certain count (hits >= 500), and joins those rows with data from a second table (the full name of the player). There are three main parts to the query - grouping by ID, identifying the grouped players 500+ via the counts, and then joining the secondary information from the second table. This can be done using a single query that 1) joins both tables, 2) then groups by ID, 3) then identifies the people based on the counts, but that gets the results very inefficiently (it joins two entire tables, one of which is very large). Instead, this procedure shows how to make it much more efficient by changing the order - 1) grouping, 2) identifying by counts, 3) joining just the people who were identified in the counting step (300 rather than 300,000 in the single query method). Since this requires two separate SELECT statements, it must be a procedure using a temporary table.

-- A related procedure that allows the user to enter a year, and that year is then plugged into the SELECT statment that pulls the home run listings. For instance, if the user puts in 1959, it will pull the home run leaders for 1959.

DELIMITER $

DROP PROCEDURE IF EXISTS homeruns $
DROP TABLE IF EXISTS results $
CREATE PROCEDURE homeruns(p_enter_year varchar(4))

BEGIN

CREATE TABLE results (
eventID INT NOT NULL AUTO_INCREMENT,
BAT_ID VARCHAR(8),
HOME_RUN_COUNT INT(11),
PRIMARY KEY (eventID)
);

INSERT INTO results (BAT_ID, HOME_RUN_COUNT)
  SELECT  BAT_ID, COUNT(EVENT_CD) FROM events WHERE GAME_ID LIKE CONCAT('%', p_enter_year, '%') AND EVENT_CD = 23 GROUP BY BAT_ID;

SELECT DISTINCT a.BAT_ID, b.LAST_NAME_TX, b.FIRST_NAME_TX, (SELECT FORMAT(a.HOME_RUN_COUNT,0)) AS 'Number of Home Runs'
  FROM results a
  JOIN rosters b
  ON a.BAT_ID = b.PLAYER_ID
  GROUP BY a.BAT_ID
  ORDER BY a.HOME_RUN_COUNT;

DROP TABLE results ;

END $

DELIMITER ;

call homeruns(1959);



-- DETAIL!!!


DELIMITER $

-- Drop procedure homeruns if it already exists (so you can continually update it while testing), and drop the temporary results table if it already exists.

DROP PROCEDURE IF EXISTS homeruns $
DROP TABLE IF EXISTS results $

-- Create the procedure homeruns with one parameter, p_enter_year, which the users will use to input the year that she would like to view the home runs totals.

CREATE PROCEDURE homeruns(p_enter_year varchar(4))


-- Begin the procedure.

BEGIN

-- Creates results table which will temporarily hold the results of the first query

CREATE TABLE results (
eventID INT NOT NULL AUTO_INCREMENT,
BAT_ID VARCHAR(8),
HOME_RUN_COUNT INT(11),
PRIMARY KEY (eventID)
);

-- Inserts into the results table, into the BAT_ID and HOME_RUN_COUNT columns, the two column query results from the line below, WHERE GAME_ID contains the string value of p_enter_year entered by the user upon calling the procedure (apparently you must use CONCAT to place the string in-between the % wilcards at the beginning and end - for instance, if the person enters 1959, the CONCAT will results in '%1959%'.

INSERT INTO results (BAT_ID, HOME_RUN_COUNT)
  SELECT  BAT_ID, COUNT(EVENT_CD) FROM events WHERE GAME_ID LIKE CONCAT('%', p_enter_year, '%') AND EVENT_CD = 23 GROUP BY BAT_ID;

-- Selects the results placed into the results table, and then JOINs the LAST_NAME and FIRST_NAME from the rosters table, and FORMATs the HOME_RUN_COUNT column to include comma separators, and uses an alias of 'Number of Home Runs'

SELECT DISTINCT a.BAT_ID, b.LAST_NAME_TX, b.FIRST_NAME_TX, (SELECT FORMAT(a.HOME_RUN_COUNT,0)) AS 'Number of Home Runs'
  FROM results a
  JOIN rosters b
  ON a.BAT_ID = b.PLAYER_ID
  GROUP BY a.BAT_ID
  ORDER BY a.HOME_RUN_COUNT;

-- Drops the temporary results table

DROP TABLE results ;

-- CALLs the procedure

END $

DELIMITER ;

-- CALLs the procedure, and places in value of 1959. The procedure will then pull the list of home runs by player for the year 1959.

CALL homeruns(1959);
-- Procedure to pull all players with 500 or more total bases during the decade of the 1950's, and order them by number of total bases.
DELIMITER $

DROP PROCEDURE IF EXISTS hits_500 $
DROP TABLE IF EXISTS results $
CREATE PROCEDURE hits_500()

BEGIN

CREATE TABLE results (
eventID INT NOT NULL AUTO_INCREMENT,
BAT_ID VARCHAR(8),
HIT_COUNT INT(11),
PRIMARY KEY (eventID)
);

INSERT INTO results (BAT_ID, HIT_COUNT)
  SELECT BAT_ID, COUNT(EVENT_CD) FROM events WHERE EVENT_CD IN (20, 21, 22, 23) GROUP BY BAT_ID HAVING COUNT(EVENT_CD) >=500;

SELECT DISTINCT a.BAT_ID, b.LAST_NAME_TX, b.FIRST_NAME_TX, (SELECT FORMAT(a.HIT_COUNT,0)) AS 'Number of Hits'
  FROM results a
  JOIN rosters b
  ON a.BAT_ID = b.PLAYER_ID
  ORDER BY a.HIT_COUNT;

DROP TABLE results ;

END $

DELIMITER ;

call hits_500();


-- DETAIL!!!


DELIMITER $

-- Drop procedure hits_500 if it already exists (so you can continually update it while testing), and drop the temporary results table if it already exists.
DROP PROCEDURE IF EXISTS hits_500 $
DROP TABLE IF EXISTS results $

-- Create procedure hits_500 with no parameters
CREATE PROCEDURE hits_500()

-- Begin the procedure
BEGIN

-- Creates results table which will temporarily hold the results of the first query
CREATE TABLE results (
eventID INT NOT NULL AUTO_INCREMENT,
BAT_ID VARCHAR(8),
HIT_COUNT INT(11),
PRIMARY KEY (eventID)
);

-- Inserts into the results table, into the BAT_ID and HOME_RUN_COUNT columns, the two column query results from the line below, WHERE GAME_ID contains the string value of p_enter_year entered by the user upon calling the procedure (apparently you must use CONCAT to place the string in-between the % wilcards at the beginning and end - for instance, if the person enters 1959, the CONCAT will results in '%1959%'.
INSERT INTO results (BAT_ID, HIT_COUNT)
  SELECT BAT_ID, COUNT(EVENT_CD) FROM events WHERE EVENT_CD IN (20, 21, 22, 23) GROUP BY BAT_ID HAVING COUNT(EVENT_CD) >=500;

-- Selects the results placed into the results table, and then JOINs the LAST_NAME and FIRST_NAME from the rosters table, and FORMATs the HOME_RUN_COUNT column to include comma separators, and uses an alias of 'Number of Home Runs'
SELECT DISTINCT a.BAT_ID, b.LAST_NAME_TX, b.FIRST_NAME_TX, (SELECT FORMAT(a.HIT_COUNT,0)) AS 'Number of Hits'
  FROM results a
  JOIN rosters b
  ON a.BAT_ID = b.PLAYER_ID
  ORDER BY a.HIT_COUNT;

-- Drops the temporary results table
DROP TABLE results ;

-- ENDs the procedure
END $

DELIMITER ;

-- CALLs the procedure
CALL hits_500();