Structured Query Language (SQL) IBM RESEARCH PROJECT (SEQUEL) –E.F. CODD, JUNE 70, ACM –CONTINUED RESEARCH THROUGH 70S –CLONE MAKER BANDWAGON AMERICAN NATIONAL STANDARDS INSTITUTE –ANSI SQL-86, ANSI SQL-89, ANSI SQL-92 or SQL-2 Products providing SQL include: –Oracle from Oracle Corporation –SQL Server from Microsoft –SQLBase from Sybase –Ingres from Relational Technologies –DB2 Universal Database from IBM
SQL 의 구성 DDL(Data Definition Language) DML(Data Manipulation Language) –Interactive Embedded DML –C, Java 와 같은 일반언어 프로그램에서 DB 작업 View Definition Authorization –User, Group, 권한 Integrity – 제약 조건 지정 Transaction Control
DDL: Data definition language CREATE TABLE DROP TABLE ALTER TABLE CREATE INDEX DROP INDEX CREATE VIEW DROP VIEW
CREATE TABLE CREATE TABLE table_name ( attribute_name data-type [NOT NULL] [UNIQUE],... PRIMARY KEY (attribute_name,...), FOREIGN KEY (attribute_name) REFERENCES table_name,... ) ;
CREATE TABLE CREATE TABLE Employee ( EID INTEGER NOT NULL UNIQUE, PERHOUR DECIMAL(5,2) NOT NULL, ADDRESS CHAR(40), ZIP INTEGER, JOBTITLE CHAR(20), PRIMARY KEY (EID) ); CREATE TABLE Timesheets ( TID INTEGER NOT NULL UNIQUE, EID INTEGER, DATEWORKED DATE, HOURSWORKED INTEGER, PRIMARY KEY (TID), FOREIGN KEY (EID) REFERENCES EMPLOYEE );
DML: Data manipulation language DELETE UPDATE INSERT SELECT
SELECTION SELECT column_names, formulas,... FROM table_names WHERE row conditions are true ORDER BY column_name [ ASC | DESC ] ;
Selection 과 Relational Algebra Select A1, A2, …, An From R1, R2, …, Rm Where P 는 다음 RA 수식과 같다 $\Pi_{A_1, A_2, \ldots, A_n}\bigl(\sigma_{P}(R_1 \times R_2 \times \cdots \times R_m)\bigr)$ –A1, A2, …, An 은 테이블들의 Attribute 로 중복되는 경우 Ri.Aj 로 테이블명을 명시 –P 는 조건식으로, 필요한 경우 AND, OR, NOT 을 이용 –R1, R2, …, Rm 은 테이블 이름
SELECT ENAME, PERHOUR FROM EMPLOYEE WHERE CITY = 'AUSTIN' AND STATE = 'TX' ORDER BY ENAME ; SELECTION
SELECT AVG( PERHOUR ), MAX( PERHOUR ), MIN ( PERHOUR ) FROM EMPLOYEE ; SELECT COUNT( * ) FROM EMPLOYEE WHERE CITY = 'AUSTIN' AND STATE = 'TX' ; AGGREGATION (numeric functions)
SELECTION SELECT column_names, formulas,... FROM table_names WHERE row conditions are true GROUP BY column_names HAVING group conditions are true ORDER BY sort order ; * Every non-aggregated column in the SELECT list must also appear in GROUP BY.
SELECT AVG( PERHOUR ) FROM EMPLOYEE ; SELECT STATE, AVG( PERHOUR ) FROM EMPLOYEE GROUP BY STATE ; BY STATE
SELECT AVG( PERHOUR ) FROM EMPLOYEE ; SELECT STATE, AVG( PERHOUR ) FROM EMPLOYEE GROUP BY STATE ; SELECT CITY, STATE, AVG( PERHOUR ) FROM EMPLOYEE GROUP BY CITY, STATE ; BY CITY
SMALL GROUPS ? SELECT CITY, STATE, AVG( PERHOUR ) FROM EMPLOYEE GROUP BY CITY, STATE ; SELECT CITY, STATE, AVG( PERHOUR ) FROM EMPLOYEE GROUP BY CITY, STATE HAVING COUNT( * ) >= 3 ;
SELECT CITY, COUNT( * ) FROM EMPLOYEE WHERE STATE = 'TX' GROUP BY CITY HAVING COUNT( * ) >= 3 ; ROW & GROUP CONDITIONS
SELECT CITY, COUNT( * ) FROM EMPLOYEE WHERE STATE = 'TX' GROUP BY CITY HAVING COUNT( * ) >= 3 ; WHERE: ELIMINATES NON-TX ROWS ROW & GROUP CONDITIONS
SELECT CITY, COUNT( * ) FROM EMPLOYEE WHERE STATE = 'TX' GROUP BY CITY HAVING COUNT( * ) >= 3 ; HAVING: ELIMINATES SMALL GROUPS WHERE: ELIMINATES NON-TX ROWS ROW & GROUP CONDITIONS
SELECT ENAME, DATEWORKED, HOURSWORKED, PERHOUR FROM EMPLOYEE, TIMESHEETS WHERE EMPLOYEE.EID = TIMESHEETS.EID ORDER BY ENAME, DATEWORKED ; EMPLOYEE TIMESHEETS REPORT JOINING TABLES
SELECT ENAME, DATEWORKED, HOURSWORKED, PERHOUR FROM EMPLOYEE {INNER | LEFT | RIGHT} JOIN TIMESHEETS ON EMPLOYEE.EID = TIMESHEETS.EID ORDER BY ENAME, DATEWORKED ; JOINING TABLES (Inner/left/right join)
SELECT ENAME, DATEWORKED, HOURSWORKED, PERHOUR FROM EMPLOYEE, TIMESHEETS WHERE EMPLOYEE.EID = TIMESHEETS.EID ORDER BY ENAME, DATEWORKED ; SMITH200X SMITH200X SMITH200X WILSON200X WILSON200X WILSON200X WILSON200X
SELECT ENAME, DATEWORKED, HOURSWORKED, PERHOUR, HOURSWORKED * PERHOUR AS PAY FROM EMPLOYEE, TIMESHEETS WHERE EMPLOYEE.EID = TIMESHEETS.EID ORDER BY ENAME, DATEWORKED ; SMITH200X SMITH200X SMITH200X WILSON200X WILSON200X WILSON200X WILSON200X
SELECT ENAME, SUM( HOURSWORKED * PERHOUR ) FROM EMPLOYEE, TIMESHEETS WHERE EMPLOYEE.EID = TIMESHEETS.EID GROUP BY TIMESHEETS.EID, ENAME ORDER BY ENAME ; SMITH WILSON
WHICH REPAIRPERSON HAS THE HIGHEST HOURLY RATE ? ( 1 ) Find the highest HOURLY RATE ( 2 ) Identify REPAIRPERSON(S) with that RATE MULTI-STEP QUERY
(1) Find the highest rate SELECT MAX( PERHOUR ) FROM EMPLOYEE WHERE JOBTITLE = 'REPAIR' ; $ 26.50
(2) Identify matches SELECT ENAME FROM EMPLOYEE WHERE JOBTITLE = 'REPAIR' AND PERHOUR = 26.50 ; JACKSON, JEFF MITCHELL, TOM
TWO STEPS SELECT MAX( PERHOUR ) FROM EMPLOYEE WHERE JOBTITLE = 'REPAIR' ; SELECT ENAME FROM EMPLOYEE WHERE JOBTITLE = 'REPAIR' AND PERHOUR = 26.50 ;
SUBQUERIES SELECT ENAME FROM EMPLOYEE WHERE JOBTITLE = 'REPAIR' AND PERHOUR = ( SELECT MAX( PERHOUR ) FROM EMPLOYEE WHERE JOBTITLE = 'REPAIR' ) ;
FIND THE NAMES OF ALL SALESPEOPLE WHO SOLD AT LEAST ONE VEHICLE FOR MORE THAN $50,000 LAST MONTH QUERY OPTIMIZATION
FIND THE NAMES OF ALL SALESPEOPLE WHO SOLD AT LEAST ONE VEHICLE FOR MORE THAN $50,000 LAST MONTH TABLES: –EMPLOYEE ( ENAME, EID,... ) –SALES ( EID, VID, CID, SID, PRICE, SDATE,... )
SELECT ENAME FROM EMPLOYEE, SALES WHERE EMPLOYEE.EID = SALES.EID AND PRICE > 50000 AND month(SDATE) = month(date())-1 ; SELECT ENAME FROM EMPLOYEE WHERE EID IN ( SELECT DISTINCT EID FROM SALES WHERE PRICE > 50000 AND month(SDATE) = month(date())-1 ) ;
100 EMPLOYEE × 2,000 SALES = 200,000 EMPLOYEE-SALES CROSS PRODUCT RECORDS → 5 ENAMES
SELECT ENAME FROM EMPLOYEE, SALES WHERE EMPLOYEE.EID = SALES.EID AND PRICE > 50000 AND month(SDATE) = month(date())-1 ; 200,000 “RECORDS” ARE CHECKED AN INDEX ON FOREIGN KEY, SALES.EID, HELPS
SELECT ENAME FROM EMPLOYEE WHERE EID IN ( SELECT DISTINCT EID FROM SALES WHERE PRICE > 50000 AND month(SDATE) = month(date())-1 ) ; 2,000 RECORDS ARE CHECKED TO GENERATE LIST OF 8 EIDS ( 5 UNIQUE )
SELECT ENAME FROM EMPLOYEE WHERE EID IN ( 1234, 2345, 3456,..., 6789 ) ; 500 “RECORDS” ARE CHECKED TO GENERATE LIST OF 5 NAMES
SELECT ENAME FROM EMPLOYEE WHERE EID IN ( SELECT DISTINCT EID FROM SALES WHERE PRICE > 50000 AND month(SDATE) = month(date())-1 ) ; 2,000 RECORDS CHECKED IN SUBQUERIES 500 RECORDS CHECKED IN MAIN SELECT
DELETION DELETE FROM table_name WHERE condition is true ; DELETE FROM EMPLOYEE WHERE EID = 12345 ; DELETE FROM EMPLOYEE WHERE ENAME = 'SMITH, WILLIAM' ;
MODIFICATIONS UPDATE table_name SET column_name = expression, column_name = expression,... WHERE condition is true ;
UPDATE EMPLOYEE SET ADDRESS = '4321 AVENUE J', CITY = 'AUSTIN', STATE = 'TX', ZIP = 78701 WHERE EID = 12345 ; MODIFICATIONS
UPDATE EMPLOYEE SET ADDRESS = '4321 AVENUE J', CITY = 'AUSTIN', STATE = 'TX', ZIP = 78701 WHERE EID = 12345 ; UPDATE EMPLOYEE SET PERHOUR = 6.75 WHERE PERHOUR < 6.75 ; MODIFICATIONS
INSERTION INSERT INTO table_name ( column_list ) VALUES ( data_definition ) ; INSERT INTO table_name ( column_list ) SELECT statement ;
INSERT INTO EMPLOYEE ( EID, ENAME, PERHOUR, MINPAY ) VALUES ( 12345, 'DOE, JOHN', 16.20, 1200 ) ; EXTERNAL SOURCE
INTERNAL SOURCE INSERT INTO PAYROLL ( EID, PAYDATE, AMOUNT ) SELECT TIMESHEETS.EID, CURRENT_DATE, SUM( HOURSWORKED * PERHOUR ) FROM EMPLOYEE, TIMESHEETS WHERE EMPLOYEE.EID = TIMESHEETS.EID AND DATEWORKED BETWEEN (CURRENT_DATE-14) AND CURRENT_DATE GROUP BY TIMESHEETS.EID ;
SELECT DEPTNAME, ENAME, DATEWORKED, HOURSWORKED, PERHOUR FROM DEPT, EMPLOYEE, TIMESHEETS WHERE DEPT.DID = EMPLOYEE.DID AND EMPLOYEE.EID = TIMESHEETS.EID ORDER BY DEPTNAME ; JOINING THREE TABLES DEPT EMPLOYEE TIMESHEETS REPORT
Review of SQL DML SELECT & SELECT DISTINCT FROM WHERE –Comparison operators: =, <>, >, >=, <, <= –Boolean operators: AND, OR, NOT ORDER BY GROUP BY UPDATE STATEMENTS DELETE STATEMENTS INSERT STATEMENTS