SQL Basics
SQL (Structured Query Language) is the standard language for managing and manipulating relational databases.
Basic SELECT Statement
SQL
-- Select all columns from a table
SELECT * FROM employees;
-- Select specific columns
SELECT first_name, last_name, salary
FROM employees;
-- Using aliases for columns
SELECT
first_name AS "First Name",
last_name AS "Last Name",
salary * 12 AS annual_salary
FROM employees;
-- Using DISTINCT to remove duplicates
SELECT DISTINCT department_id
FROM employees;
Sample Tables for Examples
employees
| id | name | dept_id | salary |
|---|---|---|---|
| 1 | John | 101 | 50000 |
| 2 | Jane | 102 | 60000 |
| 3 | Bob | 101 | 55000 |
departments
| id | name |
|---|---|
| 101 | Engineering |
| 102 | Marketing |
| 103 | Sales |
SQL Order of Execution
SQL executes in this order (not as written):
1.
2.
3.
4.
5.
6.
7.
8.
1.
FROM / JOIN - Identify tables2.
WHERE - Filter rows3.
GROUP BY - Group rows4.
HAVING - Filter groups5.
SELECT - Select columns6.
DISTINCT - Remove duplicates7.
ORDER BY - Sort results8.
LIMIT / OFFSET - Limit results
Data Types
| Type | Description | Example |
|---|---|---|
INT |
Integer numbers | 42, -100 |
VARCHAR(n) |
Variable-length string | 'Hello' |
DECIMAL(p,s) |
Fixed-point numbers | 123.45 |
DATE |
Date values | '2024-01-15' |
BOOLEAN |
True/False | TRUE, FALSE |
Filtering Data
WHERE Clause
SQL
-- Basic comparison operators
SELECT * FROM employees WHERE salary > 50000;
SELECT * FROM employees WHERE department = 'Engineering';
SELECT * FROM employees WHERE hire_date >= '2023-01-01';
-- Comparison Operators: =, <>, !=, <, >, <=, >=
-- Multiple conditions with AND/OR
SELECT * FROM employees
WHERE department = 'Engineering' AND salary > 60000;
SELECT * FROM employees
WHERE department = 'Engineering' OR department = 'Marketing';
-- BETWEEN for ranges (inclusive)
SELECT * FROM employees
WHERE salary BETWEEN 40000 AND 60000;
-- IN for multiple values
SELECT * FROM employees
WHERE department IN ('Engineering', 'Marketing', 'Sales');
-- NOT IN
SELECT * FROM employees
WHERE department NOT IN ('HR', 'Finance');
Pattern Matching with LIKE
SQL
-- % matches any sequence of characters
SELECT * FROM employees WHERE name LIKE 'J%'; -- Starts with J
SELECT * FROM employees WHERE name LIKE '%son'; -- Ends with son
SELECT * FROM employees WHERE name LIKE '%oh%'; -- Contains oh
-- _ matches exactly one character
SELECT * FROM employees WHERE name LIKE '_ohn'; -- 4-letter name ending in ohn
SELECT * FROM employees WHERE name LIKE 'J___'; -- 4-letter name starting with J
-- Case-insensitive search (varies by database)
SELECT * FROM employees WHERE LOWER(name) LIKE 'j%';
NULL Values
SQL
-- Check for NULL
SELECT * FROM employees WHERE manager_id IS NULL;
SELECT * FROM employees WHERE manager_id IS NOT NULL;
-- COALESCE - returns first non-null value
SELECT name, COALESCE(phone, email, 'No Contact') AS contact
FROM employees;
-- NULLIF - returns NULL if both values are equal
SELECT NULLIF(salary, 0) AS salary FROM employees;
Important: Never use
= NULL or != NULL. Always use IS NULL or IS NOT NULL. In SQL, NULL represents unknown, and comparing anything with unknown results in unknown.
ORDER BY & LIMIT
SQL
-- Sort ascending (default)
SELECT * FROM employees ORDER BY salary;
SELECT * FROM employees ORDER BY salary ASC;
-- Sort descending
SELECT * FROM employees ORDER BY salary DESC;
-- Multiple sort columns
SELECT * FROM employees
ORDER BY department ASC, salary DESC;
-- LIMIT results (MySQL/PostgreSQL)
SELECT * FROM employees ORDER BY salary DESC LIMIT 10;
-- With OFFSET for pagination
SELECT * FROM employees
ORDER BY salary DESC
LIMIT 10 OFFSET 20; -- Skip 20, get next 10
-- TOP (SQL Server)
SELECT TOP 10 * FROM employees ORDER BY salary DESC;
JOINs
JOINs combine rows from two or more tables based on a related column between them.
Types of JOINs
| JOIN Type | Description |
|---|---|
INNER JOIN |
Returns only matching rows from both tables |
LEFT JOIN |
All rows from left table + matching from right |
RIGHT JOIN |
All rows from right table + matching from left |
FULL OUTER JOIN |
All rows from both tables |
CROSS JOIN |
Cartesian product (all combinations) |
SELF JOIN |
Table joined with itself |
INNER JOIN
SQL
-- INNER JOIN - only matching rows
SELECT
e.name AS employee_name,
d.name AS department_name,
e.salary
FROM employees e
INNER JOIN departments d ON e.dept_id = d.id;
-- Multiple JOINs
SELECT
e.name,
d.name AS department,
m.name AS manager
FROM employees e
INNER JOIN departments d ON e.dept_id = d.id
INNER JOIN employees m ON e.manager_id = m.id;
LEFT JOIN (LEFT OUTER JOIN)
SQL
-- LEFT JOIN - all from left, matching from right
SELECT
e.name AS employee_name,
d.name AS department_name
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.id;
-- Find employees without departments
SELECT e.name
FROM employees e
LEFT JOIN departments d ON e.dept_id = d.id
WHERE d.id IS NULL;
SELF JOIN
SQL
-- Find employee and their manager
SELECT
e.name AS employee,
m.name AS manager
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;
-- Find employees earning more than their manager
SELECT e.name, e.salary, m.name AS manager, m.salary AS manager_salary
FROM employees e
INNER JOIN employees m ON e.manager_id = m.id
WHERE e.salary > m.salary;
Pro Tip: When joining tables, always use table aliases (e, d, m) for readability. Use INNER JOIN when you only want matching records, LEFT JOIN when you need all records from the "main" table.
Aggregations & GROUP BY
Aggregate Functions
| Function | Description | Example |
|---|---|---|
COUNT() |
Count rows | COUNT(*), COUNT(column) |
SUM() |
Sum of values | SUM(salary) |
AVG() |
Average value | AVG(salary) |
MIN() |
Minimum value | MIN(salary) |
MAX() |
Maximum value | MAX(salary) |
SQL
-- Basic aggregations
SELECT
COUNT(*) AS total_employees,
COUNT(DISTINCT dept_id) AS num_departments,
SUM(salary) AS total_salary,
AVG(salary) AS avg_salary,
MIN(salary) AS min_salary,
MAX(salary) AS max_salary
FROM employees;
GROUP BY
SQL
-- Group by one column
SELECT
dept_id,
COUNT(*) AS employee_count,
AVG(salary) AS avg_salary
FROM employees
GROUP BY dept_id;
-- Group by multiple columns
SELECT
dept_id,
job_title,
COUNT(*) AS count,
AVG(salary) AS avg_salary
FROM employees
GROUP BY dept_id, job_title;
-- With JOIN
SELECT
d.name AS department,
COUNT(e.id) AS employee_count,
ROUND(AVG(e.salary), 2) AS avg_salary
FROM departments d
LEFT JOIN employees e ON d.id = e.dept_id
GROUP BY d.id, d.name;
HAVING Clause
HAVING filters groups after aggregation (WHERE filters rows before).
SQL
-- Departments with more than 5 employees
SELECT
dept_id,
COUNT(*) AS employee_count
FROM employees
GROUP BY dept_id
HAVING COUNT(*) > 5;
-- Departments with average salary > 50000
SELECT
d.name AS department,
AVG(e.salary) AS avg_salary
FROM departments d
JOIN employees e ON d.id = e.dept_id
GROUP BY d.id, d.name
HAVING AVG(e.salary) > 50000
ORDER BY avg_salary DESC;
WHERE vs HAVING:
•
•
•
•
•
WHERE filters rows before grouping•
HAVING filters groups after aggregation•
WHERE cannot use aggregate functions•
HAVING must be used with GROUP BY
Subqueries
A subquery is a query nested inside another query.
Subquery in WHERE
SQL
-- Employees earning above average
SELECT name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
-- Employees in Engineering department
SELECT name, salary
FROM employees
WHERE dept_id = (SELECT id FROM departments WHERE name = 'Engineering');
-- Using IN with subquery
SELECT name
FROM employees
WHERE dept_id IN (
SELECT id FROM departments
WHERE name IN ('Engineering', 'Marketing')
);
EXISTS and NOT EXISTS
SQL
-- Departments that have employees
SELECT d.name
FROM departments d
WHERE EXISTS (
SELECT 1 FROM employees e
WHERE e.dept_id = d.id
);
-- Departments with no employees
SELECT d.name
FROM departments d
WHERE NOT EXISTS (
SELECT 1 FROM employees e
WHERE e.dept_id = d.id
);
Subquery in SELECT
SQL
-- Add department name as scalar subquery
SELECT
e.name,
e.salary,
(SELECT d.name FROM departments d WHERE d.id = e.dept_id) AS department
FROM employees e;
-- Compare salary to department average
SELECT
name,
salary,
salary - (SELECT AVG(salary) FROM employees e2 WHERE e2.dept_id = e.dept_id)
AS diff_from_dept_avg
FROM employees e;
Subquery in FROM (Derived Tables)
SQL
-- Find departments with highest average salary
SELECT department, avg_salary
FROM (
SELECT
d.name AS department,
AVG(e.salary) AS avg_salary
FROM departments d
JOIN employees e ON d.id = e.dept_id
GROUP BY d.id, d.name
) AS dept_salaries
WHERE avg_salary = (
SELECT MAX(avg_sal) FROM (
SELECT AVG(salary) AS avg_sal
FROM employees
GROUP BY dept_id
) AS max_avg
);
Common Table Expressions (CTEs)
A CTE is a temporary named result set that you can reference within a SELECT, INSERT, UPDATE, or DELETE statement.
Basic CTE Syntax
SQL
-- Basic CTE
WITH dept_stats AS (
SELECT
dept_id,
COUNT(*) AS emp_count,
AVG(salary) AS avg_salary
FROM employees
GROUP BY dept_id
)
SELECT
d.name,
ds.emp_count,
ds.avg_salary
FROM dept_stats ds
JOIN departments d ON ds.dept_id = d.id;
-- Multiple CTEs
WITH
high_earners AS (
SELECT * FROM employees WHERE salary > 70000
),
department_counts AS (
SELECT dept_id, COUNT(*) AS count
FROM high_earners
GROUP BY dept_id
)
SELECT d.name, COALESCE(dc.count, 0) AS high_earner_count
FROM departments d
LEFT JOIN department_counts dc ON d.id = dc.dept_id;
CTE vs Subquery: CTEs are more readable, can be referenced multiple times, and are easier to debug. Use CTEs for complex queries and subqueries for simple ones.
Recursive CTE
SQL
-- Find employee hierarchy
WITH RECURSIVE emp_hierarchy AS (
-- Base case: top-level employees (no manager)
SELECT id, name, manager_id, 1 AS level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case: employees with managers
SELECT e.id, e.name, e.manager_id, eh.level + 1
FROM employees e
INNER JOIN emp_hierarchy eh ON e.manager_id = eh.id
)
SELECT * FROM emp_hierarchy
ORDER BY level, name;
-- Generate a series of numbers
WITH RECURSIVE numbers AS (
SELECT 1 AS n
UNION ALL
SELECT n + 1 FROM numbers WHERE n < 10
)
SELECT * FROM numbers;
Window Functions
Window functions perform calculations across a set of rows related to the current row, without collapsing the result set.
Basic Window Function Syntax
SQL
-- Syntax: function() OVER (PARTITION BY ... ORDER BY ...)
SELECT
name,
dept_id,
salary,
AVG(salary) OVER () AS company_avg,
AVG(salary) OVER (PARTITION BY dept_id) AS dept_avg
FROM employees;
Ranking Functions
SQL
SELECT
name,
dept_id,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num, -- 1, 2, 3, 4, 5
RANK() OVER (ORDER BY salary DESC) AS rank, -- 1, 2, 2, 4, 5 (gaps)
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank, -- 1, 2, 2, 3, 4 (no gaps)
NTILE(4) OVER (ORDER BY salary DESC) AS quartile -- Divide into 4 groups
FROM employees;
-- Rank within each department
SELECT
name,
dept_id,
salary,
RANK() OVER (PARTITION BY dept_id ORDER BY salary DESC) AS dept_rank
FROM employees;
LAG and LEAD
SQL
-- LAG: access previous row
-- LEAD: access next row
SELECT
name,
hire_date,
salary,
LAG(salary, 1) OVER (ORDER BY hire_date) AS prev_salary,
LEAD(salary, 1) OVER (ORDER BY hire_date) AS next_salary,
salary - LAG(salary, 1) OVER (ORDER BY hire_date) AS salary_change
FROM employees;
-- Calculate month-over-month growth
SELECT
month,
revenue,
LAG(revenue) OVER (ORDER BY month) AS prev_month,
ROUND(100.0 * (revenue - LAG(revenue) OVER (ORDER BY month))
/ LAG(revenue) OVER (ORDER BY month), 2) AS growth_pct
FROM monthly_sales;
Running Totals and Moving Averages
SQL
-- Running total
SELECT
date,
amount,
SUM(amount) OVER (ORDER BY date ROWS UNBOUNDED PRECEDING) AS running_total
FROM transactions;
-- 3-day moving average
SELECT
date,
value,
AVG(value) OVER (
ORDER BY date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS moving_avg_3day
FROM daily_metrics;
-- Cumulative percentage
SELECT
name,
salary,
SUM(salary) OVER (ORDER BY salary DESC) AS cumulative_salary,
ROUND(100.0 * SUM(salary) OVER (ORDER BY salary DESC)
/ SUM(salary) OVER (), 2) AS cumulative_pct
FROM employees;
FIRST_VALUE and LAST_VALUE
SQL
SELECT
name,
dept_id,
salary,
FIRST_VALUE(name) OVER (
PARTITION BY dept_id
ORDER BY salary DESC
) AS highest_paid_in_dept,
LAST_VALUE(name) OVER (
PARTITION BY dept_id
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS lowest_paid_in_dept
FROM employees;
Query Optimization
Indexing
SQL
-- Create index
CREATE INDEX idx_employee_dept ON employees(dept_id);
-- Create composite index
CREATE INDEX idx_emp_dept_salary ON employees(dept_id, salary);
-- Create unique index
CREATE UNIQUE INDEX idx_email ON employees(email);
-- Drop index
DROP INDEX idx_employee_dept;
When to Use Indexes
- Columns in WHERE clauses
- Columns used in JOINs
- Columns in ORDER BY
- Columns with high cardinality (many unique values)
Index Considerations:
• Indexes speed up reads but slow down writes
• Don't over-index - each index requires storage
• Composite index order matters: (a, b) is different from (b, a)
• Indexes speed up reads but slow down writes
• Don't over-index - each index requires storage
• Composite index order matters: (a, b) is different from (b, a)
EXPLAIN Plan
SQL
-- View query execution plan
EXPLAIN SELECT * FROM employees WHERE dept_id = 101;
-- Detailed explain (PostgreSQL)
EXPLAIN ANALYZE SELECT * FROM employees WHERE salary > 50000;
-- MySQL
EXPLAIN FORMAT=JSON SELECT * FROM employees;
Optimization Tips
SQL
-- ❌ Bad: SELECT * (fetches unnecessary columns)
SELECT * FROM employees;
-- ✅ Good: Select only needed columns
SELECT id, name, salary FROM employees;
-- ❌ Bad: Function on column prevents index use
SELECT * FROM employees WHERE YEAR(hire_date) = 2023;
-- ✅ Good: Sargable - can use index
SELECT * FROM employees
WHERE hire_date >= '2023-01-01' AND hire_date < '2024-01-01';
-- ❌ Bad: OR can prevent index use
SELECT * FROM employees WHERE dept_id = 1 OR salary > 50000;
-- ✅ Good: Use UNION for separate index scans
SELECT * FROM employees WHERE dept_id = 1
UNION
SELECT * FROM employees WHERE salary > 50000;
-- ❌ Bad: Using NOT IN with NULLs
SELECT * FROM employees WHERE dept_id NOT IN (SELECT id FROM inactive_depts);
-- ✅ Good: Use NOT EXISTS
SELECT * FROM employees e
WHERE NOT EXISTS (SELECT 1 FROM inactive_depts d WHERE d.id = e.dept_id);
Interview Query Problems
1. Second Highest Salary
SQL
-- Method 1: Using subquery
SELECT MAX(salary) AS second_highest
FROM employees
WHERE salary < (SELECT MAX(salary) FROM employees);
-- Method 2: Using DENSE_RANK
SELECT salary AS second_highest
FROM (
SELECT salary, DENSE_RANK() OVER (ORDER BY salary DESC) AS rank
FROM employees
) ranked
WHERE rank = 2;
-- Method 3: Using LIMIT/OFFSET
SELECT DISTINCT salary
FROM employees
ORDER BY salary DESC
LIMIT 1 OFFSET 1;
2. Find Duplicate Emails
SQL
-- Using GROUP BY and HAVING
SELECT email
FROM users
GROUP BY email
HAVING COUNT(*) > 1;
-- Using self join
SELECT DISTINCT a.email
FROM users a
JOIN users b ON a.email = b.email AND a.id != b.id;
3. Employees Earning More Than Manager
SQL
SELECT e.name AS employee
FROM employees e
JOIN employees m ON e.manager_id = m.id
WHERE e.salary > m.salary;
4. Department with Highest Average Salary
SQL
WITH dept_avg AS (
SELECT
d.name AS department,
AVG(e.salary) AS avg_salary,
RANK() OVER (ORDER BY AVG(e.salary) DESC) AS rank
FROM departments d
JOIN employees e ON d.id = e.dept_id
GROUP BY d.id, d.name
)
SELECT department, avg_salary
FROM dept_avg
WHERE rank = 1;
5. Consecutive Login Days
SQL
-- Find users who logged in for 3+ consecutive days
WITH consecutive_logins AS (
SELECT
user_id,
login_date,
login_date - INTERVAL ROW_NUMBER() OVER (
PARTITION BY user_id
ORDER BY login_date
) DAY AS group_date
FROM logins
)
SELECT user_id, COUNT(*) AS consecutive_days
FROM consecutive_logins
GROUP BY user_id, group_date
HAVING COUNT(*) >= 3;
6. Top 3 Salaries per Department
SQL
WITH ranked AS (
SELECT
d.name AS department,
e.name AS employee,
e.salary,
DENSE_RANK() OVER (
PARTITION BY e.dept_id
ORDER BY e.salary DESC
) AS rank
FROM employees e
JOIN departments d ON e.dept_id = d.id
)
SELECT department, employee, salary
FROM ranked
WHERE rank <= 3
ORDER BY department, salary DESC;
7. Year-over-Year Growth
SQL
WITH yearly_sales AS (
SELECT
YEAR(order_date) AS year,
SUM(amount) AS total_sales
FROM orders
GROUP BY YEAR(order_date)
)
SELECT
year,
total_sales,
LAG(total_sales) OVER (ORDER BY year) AS prev_year_sales,
ROUND(100.0 * (total_sales - LAG(total_sales) OVER (ORDER BY year))
/ LAG(total_sales) OVER (ORDER BY year), 2) AS yoy_growth_pct
FROM yearly_sales;
8. Pivot Table
SQL
-- Convert rows to columns (manual pivot)
SELECT
product,
SUM(CASE WHEN month = 'Jan' THEN sales ELSE 0 END) AS Jan,
SUM(CASE WHEN month = 'Feb' THEN sales ELSE 0 END) AS Feb,
SUM(CASE WHEN month = 'Mar' THEN sales ELSE 0 END) AS Mar
FROM monthly_sales
GROUP BY product;
Interview Tips:
• Always clarify requirements before writing
• Consider edge cases (NULL, empty tables, ties)
• Explain your thought process
• Discuss time complexity and optimization
• Test with sample data mentally
• Always clarify requirements before writing
• Consider edge cases (NULL, empty tables, ties)
• Explain your thought process
• Discuss time complexity and optimization
• Test with sample data mentally