SQL実践:複雑なクエリの書き方と最適化テクニック
SQLは、リレーショナルデータベースと対話するための標準言語です。この記事では、基本的なSELECT文から始めて、複雑なJOIN、サブクエリ、ウィンドウ関数、そしてパフォーマンス最適化まで、実践的なSQLスキルを身につけます。
データベースエンジニアやバックエンド開発者を目指す方は、必ずマスターしておきたい内容です。
1. SQL基礎の復習
1.1 基本的なSELECT文
-- 全ての列を選択
SELECT * FROM users;
-- 特定の列を選択
SELECT id, name, email FROM users;
-- WHERE句によるフィルタリング
SELECT * FROM users WHERE age > 25;
-- ORDER BYによるソート
SELECT * FROM users ORDER BY created_at DESC;
-- LIMITによる件数制限
SELECT * FROM users ORDER BY created_at DESC LIMIT 10;
1.2 集計関数
-- COUNT(件数のカウント)
SELECT COUNT(*) FROM users;
-- SUM(合計)
SELECT SUM(amount) FROM orders;
-- AVG(平均)
SELECT AVG(age) FROM users;
-- MAX/MIN(最大値/最小値)
SELECT MAX(price), MIN(price) FROM products;
-- GROUP BY(グループ化)
SELECT category, COUNT(*) as count
FROM products
GROUP BY category;
2. JOINの詳細
2.1 INNER JOIN(内部結合)
-- 基本的なINNER JOIN
SELECT u.name, o.order_date, o.total_amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id;
-- 複数テーブルのJOIN
SELECT u.name, o.order_date, p.name as product_name, od.quantity
FROM users u
INNER JOIN orders o ON u.id = o.user_id
INNER JOIN order_details od ON o.id = od.order_id
INNER JOIN products p ON od.product_id = p.id;
2.2 LEFT JOIN(左外部結合)
-- 全てのユーザーと、注文がある場合は注文情報も取得
SELECT u.name, o.order_date, o.total_amount
FROM users u
LEFT JOIN orders o ON u.id = o.user_id;
-- 注文がないユーザーを特定
SELECT u.name
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE o.id IS NULL;
2.3 RIGHT JOINとFULL OUTER JOIN
-- RIGHT JOIN(右外部結合)
SELECT u.name, o.order_date
FROM orders o
RIGHT JOIN users u ON u.id = o.user_id;
-- FULL OUTER JOIN(完全外部結合)- PostgreSQLなど
SELECT u.name, o.order_date
FROM users u
FULL OUTER JOIN orders o ON u.id = o.user_id;
2.4 セルフJOIN(自己結合)
-- 従業員とその上司を取得
SELECT
e.name AS employee_name,
m.name AS manager_name
FROM employees e
LEFT JOIN employees m ON e.manager_id = m.id;
-- 同じカテゴリの他の商品を取得
SELECT p1.name AS product1, p2.name AS product2
FROM products p1
INNER JOIN products p2 ON p1.category_id = p2.category_id
WHERE p1.id < p2.id;
3. サブクエリ
3.1 スカラーサブクエリ
-- 各ユーザーの注文数を取得
SELECT
name,
(SELECT COUNT(*) FROM orders WHERE user_id = users.id) AS order_count
FROM users;
-- 平均より高い給与の従業員
SELECT name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
3.2 相関サブクエリ
-- 各カテゴリで最も価格が高い商品
SELECT p1.name, p1.price, p1.category_id
FROM products p1
WHERE p1.price = (
SELECT MAX(p2.price)
FROM products p2
WHERE p2.category_id = p1.category_id
);
-- 直近30日以内に注文したユーザー
SELECT DISTINCT u.name
FROM users u
WHERE EXISTS (
SELECT 1
FROM orders o
WHERE o.user_id = u.id
AND o.order_date >= CURRENT_DATE - INTERVAL '30 days'
);
3.3 IN句を使ったサブクエリ
-- 注文があるユーザーのみ取得
SELECT * FROM users
WHERE id IN (SELECT DISTINCT user_id FROM orders);
-- 高価格商品を注文したユーザー
SELECT * FROM users
WHERE id IN (
SELECT DISTINCT o.user_id
FROM orders o
INNER JOIN order_details od ON o.id = od.order_id
INNER JOIN products p ON od.product_id = p.id
WHERE p.price > 1000
);
4. ウィンドウ関数(Window Functions)
ウィンドウ関数は、行の集合に対して計算を行いながら、各行に対して結果を返す強力な機能です。
4.1 ROW_NUMBER, RANK, DENSE_RANK
-- 各カテゴリで価格順にランキング
SELECT
name,
category_id,
price,
ROW_NUMBER() OVER (PARTITION BY category_id ORDER BY price DESC) AS row_num,
RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS rank,
DENSE_RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS dense_rank
FROM products;
-- 各ユーザーの最新の注文を取得
SELECT user_id, order_date, total_amount
FROM (
SELECT
user_id,
order_date,
total_amount,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY order_date DESC) AS rn
FROM orders
) ranked
WHERE rn = 1;
4.2 LAG/LEAD(前後の値の取得)
-- 前日比の売上を計算
SELECT
order_date,
total_amount,
LAG(total_amount) OVER (ORDER BY order_date) AS prev_amount,
total_amount - LAG(total_amount) OVER (ORDER BY order_date) AS diff
FROM (
SELECT order_date, SUM(total_amount) as total_amount
FROM orders
GROUP BY order_date
) daily_sales;
4.3 SUM, AVGなどの集計関数をウィンドウ関数として使用
-- 累積合計
SELECT
order_date,
total_amount,
SUM(total_amount) OVER (ORDER BY order_date) AS cumulative_total
FROM orders;
-- 移動平均(直近7日間)
SELECT
order_date,
total_amount,
AVG(total_amount) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7days
FROM (
SELECT order_date, SUM(total_amount) as total_amount
FROM orders
GROUP BY order_date
) daily_sales;
5. 共通テーブル式(CTE:Common Table Expression)
CTEは、クエリ内で一時的な結果セットを定義するための機能で、可読性と保守性を向上させます。
5.1 基本的なCTE
-- シンプルなCTE
WITH high_value_orders AS (
SELECT * FROM orders WHERE total_amount > 1000
)
SELECT * FROM high_value_orders;
-- 複数のCTE
WITH
user_orders AS (
SELECT user_id, COUNT(*) as order_count, SUM(total_amount) as total_spent
FROM orders
GROUP BY user_id
),
active_users AS (
SELECT * FROM user_orders WHERE order_count >= 3
)
SELECT u.name, ao.order_count, ao.total_spent
FROM active_users ao
INNER JOIN users u ON ao.user_id = u.id;
5.2 再帰CTE
-- 組織階層の取得(再帰CTE)
WITH RECURSIVE org_hierarchy AS (
-- ベースケース(ルート)
SELECT id, name, parent_id, 0 AS level
FROM organizations
WHERE parent_id IS NULL
UNION ALL
-- 再帰ケース
SELECT o.id, o.name, o.parent_id, oh.level + 1
FROM organizations o
INNER JOIN org_hierarchy oh ON o.parent_id = oh.id
)
SELECT * FROM org_hierarchy ORDER BY level, id;
6. CASE式と条件分岐
-- 基本的なCASE式
SELECT
name,
age,
CASE
WHEN age < 20 THEN '10代'
WHEN age < 30 THEN '20代'
WHEN age < 40 THEN '30代'
ELSE '40代以上'
END AS age_group
FROM users;
-- 集計でのCASE式
SELECT
category,
COUNT(*) AS total,
COUNT(CASE WHEN price > 1000 THEN 1 END) AS expensive_count,
AVG(CASE WHEN price > 1000 THEN price END) AS avg_expensive_price
FROM products
GROUP BY category;
7. 日付・時刻関数
-- 日付の抽出
SELECT
order_date,
EXTRACT(YEAR FROM order_date) AS year,
EXTRACT(MONTH FROM order_date) AS month,
EXTRACT(DAY FROM order_date) AS day
FROM orders;
-- 日付の計算
SELECT
order_date,
order_date + INTERVAL '7 days' AS due_date,
CURRENT_DATE - order_date AS days_ago
FROM orders;
-- 日付のフォーマット(PostgreSQL)
SELECT
order_date,
TO_CHAR(order_date, 'YYYY-MM-DD') AS formatted_date,
TO_CHAR(order_date, 'Month YYYY') AS month_year
FROM orders;
8. 実践的なクエリ例
8.1 複雑な集計クエリ
-- 月次売上レポート
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
category_id,
SUM(total_amount) AS total_sales,
COUNT(*) AS order_count
FROM orders o
INNER JOIN order_details od ON o.id = od.order_id
INNER JOIN products p ON od.product_id = p.id
GROUP BY DATE_TRUNC('month', order_date), category_id
),
category_rankings AS (
SELECT
month,
category_id,
total_sales,
RANK() OVER (PARTITION BY month ORDER BY total_sales DESC) AS rank
FROM monthly_sales
)
SELECT
TO_CHAR(cr.month, 'YYYY-MM') AS month,
c.name AS category_name,
cr.total_sales,
cr.rank
FROM category_rankings cr
INNER JOIN categories c ON cr.category_id = c.id
WHERE cr.rank <= 3
ORDER BY cr.month DESC, cr.rank;
8.2 顧客セグメント分析
-- RFM分析(Recency, Frequency, Monetary)
WITH customer_metrics AS (
SELECT
u.id,
u.name,
MAX(o.order_date) AS last_order_date,
COUNT(o.id) AS order_count,
SUM(o.total_amount) AS total_spent,
CURRENT_DATE - MAX(o.order_date) AS days_since_last_order
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name
),
customer_segments AS (
SELECT
id,
name,
last_order_date,
order_count,
total_spent,
CASE
WHEN days_since_last_order <= 30 THEN 'Active'
WHEN days_since_last_order <= 90 THEN 'At Risk'
WHEN days_since_last_order <= 180 THEN 'Inactive'
ELSE 'Lost'
END AS recency_segment,
CASE
WHEN order_count >= 10 THEN 'High Frequency'
WHEN order_count >= 5 THEN 'Medium Frequency'
WHEN order_count >= 1 THEN 'Low Frequency'
ELSE 'No Orders'
END AS frequency_segment,
CASE
WHEN total_spent >= 10000 THEN 'High Value'
WHEN total_spent >= 5000 THEN 'Medium Value'
WHEN total_spent >= 0 THEN 'Low Value'
ELSE 'No Value'
END AS monetary_segment
FROM customer_metrics
)
SELECT
recency_segment,
frequency_segment,
monetary_segment,
COUNT(*) AS customer_count,
AVG(total_spent) AS avg_spent
FROM customer_segments
GROUP BY recency_segment, frequency_segment, monetary_segment
ORDER BY customer_count DESC;
9. パフォーマンス最適化
9.1 インデックスの活用
-- インデックスの作成
CREATE INDEX idx_user_email ON users(email);
CREATE INDEX idx_order_user_date ON orders(user_id, order_date);
CREATE INDEX idx_product_category ON products(category_id);
-- 複合インデックスの順序に注意
-- クエリでよく使用される順序でインデックスを作成
CREATE INDEX idx_order_details_order_product ON order_details(order_id, product_id);
9.2 EXPLAINによる実行計画の確認
-- 実行計画の表示
EXPLAIN ANALYZE
SELECT u.name, o.order_date, o.total_amount
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE u.email = 'user@example.com';
-- インデックスが使用されているか確認
-- Seq Scan(シーケンシャルスキャン)ではなくIndex Scanになっているか
9.3 クエリの最適化テクニック
-- 1. 必要な列のみを選択(SELECT * を避ける)
SELECT id, name FROM users; -- 良い
SELECT * FROM users; -- 悪い(必要な列のみ選択)
-- 2. WHERE句でインデックスが使用できるようにする
SELECT * FROM orders WHERE order_date >= '2024-01-01'; -- 良い
SELECT * FROM orders WHERE YEAR(order_date) = 2024; -- 悪い(関数を使うとインデックスが使えない)
-- 3. JOINの順序を最適化(小さなテーブルから)
SELECT u.name, o.order_date
FROM (SELECT * FROM orders WHERE order_date >= '2024-01-01') o
INNER JOIN users u ON o.user_id = u.id;
-- 4. サブクエリよりJOINを優先
-- 悪い例
SELECT * FROM users
WHERE id IN (SELECT user_id FROM orders WHERE total_amount > 1000);
-- 良い例
SELECT DISTINCT u.*
FROM users u
INNER JOIN orders o ON u.id = o.user_id
WHERE o.total_amount > 1000;
10. トランザクションとロック
10.1 トランザクション
-- トランザクションの開始
BEGIN;
-- 複数の操作を実行
UPDATE accounts SET balance = balance - 1000 WHERE id = 1;
UPDATE accounts SET balance = balance + 1000 WHERE id = 2;
-- コミット(確定)またはロールバック(取り消し)
COMMIT;
-- または
ROLLBACK;
10.2 デッドロックの回避
-- 常に同じ順序でロックを取得する
-- 例:常にIDの小さい順に更新
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
11. ビュー(View)とマテリアライズドビュー
11.1 ビューの作成
-- ビューの作成
CREATE VIEW user_order_summary AS
SELECT
u.id AS user_id,
u.name,
COUNT(o.id) AS order_count,
SUM(o.total_amount) AS total_spent,
MAX(o.order_date) AS last_order_date
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.name;
-- ビューの使用
SELECT * FROM user_order_summary WHERE order_count > 5;
-- ビューの削除
DROP VIEW user_order_summary;
11.2 マテリアライズドビュー
-- マテリアライズドビューの作成(PostgreSQL)
CREATE MATERIALIZED VIEW monthly_sales_mv AS
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(total_amount) AS total_sales
FROM orders
GROUP BY DATE_TRUNC('month', order_date);
-- リフレッシュ(更新)
REFRESH MATERIALIZED VIEW monthly_sales_mv;
12. まとめと次のステップ
この記事を通じて、SQLの実践的なスキルを学びました。
学んだこと
- JOINの詳細: INNER JOIN、LEFT JOIN、セルフJOINなど
- サブクエリ: スカラー、相関、IN句を使ったサブクエリ
- ウィンドウ関数: ROW_NUMBER、RANK、LAG/LEAD、集計関数
- CTE: 共通テーブル式と再帰CTE
- パフォーマンス最適化: インデックス、実行計画、クエリ最適化
- 高度な機能: ビュー、マテリアライズドビュー、トランザクション
次のステップ
- ストアドプロシージャ: データベース内でロジックを実行
- トリガー: データ変更時の自動処理
- パーティショニング: 大規模データの分割
- データベース固有の機能: 各DBMSの独自機能を学ぶ
- NoSQLデータベース: MongoDB、Redisなどの非リレーショナルDB
SQLは、データベースと対話するための強力な言語です。継続的な学習と実践を通じて、複雑なクエリも自信を持って書けるようになりましょう!
Happy Querying!
コメント
コメントを読み込み中...
