資料集
CREATE table users (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(30) NOT NULL,
email VARCHAR(120) NOT NULL,
age INT NOT NULL,
created_at DATETIME
);
INSERT INTO users (name, email, age, created_at)
VALUES
('Jack Hung', 'jackh32@gmail.com', 20, '2019-07-24 17:11:01'),
('Tony Liu', 'tonykk@gmail.com', 62, '2020-06-03 17:11:01'),
('Amy Chang', 'amychang@gmail.com', 32, '2020-05-11 17:11:01');
CREATE table products (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(30) NOT NULL,
price INT NOT NULL,
weight DECIMAL(5, 2),
category VARCHAR(20),
created_at DATETIME
);
INSERT INTO products (name, price, weight, category, created_at)
VALUES
('華速 intel i3 筆電', 20000, 2.12, 'NB', '2020-05-11 17:11:01'),
('Mac Pro 筆電', 62000, 1.4, 'NB', '2020-05-11 17:11:01'),
('微興電競筆電', 32000, 3.00, 'NB', '2020-05-11 17:11:01'),
('戈林冰箱', 22000, 13.78, '3C', '2020-05-11 17:11:01'),
('三力冰箱', 52000, 23.18, '3C', '2020-05-11 17:11:01'),
('C 語言入門', 420, 0.31, 'Book', '2020-05-11 17:11:01'),
('python3 實戰', 580, 0.28, 'Book', '2020-05-11 17:11:01'),
('JavaScript 英雄', 1000, 0.12, 'Book', '2020-05-11 17:11:01'),
('Java 資料分析', 340, -1, 'Book', '2020-05-11 17:11:01'),
('python 資料分析', 640, -0.43, 'Book', '2020-05-11 17:11:01');
CREATE table orders (
id INT PRIMARY KEY AUTO_INCREMENT,
amount INT NOT NULL,
-- 先宣告欄位,再設定外鍵 FOREIGN KEY REFERENCES
customer_id INT,
created_at DATETIME,
FOREIGN KEY (customer_id) REFERENCES users(id)
);
INSERT INTO orders (amount, customer_id, created_at)
VALUES
(188420, 1, '2020-05-11 17:11:01'),
(54000, 3, '2019-04-13 17:11:01'),
(104420, 2, '2020-06-21 17:11:01'),
(52420, 2, '2020-05-01 17:11:01'),
(104000, 2, '2020-07-11 17:11:01'),
(32420, 2, '2019-03-24 17:11:01');
CREATE table order_details (
id INT PRIMARY KEY AUTO_INCREMENT,
-- 先宣告欄位,再設定外鍵 FOREIGN KEY REFERENCES
order_id INT,
product_id INT,
created_at DATETIME,
FOREIGN KEY (order_id) REFERENCES orders(id),
FOREIGN KEY (product_id) REFERENCES products(id)
);
INSERT INTO order_details (order_id, product_id, created_at)
VALUES
(1, 1, '2020-05-11 17:11:01'),
(1, 2, '2020-05-11 17:11:01'),
(1, 3, '2020-05-11 17:11:01'),
(1, 4, '2020-05-11 17:11:01'),
(1, 5, '2020-05-11 17:11:01'),
(1, 6, '2020-05-11 17:11:01'),
(2, 3, '2020-04-13 17:11:01'),
(2, 4, '2020-04-13 17:11:01'),
(3, 5, '2020-06-21 17:11:01'),
(3, 6, '2020-06-21 17:11:01'),
(3, 5, '2020-06-21 17:11:01'),
(4, 5, '2020-05-01 17:11:01'),
(4, 6, '2020-05-01 17:11:01'),
(5, 5, '2020-07-11 17:11:01'),
(5, 5, '2020-07-11 17:11:01'),
(6, 6, '2020-03-24 17:11:01'),
(6, 3, '2020-03-24 17:11:01');
視窗函式
視窗函式(Windows function),用來細部分析使用的函式,將資料進行更細緻的細分成一個個窗格(Frame)方便更細部的計算。
視窗函式可以為以下兩種:
1.彙總函式(SUM、AVG 等)
2.專用函式:RANK()排名函式
<視窗函式> OVER (PARTITION BY 欄位 ORDER BY)
透過 category 進行分類並使用 RANK() 和進行排名
> 透過PARTITION將資料分類成3C、BOOK、NB三個FRAMES,分別處理
SELECT name, price, category, RANK() OVER(PARTITION BY category ORDER BY price) FROM products;
![](https://static.coderbridge.com/img/smpss96271/de94266c08ac4a3aa9371eddfcea63fe.png) 不使用 PARTITION 進行總排名
> 把所有資料視為一大個FRAME來做RANK()的排名。
SELECT name, price, category, RANK() OVER(ORDER BY price) FROM products;
![](https://static.coderbridge.com/img/smpss96271/5129e94d005f405dbb7e84ef905863a4.png) category 當作窗格進行累加價格
> 用category分類並累價格。
SELECT name, price, category, SUM(price) OVER(PARTITION BY category ORDER BY price) AS aggregate_price
FROM products;
![](https://static.coderbridge.com/img/smpss96271/72af36ed77ec48dd86c4a65194c5d44e.png) 移動平均計算
> 將tuple的price與前兩筆資料的price進行平均得此筆資料的平均值。另一種寫法:OVER (ORDER BY id ROWS 2 PRECEDING)
SELECT id, name, price, category,
AVG(price) OVER (ORDER BY id ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg_price
FROM products;
![](https://static.coderbridge.com/img/smpss96271/ebd5e57814014b72a3148fbbe1b0615c.png) > 將tuple的price與後一筆資料的price進行平均得此筆資料的平均值。 SELECT id, name, price, category,
AVG(price) OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) AS moving_avg_price
FROM products;
![](https://static.coderbridge.com/img/smpss96271/89804072120c4bd3a24240f1f4452f1d.png) 小計、總計 GROUP 運算子
>ROLLUP,可以讓我們根據條件進行分組小計和總計的計算。
SELECT name, category, SUM(price) FROM products GROUP BY category, name WITH ROLLUP;
![](https://static.coderbridge.com/img/smpss96271/847f231d420b44a283298f260dfba1c2.png) 常用營收分析日期函式
>搭配時間函式將訂單的資料進行分析,獲得日、月、年營收統計。 >>DATE() 取出日期時間中日期的部分:YYYY-MM-DD >>EXTRACT() 日期函式可以取出 TIMESTAMP 的日期部分(年 YEAR、月 MONTH等)
SELECT DATE('2020-03-24 17:11:01');
日營收分析
SELECT DATE(created_at), COUNT(*), SUM(amount), AVG(amount) FROM orders GROUP BY DATE(created_at) ORDER BY DATE(created_at);
月營收平均
SELECT EXTRACT(YEAR_MONTH FROM DATE(created_at)), COUNT(*), SUM(amount), AVG(amount) FROM orders GROUP BY EXTRACT(MONTH FROM DATE(created_at)) ORDER BY DATE(created_at);
年營收平均
SELECT EXTRACT(YEAR FROM DATE(created_at)), COUNT(*), SUM(amount), AVG(amount) FROM orders GROUP BY EXTRACT(YEAR FROM DATE(created_at)) ORDER BY DATE(created_at);
類別營收
SELECT products.category,SUM(price) FROM orders,order_details,products WHERE orders.id=order_details.order_id and products.id=order_details.product_id GROUP BY products.category;