MySQL学习笔记:3道面试题小测(MySQL learning notes: 3 interview questions quiz)

一、每个部门工资第二高员工

MySQL8.0测试运行。

MySQL8.0测试运行。

1.题目

有一张公司员工信息表 ,有4个字段:

employee
employee_id varchar -- 员工ID
employee_name varchar -- 员工姓名
employee_salary int -- 员工薪酬
department varchar -- 部门ID

另外一张部门信息表 ,有2个字段:

department
department_id varchar -- 部门ID
department_name varchar -- 部门名称

请查询每个部门工资第二高员工,输出员工ID、员工姓名、员工薪酬、员工部门名称4个字段。

2.建表

-- 建表
DROP TABLE IF EXISTS employee;
CREATE TABLE employee(
employee_id VARCHAR(8),
employee_name VARCHAR(8),
employee_salary INT,
department VARCHAR(8)
)
ENGINE = INNODB
DEFAULT CHARSET = utf8mb4;
-- 插入数据
INSERT INTO employee
(employee_id, employee_name, employee_salary, department)
VALUE ('a001','Bob',7000,'b1')
     ,('a002','Jack',9000,'b1')
     ,('a003','Alice',8000,'b2')
     ,('a004','Ben',5000,'b2')
     ,('a005','Candy',4000,'b2')
     ,('a006','Allen',5000,'b2')
     ,('a007','Linda',10000,'b3');
     
-- 建表
DROP TABLE IF EXISTS department;
CREATE TABLE department(
department_id VARCHAR(8),
department_name VARCHAR(8)
)
ENGINE = INNODB
DEFAULT CHARSET = utf8mb4;
-- 插入数据
INSERT INTO
department (department_id,department_name) 
VALUE ('b1','Sales')
     ,('b2','IT')
     ,('b3','Product');

3.答案

使用窗口函数 进行分组排序。

rank() over(partition by xxx order by xxx)

窗口函数、子查询、多表连接

-- 答案
SELECT a.employee_id,
       a.employee_name,
       a.employee_salary,
       b.department_name
FROM
(
	SELECT *,
	       rank() over (PARTITION BY department ORDER BY employee_salary DESC) AS rn
	FROM employee
) a
LEFT JOIN department b
ON a.department = b.department_id
WHERE a.rn = 2;
'''
employee_id	employee_name	employee_salary	department_name
a001	Bob	7000	Sales
a004	Ben	5000	IT
a006	Allen	5000	IT
'''

当然,也可以先进行关联后,再分组排序。

二、网站登录时间间隔统计

1.题目

有一张网站登录情况表 ,记录用户登录信息,有2个字段:

login_info
user_id varchar -- 用户ID
login_time date -- 用户登录日期  2021-1-15

计算每个用户登录日期间隔小于5天的次数,输出用户ID、次数2个字段。

2.建表

-- 建表
DROP TABLE IF EXISTS login_info;
CREATE TABLE login_info(
user_id VARCHAR(8),
login_time DATE
)
ENGINE = INNODB
DEFAULT CHARSET = utf8mb4;
-- 插入数据
INSERT INTO
login_info (user_id,login_time) 
VALUE ('a001','2021-01-01')
,('b001','2021-01-01')
,('a001','2021-01-03')
,('a001','2021-01-06')
,('a001','2021-01-07')
,('b001','2021-01-07')
,('a001','2021-01-08')
,('a001','2021-01-09')
,('b001','2021-01-09')
,('b001','2021-01-10')
,('b001','2021-01-15')
,('a001','2021-01-16')
,('a001','2021-01-18')
,('a001','2021-01-19')
,('b001','2021-01-20')
,('a001','2021-01-23');

3.答案

利用偏移函数 处理时间间隔。

lead()

窗口函数、子查询、分组聚合、时间函数

-- 答案
SELECT a.user_id,
       COUNT(1) AS cnt
FROM
(
    SELECT user_id,
           login_time,
           lead(login_time) over (PARTITION BY user_id ORDER BY login_time) AS next_login_time
    FROM login_info
) a
WHERE TIMESTAMPDIFF(DAY, login_time, next_login_time) < 5
GROUP BY user_id;
'''
user_id	cnt
a001	8
b001	2
'''

注意:、 的区别,一个向前移,一个向后移。

lag
lead

三、用户购买渠道分析

1.题目

有一张用户购买信息表 ,记录了用户的购物信息,有4个字段:

purchase_channel
user_id varchar -- 用户ID
channel varchar -- 渠道
purchase_date date -- 购买日期
purchase_amount int -- 购买金额

请查询每天仅适用手机端、仅使用网页端的用户和同时使用两种渠道的不同用户人数,和总购物金额。

并且即使某天某渠道没有用户的购买信息,也需要展示。

输出:日期、购买渠道、总购买金额、不同用户人数4个字段。

2.建表

-- 建表
DROP TABLE IF EXISTS purchase_channel;
CREATE TABLE purchase_channel(
user_id VARCHAR(8),
channel VARCHAR(8),
purchase_date DATE,
purchase_amount INT
)
ENGINE = INNODB
DEFAULT CHARSET = utf8mb4;
-- 插入数据
INSERT INTO
purchase_channel (user_id,channel,purchase_date,purchase_amount) 
VALUE ('a001','app','2021-03-14',200)
     ,('a001','web','2021-03-14',100)
     ,('a002','app','2021-03-14',400)
     ,('a001','web','2021-03-15',3000)
     ,('a002','app','2021-03-15',900)
     ,('a003','app','2021-03-15',1000);

3.答案

根据用户ID和日期进行分组,统计用户在各个渠道的购买个数来判断采用方式(web、app、both)。

分别统计单个渠道,多个渠道数据,进行 合并。

union all

union all、分组聚合、数据去重、笛卡尔积

-- 答案
SELECT purchase_date,
       channel,
       SUM(sum_amount) AS sum_amount,
       SUM(user_cnt) AS total_users
FROM
(
	SELECT purchase_date,
	       MIN(channel) AS channel,
	       COUNT(DISTINCT user_id) AS user_cnt,
	       SUM(purchase_amount) AS sum_amount
	FROM purchase_channel
	GROUP BY purchase_date, user_id    
	HAVING COUNT(DISTINCT channel) = 1
	UNION ALL
	SELECT purchase_date,
	       'both' AS channel,       
	       COUNT(DISTINCT user_id) AS user_cnt,
	       SUM(purchase_amount) AS sum_amount
	FROM purchase_channel
	GROUP BY purchase_date, user_id
	HAVING COUNT(DISTINCT channel) > 1
) aa
GROUP BY purchase_date, channel;

此种结果只是将存在的日期、渠道列出来,未包括所有的,还待优化。

所有日期与渠道的笛卡尔积,再进行 关联操作即可。

left join
-- 最终答案
SELECT t1.purchase_date,
       t1.channel,
       t2.sum_amount,
       t2.total_users
FROM
(
	SELECT DISTINCT a.purchase_date,
			b.channel
	FROM purchase_channel a,
	(
		SELECT 'app' AS channel
		UNION ALL
		SELECT 'web' AS channel
		UNION ALL
		SELECT 'both' AS channel
	) b
) t1
LEFT JOIN
(
	SELECT purchase_date,
	       channel,
	       SUM(sum_amount) AS sum_amount,
	       SUM(user_cnt) AS total_users
	FROM
	(
		SELECT purchase_date,
		       MIN(channel) AS channel,
		       COUNT(DISTINCT user_id) AS user_cnt,
		       SUM(purchase_amount) AS sum_amount
		FROM purchase_channel
		GROUP BY purchase_date, user_id    
		HAVING COUNT(DISTINCT channel) = 1
		UNION ALL
		SELECT purchase_date,
		       'both' AS channel,       
		       COUNT(DISTINCT user_id) AS user_cnt,
		       SUM(purchase_amount) AS sum_amount
		FROM purchase_channel
		GROUP BY purchase_date, user_id
		HAVING COUNT(DISTINCT channel) > 1
	) aa
	GROUP BY purchase_date, channel
) t2
ON t1.purchase_date = t2.purchase_date
AND t1.channel = t2.channel
ORDER BY purchase_date, channel;
/*
purchase_date	channel	sum_amount	total_users
2021-03-14	app	400	1
2021-03-14	both	300	1
2021-03-14	web	\N	\N
2021-03-15	app	1900	2
2021-03-15	both	\N	\N
2021-03-15	web	3000	1
*/

参考链接:数据分析笔试题06

参考链接:数据分析笔试题06

————————

1、 The second highest paid employee in each department

MySQL8. 0 test run.

MySQL8. 0 test run.

1. Title

There is a company employee information table with four fields:

employee
employee_id varchar -- 员工ID
employee_name varchar -- 员工姓名
employee_salary int -- 员工薪酬
department varchar -- 部门ID

Another department information table has two fields:

department
department_id varchar -- 部门ID
department_name varchar -- 部门名称

Please query the employee with the second highest salary in each department and output four fields: employee ID, employee name, employee salary and employee department name.

2. Table building

-- 建表
DROP TABLE IF EXISTS employee;
CREATE TABLE employee(
employee_id VARCHAR(8),
employee_name VARCHAR(8),
employee_salary INT,
department VARCHAR(8)
)
ENGINE = INNODB
DEFAULT CHARSET = utf8mb4;
-- 插入数据
INSERT INTO employee
(employee_id, employee_name, employee_salary, department)
VALUE ('a001','Bob',7000,'b1')
     ,('a002','Jack',9000,'b1')
     ,('a003','Alice',8000,'b2')
     ,('a004','Ben',5000,'b2')
     ,('a005','Candy',4000,'b2')
     ,('a006','Allen',5000,'b2')
     ,('a007','Linda',10000,'b3');
     
-- 建表
DROP TABLE IF EXISTS department;
CREATE TABLE department(
department_id VARCHAR(8),
department_name VARCHAR(8)
)
ENGINE = INNODB
DEFAULT CHARSET = utf8mb4;
-- 插入数据
INSERT INTO
department (department_id,department_name) 
VALUE ('b1','Sales')
     ,('b2','IT')
     ,('b3','Product');

3. Answer

Use the window function to sort groups.

rank() over(partition by xxx order by xxx)

< strong > window function, sub query, multi table connection < / strong >

-- 答案
SELECT a.employee_id,
       a.employee_name,
       a.employee_salary,
       b.department_name
FROM
(
	SELECT *,
	       rank() over (PARTITION BY department ORDER BY employee_salary DESC) AS rn
	FROM employee
) a
LEFT JOIN department b
ON a.department = b.department_id
WHERE a.rn = 2;
'''
employee_id	employee_name	employee_salary	department_name
a001	Bob	7000	Sales
a004	Ben	5000	IT
a006	Allen	5000	IT
'''

Of course, you can also perform association before grouping and sorting.

2、 Website login interval statistics

1. Title

There is a website login status table, which records user login information and has two fields:

login_info
user_id varchar -- 用户ID
login_time date -- 用户登录日期  2021-1-15

Calculate the number of times when the login date interval of each user is less than 5 days, and output two fields: user ID and number of times.

2. Table building

-- 建表
DROP TABLE IF EXISTS login_info;
CREATE TABLE login_info(
user_id VARCHAR(8),
login_time DATE
)
ENGINE = INNODB
DEFAULT CHARSET = utf8mb4;
-- 插入数据
INSERT INTO
login_info (user_id,login_time) 
VALUE ('a001','2021-01-01')
,('b001','2021-01-01')
,('a001','2021-01-03')
,('a001','2021-01-06')
,('a001','2021-01-07')
,('b001','2021-01-07')
,('a001','2021-01-08')
,('a001','2021-01-09')
,('b001','2021-01-09')
,('b001','2021-01-10')
,('b001','2021-01-15')
,('a001','2021-01-16')
,('a001','2021-01-18')
,('a001','2021-01-19')
,('b001','2021-01-20')
,('a001','2021-01-23');

3. Answer

Use the offset function to process the time interval.

lead()

< strong > window function, sub query, grouping aggregation, time function < / strong >

-- 答案
SELECT a.user_id,
       COUNT(1) AS cnt
FROM
(
    SELECT user_id,
           login_time,
           lead(login_time) over (PARTITION BY user_id ORDER BY login_time) AS next_login_time
    FROM login_info
) a
WHERE TIMESTAMPDIFF(DAY, login_time, next_login_time) < 5
GROUP BY user_id;
'''
user_id	cnt
a001	8
b001	2
'''

Note: the difference between and is that one moves forward and the other moves backward.

lag
lead

3、 User purchase channel analysis

1. Title

There is a user purchase information table, which records the user’s purchase information and has four fields:

purchase_channel
user_id varchar -- 用户ID
channel varchar -- 渠道
purchase_date date -- 购买日期
purchase_amount int -- 购买金额

Please check the number of users who only use mobile phone, web page and two channels at the same time, and the total shopping amount.

Even if the user doesn’t need to display the information of the channel on a certain day.

Output: 4 fields: date, purchase channel, total purchase amount and number of different users.

2. Table building

-- 建表
DROP TABLE IF EXISTS purchase_channel;
CREATE TABLE purchase_channel(
user_id VARCHAR(8),
channel VARCHAR(8),
purchase_date DATE,
purchase_amount INT
)
ENGINE = INNODB
DEFAULT CHARSET = utf8mb4;
-- 插入数据
INSERT INTO
purchase_channel (user_id,channel,purchase_date,purchase_amount) 
VALUE ('a001','app','2021-03-14',200)
     ,('a001','web','2021-03-14',100)
     ,('a002','app','2021-03-14',400)
     ,('a001','web','2021-03-15',3000)
     ,('a002','app','2021-03-15',900)
     ,('a003','app','2021-03-15',1000);

3. Answer

Group according to the user ID and date, count the number of users’ purchases in each channel to judge the adoption method (web, app, both).

The data of a single channel and multiple channels are counted and consolidated respectively.

union all

< strong > union all, grouping aggregation, data De duplication, Cartesian product < / strong >

-- 答案
SELECT purchase_date,
       channel,
       SUM(sum_amount) AS sum_amount,
       SUM(user_cnt) AS total_users
FROM
(
	SELECT purchase_date,
	       MIN(channel) AS channel,
	       COUNT(DISTINCT user_id) AS user_cnt,
	       SUM(purchase_amount) AS sum_amount
	FROM purchase_channel
	GROUP BY purchase_date, user_id    
	HAVING COUNT(DISTINCT channel) = 1
	UNION ALL
	SELECT purchase_date,
	       'both' AS channel,       
	       COUNT(DISTINCT user_id) AS user_cnt,
	       SUM(purchase_amount) AS sum_amount
	FROM purchase_channel
	GROUP BY purchase_date, user_id
	HAVING COUNT(DISTINCT channel) > 1
) aa
GROUP BY purchase_date, channel;

This result only lists the existing dates and channels, does not include all, and needs to be optimized.

The Cartesian product of all dates and channels can be associated again.

left join
-- 最终答案
SELECT t1.purchase_date,
       t1.channel,
       t2.sum_amount,
       t2.total_users
FROM
(
	SELECT DISTINCT a.purchase_date,
			b.channel
	FROM purchase_channel a,
	(
		SELECT 'app' AS channel
		UNION ALL
		SELECT 'web' AS channel
		UNION ALL
		SELECT 'both' AS channel
	) b
) t1
LEFT JOIN
(
	SELECT purchase_date,
	       channel,
	       SUM(sum_amount) AS sum_amount,
	       SUM(user_cnt) AS total_users
	FROM
	(
		SELECT purchase_date,
		       MIN(channel) AS channel,
		       COUNT(DISTINCT user_id) AS user_cnt,
		       SUM(purchase_amount) AS sum_amount
		FROM purchase_channel
		GROUP BY purchase_date, user_id    
		HAVING COUNT(DISTINCT channel) = 1
		UNION ALL
		SELECT purchase_date,
		       'both' AS channel,       
		       COUNT(DISTINCT user_id) AS user_cnt,
		       SUM(purchase_amount) AS sum_amount
		FROM purchase_channel
		GROUP BY purchase_date, user_id
		HAVING COUNT(DISTINCT channel) > 1
	) aa
	GROUP BY purchase_date, channel
) t2
ON t1.purchase_date = t2.purchase_date
AND t1.channel = t2.channel
ORDER BY purchase_date, channel;
/*
purchase_date	channel	sum_amount	total_users
2021-03-14	app	400	1
2021-03-14	both	300	1
2021-03-14	web	\N	\N
2021-03-15	app	1900	2
2021-03-15	both	\N	\N
2021-03-15	web	3000	1
*/

Reference link: data analysis written test question 06

Reference link: data analysis written test question 06