SELECT t_msg.msg_id,t_msg.content , ...... FROM t_msg LEFT JOIN t_user ON t_msg.user_id = t_user.user_id LEFT JOIN t_group ON t_msg.group_id = t_group.group_id WHERE t_msg.gmt_modified >= date_sub('2018-05-20 09:31:45', INTERVAL 30 SECOND)
SELECT t_msg.msg_id,t_msg.content , ...... FROM t_msg LEFT JOIN t_user ON t_msg.user_id = t_user.user_id LEFT JOIN t_group ON t_msg.group_id = t_group.group_id WHERE t_user.gmt_modified >= date_sub('2018-04-29 09:31:44', INTERVAL 30 SECOND)
SELECT t_msg.msg_id,t_msg.content , ...... FROM t_msg LEFT JOIN t_user ON t_msg.user_id = t_user.user_id LEFT JOIN t_group ON t_msg.group_id = t_group.group_id WHERE t_group.gmt_modified >= date_sub('2018-04-29 09:31:44', INTERVAL 30 SECOND)
SELECT u.id userId, u.mobile, u.created_date createdDate FROM `user` u LEFT JOIN user_cash_detail ucd ON u.id= ucd.user_id WHERE 1=1 AND EXISTS( SELECT 1 FROM borrow b WHERE b.user_id= u.id AND b.borrow_no LIKE '202001011212XXX%') ORDER BY u.id limit 13;
SELECT distinct u.id userId, u.mobile, u.created_date createdDate FROM borrow b join `user` u on b.user_id= u.id where b.borrow_no LIKE '202001011212XXX%' order by u.id limit 13
create table user ( id int not null auto_increment, mobile varchar(20), created_date datetime, padding varchar(2000), primary key(id) ) engine=innodb;
create table user_cash_detail( id int not null auto_increment, user_id int not null, padding varchar(2000), primary key(id), key idx_userid(user_id) ) engine=innodb;
create table borrow( id int not null auto_increment, user_id int not null, borrow_no varchar(30), padding varchar(2000), primary key(id), key idx_user_id(user_id), key idx_borrowno(borrow_no) ) engine=innodb;
insert into user(id, mobile, created_date, padding) select 1000000 + n, 13500000000 + n, date_add('2024-01-01 00:00:00', interval n hour), rpad('', 1000, 'abcd ') from numbers;
insert into user_cash_detail(user_id, padding) select 1000000 + n, rpad('', 1000, 'abcd ') from numbers;
insert into borrow(user_id, borrow_no, padding) select 1000000 + n - n % 2, date_format(date_add('2019-06-01 00:00:00', interval n hour), '%Y%m%d%H%i%s'), rpad('', 1000, 'abcd ') from numbers;
在8.0中,这个SQL自动转换成了半连接。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
explain> SELECT u.id userId, u.mobile, u.created_date createdDate FROM user u LEFT JOIN user_cash_detail ucd ON u.id= ucd.user_id WHERE 1= 1 AND EXISTS( SELECT 1 FROM borrow b WHERE b.user_id= u.id AND b.borrow_no LIKE '202007201%') ORDER BY u.id limit 13;
+----+--------------+-------------+--------+--------------------------+--------------+---------+---------------------+------+----------+---------------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+--------------+-------------+--------+--------------------------+--------------+---------+---------------------+------+----------+---------------------------------+ | 1 | SIMPLE | <subquery2> | ALL | NULL | NULL | NULL | NULL | NULL | 100.00 | Using temporary; Using filesort | | 1 | SIMPLE | u | eq_ref | PRIMARY | PRIMARY | 4 | <subquery2>.user_id | 1 | 100.00 | NULL | | 1 | SIMPLE | ucd | ref | idx_userid | idx_userid | 4 | <subquery2>.user_id | 1 | 100.00 | Using where; Using index | | 2 | MATERIALIZED | b | range | idx_user_id,idx_borrowno | idx_borrowno | 123 | NULL | 10 | 100.00 | Using index condition | +----+--------------+-------------+--------+--------------------------+--------------+---------+---------------------+------+----------+---------------------------------+
mysql> create table mysql_stat( id int not null auto_increment, tenant_id int not null, instance_name varchar(30) not null, check_time datetime not null, padding varchar(200), primary key(id), key idx_tenantid_instname_checktime(tenant_id, instance_name, check_time) ) engine=innodb;
mysql> insert into mysql_stat(tenant_id, instance_name, check_time, padding) select 1, 'dtstack-dev1:3306', date_add('2024-01-01 00:00:00', interval n * 10 second) as check_time, rpad('x', 100, 'abcd ') from numbers_1m
mysql> explain SELECT * FROM mysql_stat WHERE tenant_id = 1 and instance_name='dtstack-dev1:3306' and check_time >='2024-02-01' and check_time <= '2024-03-01' ORDER BY check_time desc limit 1\G
# Query_time: 1.245558 Lock_time: 0.000281 Rows_sent: 1 Rows_examined: 481600 SET timestamp=1725604597; SELECT * FROM mysql_stat WHERE tenant_id = 1 and instance_name='dtstack-dev1:3306' and check_time >='2024-02-01' and check_time <= '2024-03-01' ORDER BY check_time desc limit 1;
我们给这个SQL加上一个force index提示。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
mysql> explain SELECT * FROM mysql_stat force index(idx_tenantid_instname_checktime) WHERE tenant_id = 1 and instance_name='dtstack-dev1:3306' and check_time >='2024-02-01' and check_time <= '2024-03-01' ORDER BY check_time desc limit 1\G
*************************** 1. row *************************** id: 1 select_type: SIMPLE table: mysql_stat type: range possible_keys: idx_tenantid_instname_checktime key: idx_tenantid_instname_checktime key_len: 41 ref: NULL rows: 468284 Extra: Using index condition
# Query_time: 0.000468 Lock_time: 0.000162 Rows_sent: 1 Rows_examined: 1 SET timestamp=1725604885; SELECT * FROM mysql_stat force index(idx_tenantid_instname_checktime) WHERE tenant_id = 1 and instance_name='dtstack-dev1:3306' and check_time >='2024-02-01' and check_time <= '2024-03-01' ORDER BY check_time desc limit 1;
SELECT * FROM t_exp t WHERE t.link_id in (x,x,x, ......) and t.com_id = xx and t.expense in (x,x,x) and t.link_type= 1 and ledger_status= 1 AND status = 1
Warning 3170 Memory capacity of N bytes for 'range_optimizer_max_mem_size' exceeded. Range optimization was not done for this query.
根据官方文档的描述,每个OR条件使用230字节,每个AND条件使用125字节,单个字段上的IN条件,会转换成OR。多个字段使用IN,会以组合形式展开,比如 c1 in (a,b) and c2 in (x,y,z)会转换成 (c1 = a and c2 = x) or (c1 = a and c2=y) or ( c1=a and c2=z) or (c1 = b and c2 = x) or (c1 = b and c2=y) or ( c1=b and c2=z) 。
我们案例中的SQL,有2个in条件会在range优化时展开。
1 2 3 4 5 6 7 8
SELECT * FROM t_exp t WHERE t.link_id in (x,x,x,…) and t.com_id = xx and t.expense in (x,x,x) and t.link_type= 1 and ledger_status= 1 AND status = 1
select * FROM trans_001 a LEFT JOIN purchase_001 b
ON a.serial_no = b.order_no AND a.dist_name = b.inst_id AND a.alino = b.alino
WHERE b.order_no IS NULL AND a.serial_no >= '201809200000057858291ALI' AND a.serial_no <= '201809216100342526601ALI' AND a.business_code = 'xx' AND a.trans_date = '20180921';
MySQL优化器在评估一个索引的访问效率时,要么使用Index Dive,要么使用索引统计信息。在表连接时,如果连接条件是一个范围,那么对于驱动表中的每一行记录,都需要进行一次Index Dive,来评估被驱动表里满足条件的记录数。这种情况下,执行计划的Extra 列中会显示“Range checked for each record”,很多时候,这也是一个查询性能不好的信号。
我们来看下面这个例子。
1 2 3 4 5 6 7 8 9 10 11
SELECT t1.ppd , ( SELECT sum(amount) AS total_amount FROM `order` WHERE pay_date <= t1.ppd AND pay_date >= date_sub(t1.ppd, INTERVAL 7 DAY) ) AS total_amount FROM ( select distinct pay_date as ppd from `order` where pay_date is not null ) t1
这样的SQL还有优化的空间吗?我们来尝试一下。先准备一点测试数据。
1 2 3 4 5 6 7 8 9 10 11 12 13 14
create table t_order( id int not null auto_increment, pay_date date not null, amount int not null, padding varchar(2000), primary key(id), key idx_paydate(pay_date) ) engine=innodb;
insert into t_order(pay_date, amount, padding) select date_add('2023-01-01', interval n % 365 day), 2000 + n % 1000, rpad('', 1000, 'abcd ') from numbers;
在我的测试环境中,原始SQL耗时35秒。
1 2 3 4 5 6 7 8 9 10 11 12 13
# Query_time: 35.923849 # Lock_time: 0.000086 # Rows_sent: 365 # Rows_examined: 3650365 SELECT t1.ppd , (SELECT sum(amount) AS total_amount FROM t_order WHERE pay_date <= t1.ppd AND pay_date >= date_sub(t1.ppd, INTERVAL 7 DAY) ) AS total_amount FROM ( select distinct pay_date as ppd from t_order where pay_date is not null ) t1;
我尝试把SQL改写成表连接,执行时间减少了一些。
1 2 3 4 5 6 7 8 9 10 11 12
# Query_time: 7.566888 # Lock_time: 0.000050 # Rows_sent: 365 # Rows_examined: 365 SELECT t1.ppd, sum(amount) AS leiji_amount FROM (select distinct pay_date as ppd from t_order where pay_date is not null) t1 , t_order t2 WHERE t1.ppd >= t2. pay_date AND t2.pay_date >= date_sub(t1.ppd, INTERVAL 7 DAY) GROUP BY t1.ppd;
就这个SQL的逻辑,我们还可以使用MySQL 8.0的窗口函数,这样SQL执行又更快了一些。
1 2 3 4 5 6 7 8 9 10 11 12 13
# Query_time: 0.776545 # Lock_time: 0.000046 # Rows_sent: 365 # Rows_examined: 730 select pay_date, sum(total_amount) over( order by pay_date range BETWEEN INTERVAL 7 DAY PRECEDING AND CURRENT ROW) from ( select pay_date, sum(amount) as total_amount from t_order where pay_date is not null group by pay_date ) t;
考虑到SQL中只用到了2个字段,我们给这个SQL建立一个联合索引。
1
alter table t_order add key idx_paydate_amount(pay_date, amount);
这样执行的速度又提升了一些。
1 2 3 4 5 6 7 8 9 10 11 12 13
# Query_time: 0.090399 # Lock_time: 0.000026 # Rows_sent: 365 # Rows_examined: 730 select pay_date, sum(total_amount) over( order by pay_date range BETWEEN INTERVAL 7 DAY PRECEDING AND CURRENT ROW) from ( select pay_date, sum(amount) as total_amount from t_order where pay_date is not null group by pay_date ) t;