SQL Server 数据库死锁处理超详细攻略
一、死锁概述
1.1 什么是死锁
死锁是指两个或多个事务相互等待对方持有的资源,导致所有事务都无法继续执行的状态。
1.2 死锁的必要条件(Coffman条件)
- 互斥条件:资源只能被一个进程使用
- 占有并等待:进程已持有资源,同时请求新资源
- 不可剥夺:资源不能被强制从持有进程中夺走
- 循环等待:存在进程-资源的循环等待链
二、死锁检测与监控
2.1 系统视图和扩展事件
-- 1. 使用系统视图查看当前死锁信息
SELECT * FROM sys.dm_tran_locks;
SELECT * FROM sys.dm_os_waiting_tasks;
-- 2. 查询最近发生的死锁
SELECT
event_date,
CAST(event_data AS XML) AS DeadlockGraph
FROM sys.fn_xe_file_target_read_file(
'system_health*.xel',
NULL, NULL, NULL
)
WHERE event_data LIKE '%deadlock%';
-- 3. 创建死锁监控扩展事件
CREATE EVENT SESSION [Deadlock_Monitor]
ON SERVER
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.event_file(SET filename=N'C:\Logs\Deadlock_Monitor.xel')
WITH (STARTUP_STATE=ON);
GO
2.2 SQL Server Profiler 跟踪死锁
新建跟踪
选择事件:Deadlock graph、Lock:Deadlock、Lock:Deadlock Chain
保存跟踪结果分析
2.3 实时监控脚本
-- 实时监控死锁
SELECT
DATEADD(ms, -1 * (SELECT cpu_ticks/(cpu_ticks/ms_ticks)
FROM sys.dm_os_sys_info),
DATEADD(ms, -1 * ([timestamp]), GETDATE())) AS [Deadlock_Time],
CAST(event_data AS XML) AS [Deadlock_Graph]
FROM (
SELECT XEvent.query('.') AS [event_data], XEvent.value('(@timestamp)[1]', 'bigint') AS [timestamp]
FROM (
SELECT CAST(target_data AS XML) AS TargetData
FROM sys.dm_xe_session_targets st
JOIN sys.dm_xe_sessions s ON s.address = st.event_session_address
WHERE s.name = 'system_health'
AND st.target_name = 'ring_buffer'
) AS Data
CROSS APPLY TargetData.nodes('RingBufferTarget/event[@name="xml_deadlock_report"]') AS XEventData(XEvent)
) AS tab
ORDER BY Deadlock_Time DESC;
三、死锁分析方法
3.1 解析死锁图XML
-- 解析死锁图信息
WITH DeadlockData AS (
SELECT
event_date,
CAST(event_data AS XML) AS DeadlockGraph
FROM sys.fn_xe_file_target_read_file(
'C:\Logs\Deadlock_Monitor*.xel',
NULL, NULL, NULL
)
WHERE object_name = 'xml_deadlock_report'
)
SELECT
event_date,
DeadlockGraph.value('(/event/@timestamp)[1]', 'datetime') AS [Timestamp],
DeadlockGraph.value('(/event/data[@name="xml_report"]/value/deadlock/process-list/process/@lastbatchstarted)[1]', 'datetime') AS BatchStarted,
DeadlockGraph.value('(/event/data[@name="xml_report"]/value/deadlock/process-list/process/@lasttranstarted)[1]', 'datetime') AS TranStarted,
DeadlockGraph.query('/event/data[@name="xml_report"]/value/deadlock') AS DeadlockDetails
FROM DeadlockData;
3.2 分析死锁原因的关键信息
死锁受害者:被选为牺牲品的事务
等待资源:每个进程正在等待的锁
持有资源:每个进程当前持有的锁
执行的SQL语句
隔离级别
事务开始时间
四、常见死锁场景及解决方案
4.1 场景一:更新顺序不一致
-- 事务1
BEGIN TRANSACTION;
UPDATE TableA SET Col1 = 1 WHERE ID = 1;
UPDATE TableB SET Col2 = 2 WHERE ID = 1;
COMMIT;
-- 事务2(死锁发生)
BEGIN TRANSACTION;
UPDATE TableB SET Col2 = 2 WHERE ID = 1; -- 先更新TableB
UPDATE TableA SET Col1 = 1 WHERE ID = 1; -- 再更新TableA
COMMIT;
解决方案:
-- 方案1:统一更新顺序
CREATE PROCEDURE UpdateBothTables
@TableAId INT,
@TableBId INT
AS
BEGIN
SET NOCOUNT ON;
BEGIN TRANSACTION;
BEGIN TRY
UPDATE TableA SET Col1 = 1 WHERE ID = @TableAId;
UPDATE TableB SET Col2 = 2 WHERE ID = @TableBId;
COMMIT;
END TRY
BEGIN CATCH
ROLLBACK;
THROW;
END CATCH
END;
-- 方案2:使用表提示减少锁争用
UPDATE TableA WITH (ROWLOCK) SET Col1 = 1 WHERE ID = 1;
4.2 场景二:缺失索引导致的表扫描锁
-- 没有索引的查询会导致全表扫描,增加锁冲突
UPDATE Orders SET Status = 'Completed'
WHERE CustomerID = 100 AND OrderDate > '2024-01-01';
解决方案:
-- 创建合适的索引
CREATE INDEX IX_Orders_CustomerID_OrderDate
ON Orders(CustomerID, OrderDate)
INCLUDE (Status);
-- 使用索引提示
UPDATE Orders WITH (INDEX(IX_Orders_CustomerID_OrderDate))
SET Status = 'Completed'
WHERE CustomerID = 100 AND OrderDate > '2024-01-01';
4.3 场景三:读写锁争用
-- 事务1:读操作
BEGIN TRANSACTION;
SELECT * FROM Products WITH (HOLDLOCK) WHERE CategoryID = 1;
-- 长时间持有锁...
WAITFOR DELAY '00:00:10';
COMMIT;
-- 事务2:写操作(死锁)
BEGIN TRANSACTION;
UPDATE Products SET Price = Price * 1.1 WHERE CategoryID = 1;
COMMIT;
解决方案:
-- 方案1:优化事务隔离级别
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SELECT * FROM Products WHERE CategoryID = 1;
-- 方案2:使用快照隔离
ALTER DATABASE YourDatabase
SET ALLOW_SNAPSHOT_ISOLATION ON;
SET TRANSACTION ISOLATION LEVEL SNAPSHOT;
SELECT * FROM Products WHERE CategoryID = 1;
-- 方案3:使用NOLOCK提示(谨慎使用)
SELECT * FROM Products WITH (NOLOCK) WHERE CategoryID = 1;
4.4 场景四:外键约束导致的锁升级
-- 父表
DELETE FROM Categories WHERE CategoryID = 1;
-- 同时有事务在子表插入
INSERT INTO Products (CategoryID, ProductName) VALUES (1, 'New Product');
解决方案:
-- 1. 检查外键约束的索引
EXEC sp_helpindex 'Products';
-- 2. 在外键列上创建索引
CREATE INDEX IX_Products_CategoryID ON Products(CategoryID);
-- 3. 考虑禁用约束检查(仅在必要时)
ALTER TABLE Products NOCHECK CONSTRAINT FK_Products_Categories;
-- 执行操作...
ALTER TABLE Products CHECK CONSTRAINT FK_Products_Categories;
五、死锁预防策略
5.1 应用程序层预防
// C# 示例:重试逻辑
public void ExecuteWithRetry(Action action, int maxRetries = 3)
{
int retryCount = 0;
while (retryCount < maxRetries)
{
try
{
action();
return;
}
catch (SqlException ex) when (ex.Number == 1205) // 死锁错误号
{
retryCount++;
if (retryCount >= maxRetries)
throw;
// 指数退避
int delay = (int)Math.Pow(2, retryCount) * 100;
Thread.Sleep(delay + new Random().Next(100));
}
}
}
5.2 数据库设计优化
-- 1. 规范化设计,减少冗余
-- 2. 合理设计索引
-- 3. 使用覆盖索引减少书签查找
-- 覆盖索引示例
CREATE INDEX IX_Orders_Covering
ON Orders(CustomerID, OrderDate)
INCLUDE (TotalAmount, Status);
5.3 事务优化
-- 1. 保持事务简短
BEGIN TRANSACTION;
-- 只包含必要的操作
COMMIT;
-- 2. 按相同顺序访问对象
-- 3. 避免用户交互在事务中
-- 4. 使用低隔离级别
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
-- 5. 使用行版本控制
ALTER DATABASE YourDatabase
SET READ_COMMITTED_SNAPSHOT ON;
六、死锁处理工具和脚本
6.1 自动死锁监控和报警
-- 创建死锁监控表
CREATE TABLE DeadlockLog (
Id INT IDENTITY(1,1) PRIMARY KEY,
DeadlockTime DATETIME DEFAULT GETDATE(),
DeadlockGraph XML,
Processed BIT DEFAULT 0
);
-- 创建监控作业
CREATE PROCEDURE MonitorDeadlocks
AS
BEGIN
INSERT INTO DeadlockLog (DeadlockGraph)
SELECT CAST(event_data AS XML)
FROM sys.fn_xe_file_target_read_file(
'system_health*.xel',
NULL, NULL, NULL
)
WHERE event_data LIKE '%deadlock%'
AND CAST(event_data AS XML).value('(/event/@timestamp)[1]', 'datetime')
> DATEADD(HOUR, -1, GETDATE());
END;
-- 配置SQL Agent作业定期执行
6.2 死锁分析报告脚本
-- 生成死锁分析报告
DECLARE @StartDate DATETIME = DATEADD(DAY, -7, GETDATE());
DECLARE @EndDate DATETIME = GETDATE();
SELECT
DeadlockTime,
VictimProcess = DeadlockGraph.value('(/deadlock/victim-list/victimProcess/@id)[1]', 'VARCHAR(50)'),
ProcessCount = DeadlockGraph.value('count(/deadlock/process-list/process)', 'INT'),
ObjectCount = DeadlockGraph.value('count(/deadlock/resource-list/*)', 'INT'),
-- 提取涉及的SQL语句
SQLStatements = STUFF((
SELECT '; ' + ProcessNode.value('(@currentdbname)[1]', 'VARCHAR(128)')
+ '.' + ProcessNode.value('(@currentdbname)[1]', 'VARCHAR(128)')
+ ': ' + ProcessNode.value('(executionStack/frame)[1]', 'VARCHAR(MAX)')
FROM DeadlockGraph.nodes('/deadlock/process-list/process') AS T(ProcessNode)
FOR XML PATH('')
), 1, 2, ''),
DeadlockGraph
FROM DeadlockLog
WHERE DeadlockTime BETWEEN @StartDate AND @EndDate
ORDER BY DeadlockTime DESC;
七、高级死锁处理技术
7.1 使用资源锁提示
-- UPDLOCK:更新锁,防止其他事务获取更新锁或排他锁
SELECT * FROM Table WITH (UPDLOCK) WHERE ID = 1;
-- ROWLOCK:强制行级锁
UPDATE Table WITH (ROWLOCK) SET Column = Value WHERE Condition;
-- PAGLOCK:页级锁
-- TABLOCK:表级锁
-- NOLOCK:脏读(谨慎使用)
7.2 锁超时设置
-- 设置锁超时(毫秒)
SET LOCK_TIMEOUT 5000; -- 5秒
-- 在代码中处理超时
BEGIN TRY
SET LOCK_TIMEOUT 5000;
-- 执行可能阻塞的操作
END TRY
BEGIN CATCH
IF ERROR_NUMBER() = 1222 -- 锁超时错误
BEGIN
-- 处理超时逻辑
PRINT '操作超时,请重试';
END
END CATCH
7.3 使用快照隔离
-- 启用快照隔离
ALTER DATABASE YourDatabase
SET ALLOW_SNAPSHOT_ISOLATION ON;
-- 在事务中使用
SET TRANSACTION ISOLATION LEVEL SNAPSHOT;
BEGIN TRANSACTION;
-- 操作...
COMMIT;
-- 或启用行版本控制的读已提交
ALTER DATABASE YourDatabase
SET READ_COMMITTED_SNAPSHOT ON;
八、性能优化建议
8.1 索引优化
-- 1. 分析缺失索引
SELECT
migs.avg_total_user_cost * (migs.avg_user_impact / 100.0) * (migs.user_seeks + migs.user_scans) AS improvement_measure,
'CREATE INDEX [IX_' + OBJECT_NAME(mid.object_id) + '_' + REPLACE(REPLACE(REPLACE(ISNULL(mid.equality_columns, ''), ', ', '_'), '[', ''), ']', '') +
CASE WHEN mid.inequality_columns IS NULL THEN ''
ELSE '_' + REPLACE(REPLACE(REPLACE(mid.inequality_columns, ', ', '_'), '[', ''), ']', '') END +
'] ON ' + mid.statement + ' (' + ISNULL(mid.equality_columns, '') +
CASE WHEN mid.inequality_columns IS NULL THEN ''
ELSE ',' + mid.inequality_columns END + ')' +
ISNULL(' INCLUDE (' + mid.included_columns + ')', '') AS create_index_statement
FROM sys.dm_db_missing_index_groups mig
INNER JOIN sys.dm_db_missing_index_group_stats migs ON migs.group_handle = mig.index_group_handle
INNER JOIN sys.dm_db_missing_index_details mid ON mig.index_handle = mid.index_handle
ORDER BY migs.avg_total_user_cost * migs.avg_user_impact * (migs.user_seeks + migs.user_scans) DESC;
8.2 查询优化
-- 使用执行计划分析
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
-- 分析实际执行计划
SELECT * FROM YourTable WHERE Conditions;
九、应急预案
9.1 紧急处理步骤
识别死锁:使用sp_who2或活动监视器
终止阻塞进程:谨慎使用KILL命令
-- 查找阻塞进程
SELECT
blocking.session_id AS BlockingSession,
blocked.session_id AS BlockedSession,
wait.wait_type AS WaitType,
blocking_text.text AS BlockingSQL,
blocked_text.text AS BlockedSQL
FROM sys.dm_exec_requests blocked
INNER JOIN sys.dm_exec_requests blocking ON blocking.session_id = blocked.blocking_session_id
CROSS APPLY sys.dm_exec_sql_text(blocking.sql_handle) blocking_text
CROSS APPLY sys.dm_exec_sql_text(blocked.sql_handle) blocked_text
INNER JOIN sys.dm_os_waiting_tasks wait ON wait.session_id = blocked.session_id;
-- 终止进程(谨慎操作)
KILL [Session_ID];
### 9.2 预防措施检查清单
- [ ] 所有事务是否保持简短
- [ ] 是否按相同顺序访问数据库对象
- [ ] 是否使用了合适的索引
- [ ] 是否选择了适当的隔离级别
- [ ] 是否有应用程序级别的重试逻辑
- [ ] 是否定期分析死锁日志
- [ ] 是否设置了适当的锁超时
## 十、最佳实践总结
1. **设计阶段**:
- 规范数据库设计
- 建立合理的索引策略
- 考虑使用行版本控制
2. **开发阶段**:
- 统一数据访问顺序
- 保持事务简短
- 实现重试机制
- 使用适当的隔离级别
3. **运维阶段**:
- 定期监控死锁
- 分析死锁模式
- 优化性能瓶颈
- 定期维护索引
4. **应急响应**:
- 建立监控警报
- 准备应急预案
- 定期演练恢复流程
通过综合运用上述策略和技术,可以有效减少SQL Server中的死锁发生,并在死锁发生时快速定位和解决问题。