SQL Server死锁排查实战指南
一、死锁基本概念
1.1 什么是死锁
- 两个或多个进程相互等待对方释放资源
- 每个进程都持有对方需要的资源
- SQL Server自动检测并选择"牺牲品"进程终止
1.2 死锁四要素
二、死锁监控工具
2.1 系统视图和函数
-- 1. 系统健康会话(SQL Server 2012+)
SELECT * FROM sys.dm_xe_session_targets xst
INNER JOIN sys.dm_xe_sessions xs ON xs.address = xst.event_session_address
WHERE xs.name = 'system_health'
-- 2. 查看当前死锁信息
SELECT * FROM sys.dm_tran_locks
WHERE request_status = 'CONVERT'
-- 3. 查看阻塞链
SELECT
blocking.session_id AS blocking_session_id,
blocked.session_id AS blocked_session_id,
blocking_text = blocking.text,
blocked_text = blocked.text
FROM sys.dm_exec_requests blocked
INNER JOIN sys.dm_exec_requests blocking
ON blocking.session_id = blocked.blocking_session_id
CROSS APPLY sys.dm_exec_sql_text(blocked.sql_handle) blocked
CROSS APPLY sys.dm_exec_sql_text(blocking.sql_handle) blocking
2.2 扩展事件跟踪
-- 创建死锁监控扩展事件
CREATE EVENT SESSION [Deadlock_Monitor] ON SERVER
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.event_file(
SET filename=N'D:\Logs\Deadlock_Monitor.xel',
max_file_size=(50),
max_rollover_files=(10))
WITH (STARTUP_STATE=ON)
GO
-- 启动事件会话
ALTER EVENT SESSION [Deadlock_Monitor] ON SERVER STATE = START
2.3 SQL Profiler跟踪
- 使用SQL Server Profiler
- 选择"Deadlock graph"事件
- 保存跟踪结果分析
三、死锁信息收集方法
3.1 使用跟踪标志
-- 启用死锁跟踪标志(不推荐生产环境长期使用)
DBCC TRACEON(1204, -1) -- 基本死锁信息
DBCC TRACEON(1222, -1) -- 更详细的XML格式信息
-- 查看错误日志中的死锁信息
EXEC xp_readerrorlog 0, 1, 'deadlock'
3.2 系统健康扩展事件解析
-- 解析system_health中的死锁信息
SELECT
DeadlockEvent = XEvent.value('(data/value)[1]', 'varchar(max)'),
EventTime = XEvent.value('(@timestamp)[1]', 'datetime2')
FROM (
SELECT CAST(target_data AS XML) AS TargetData
FROM sys.dm_xe_session_targets st
INNER JOIN sys.dm_xe_sessions s ON s.address = st.event_session_address
WHERE s.name = 'system_health'
AND st.target_name = 'ring_buffer'
) AS Data
CROSS APPLY TargetData.nodes('RingBufferTarget/event[@name="xml_deadlock_report"]') AS XEventData(XEvent)
WHERE XEvent.value('(@name)[1]', 'varchar(4000)') = 'xml_deadlock_report'
ORDER BY EventTime DESC
四、死锁图形分析
4.1 死锁XML解析
<!-- 死锁XML结构示例 -->
<deadlock>
<victim-list>
<victimProcess id="processId"/>
</victim-list>
<process-list>
<process id="processId" taskpriority="0" logused="0" waitresource="KEY: ..."
waittime="ms" ownerId="transactionId" transactionname="user_transaction"
lasttranstarted="datetime" XDES="0x..." lockMode="X"
schedulerid="1" kpid="pid" status="suspended" spid="spid"
sbid="0" ecid="0" priority="0" trancount="2"
lastbatchstarted="datetime" lastbatchcompleted="datetime"
clientapp="appName" hostname="hostname" hostpid="pid"
loginname="login" isolationlevel="read committed (2)"
xactid="transactionId" currentdb="dbId" lockTimeout="milliseconds"
clientoption1="value" clientoption2="value">
<executionStack>
<frame line="lineNumber" stmtstart="startOffset"
stmtend="endOffset" sqlhandle="handle"/>
</executionStack>
<inputbuf>
SQL语句内容
</inputbuf>
</process>
</process-list>
<resource-list>
<keylock hobtid="objectId" dbid="dbId" objectname="objectName"
indexname="indexName" id="lockId" mode="X" associatedObjectId="objectId">
<owner-list>
<owner id="processId" mode="X"/>
</owner-list>
<waiter-list>
<waiter id="processId" mode="X" requestType="wait"/>
</waiter-list>
</keylock>
</resource-list>
</deadlock>
4.2 关键信息提取
-- 解析死锁XML获取关键信息
WITH DeadlockData AS (
SELECT
CAST(event_data AS XML) AS DeadlockGraph,
event_date
FROM sys.fn_xe_file_target_read_file(
'D:\Logs\Deadlock_Monitor*.xel',
NULL, NULL, NULL)
WHERE object_name = 'xml_deadlock_report'
)
SELECT
event_date,
DeadlockGraph.value('(deadlock/victim-list/victimProcess/@id)[1]', 'varchar(50)') AS VictimProcessId,
DeadlockGraph.query('deadlock/process-list/process/inputbuf') AS InputBuffer,
DeadlockGraph.query('deadlock/resource-list') AS Resources
FROM DeadlockData
五、常见死锁场景及解决方案
5.1 顺序访问死锁
场景: 两个事务以不同顺序访问相同资源
-- 事务1
BEGIN TRANSACTION
UPDATE TableA SET Column1 = 'Value' WHERE ID = 1
UPDATE TableB SET Column1 = 'Value' WHERE ID = 1
COMMIT
-- 事务2(反顺序)
BEGIN TRANSACTION
UPDATE TableB SET Column1 = 'Value' WHERE ID = 1
UPDATE TableA SET Column1 = 'Value' WHERE ID = 1
COMMIT
解决方案:
- 统一访问顺序
- 使用表提示 WITH (UPDLOCK, HOLDLOCK)
- 应用层控制事务顺序
5.2 缺失索引死锁
场景: 表扫描导致大量锁升级
-- 查看缺失索引
SELECT
migs.avg_total_user_cost * (migs.avg_user_impact / 100.0) * (migs.user_seeks + migs.user_scans) AS improvement_measure,
'CREATE INDEX [IX_' + OBJECT_NAME(mid.object_id) + '_' + REPLACE(REPLACE(REPLACE(ISNULL(mid.equality_columns,''),', ','_'),'[',''),']','') + ']' +
ISNULL('_' + REPLACE(REPLACE(REPLACE(ISNULL(mid.inequality_columns,''),', ','_'),'[',''),']',''), '') +
' ON ' + mid.statement +
' (' + ISNULL (mid.equality_columns,'') +
CASE WHEN mid.equality_columns IS NOT NULL AND mid.inequality_columns IS NOT NULL THEN ',' ELSE '' END +
ISNULL (mid.inequality_columns, '') + ')' +
ISNULL (' INCLUDE (' + mid.included_columns + ')', '') AS create_index_statement
FROM sys.dm_db_missing_index_groups mig
INNER JOIN sys.dm_db_missing_index_group_stats migs ON migs.group_handle = mig.index_group_handle
INNER JOIN sys.dm_db_missing_index_details mid ON mig.index_handle = mid.index_handle
ORDER BY improvement_measure DESC
5.3 外键死锁
场景: 外键约束检查导致的锁争用
解决方案:
- 使用 WITH NOLOCK 提示(谨慎使用)
- 优化外键索引
- 考虑禁用外键检查(特定场景)
5.4 页拆分死锁
场景: INSERT操作导致页拆分
-- 监控页拆分
SELECT
OBJECT_NAME(object_id) AS TableName,
index_id,
leaf_allocation_count,
nonleaf_allocation_count
FROM sys.dm_db_index_operational_stats(DB_ID(), NULL, NULL, NULL)
WHERE leaf_allocation_count > 0
ORDER BY leaf_allocation_count DESC
-- 解决方法:调整填充因子
ALTER INDEX ALL ON TableName REBUILD WITH (FILLFACTOR = 90)
六、预防和优化策略
6.1 索引优化
-- 1. 检查索引碎片
SELECT
OBJECT_NAME(ips.object_id) AS TableName,
i.name AS IndexName,
ips.avg_fragmentation_in_percent
FROM sys.dm_db_index_physical_stats(
DB_ID(), NULL, NULL, NULL, 'LIMITED') ips
INNER JOIN sys.indexes i ON i.object_id = ips.object_id
AND i.index_id = ips.index_id
WHERE ips.avg_fragmentation_in_percent > 30
-- 2. 创建覆盖索引减少键锁
CREATE INDEX IX_Covering ON Orders(CustomerID, OrderDate)
INCLUDE (TotalAmount, Status)
6.2 事务优化
-- 1. 缩短事务时间
BEGIN TRANSACTION
-- 尽量减少事务内的业务逻辑
-- 只包含必要的数据库操作
COMMIT TRANSACTION
-- 2. 使用行版本控制(READ_COMMITTED_SNAPSHOT)
ALTER DATABASE YourDatabase
SET READ_COMMITTED_SNAPSHOT ON
6.3 锁提示使用
-- 1. 使用UPDLOCK避免死锁
SELECT * FROM Table WITH (UPDLOCK, ROWLOCK) WHERE ID = 1
-- 2. 使用NOLOCK(只读查询,注意脏读风险)
SELECT * FROM Table WITH (NOLOCK) WHERE Status = 1
-- 3. 使用READPAST跳过锁定的行
SELECT * FROM Table WITH (READPAST) WHERE Status = 1
6.4 应用层优化
# PowerShell脚本监控死锁
$server = "YourServer"
$database = "YourDatabase"
$query = @"
SELECT
DATEADD(mi, DATEDIFF(mi, GETUTCDATE(), GETDATE()), xed.event_date) AS event_date,
xed.event_data.value('(deadlock/victim-list/victimProcess/@id)[1]', 'varchar(50)') AS victim_id,
xed.event_data.query('deadlock/process-list/process/inputbuf') AS input_buffer
FROM (
SELECT CAST(target_data AS XML) AS target_data
FROM sys.dm_xe_session_targets st
INNER JOIN sys.dm_xe_sessions s ON s.address = st.event_session_address
WHERE s.name = 'system_health'
AND st.target_name = 'ring_buffer'
) AS data
CROSS APPLY target_data.nodes('RingBufferTarget/event[@name="xml_deadlock_report"]') AS xed(event_data)
"@
Invoke-Sqlcmd -ServerInstance $server -Database $database -Query $query
七、紧急处理流程
7.1 实时死锁处理
-- 1. 查看当前死锁
EXEC sp_who2
EXEC sp_lock
-- 2. 终止阻塞进程(谨慎使用)
KILL [SPID]
-- 3. 查看死锁链
SELECT
wt.blocking_session_id,
wt.wait_type,
wt.wait_duration_ms,
wt.resource_description,
t.text AS [SQL Text]
FROM sys.dm_os_waiting_tasks wt
INNER JOIN sys.dm_exec_requests er ON wt.session_id = er.session_id
CROSS APPLY sys.dm_exec_sql_text(er.sql_handle) t
WHERE wt.blocking_session_id IS NOT NULL
7.2 自动化监控脚本
-- 创建死锁监控表
CREATE TABLE DeadlockHistory (
ID INT IDENTITY(1,1) PRIMARY KEY,
EventTime DATETIME2,
DeadlockGraph XML,
Processed BIT DEFAULT 0
)
-- 创建监控作业
CREATE PROCEDURE MonitorDeadlocks
AS
BEGIN
INSERT INTO DeadlockHistory (EventTime, DeadlockGraph)
SELECT
DATEADD(mi, DATEDIFF(mi, GETUTCDATE(), GETDATE()), xed.event_date) AS event_date,
xed.event_data
FROM (
SELECT CAST(target_data AS XML) AS target_data
FROM sys.dm_xe_session_targets st
INNER JOIN sys.dm_xe_sessions s ON s.address = st.event_session_address
WHERE s.name = 'system_health'
AND st.target_name = 'ring_buffer'
) AS data
CROSS APPLY target_data.nodes('RingBufferTarget/event[@name="xml_deadlock_report"]') AS xed(event_data)
WHERE xed.event_data IS NOT NULL
END
八、最佳实践总结
监控先行: 建立持续的死锁监控机制
索引优化: 确保合适的索引覆盖查询
事务精简: 缩短事务时间,减少锁持有时间
访问顺序: 统一资源访问顺序
隔离级别: 选择合适的隔离级别
批量操作: 大量更新时使用批量提交
应用重试: 实现应用层的重试逻辑
定期评审: 定期分析死锁报告并优化
九、工具推荐
SQL Server Management Studio: 内置死锁图形查看器
SQL Sentry Plan Explorer: 免费的死锁分析工具
Redgate SQL Monitor: 商业监控工具
Spotlight on SQL Server: 性能监控工具
自定义PowerShell脚本: 灵活定制监控需求
通过以上指南,您可以系统性地排查和解决SQL Server死锁问题。关键是建立预防机制、快速响应流程和持续优化策略。