琼海市分类吧

SQL Server数据库死锁处理超详细攻略

2026-03-26 20:35:01 浏览次数:0
详细信息

SQL Server 数据库死锁处理超详细攻略

一、死锁概述

1.1 什么是死锁

死锁是指两个或多个事务相互等待对方持有的资源,导致所有事务都无法继续执行的状态。

1.2 死锁的必要条件(Coffman条件)

二、死锁检测与监控

2.1 系统视图和扩展事件

-- 1. 使用系统视图查看当前死锁信息
SELECT * FROM sys.dm_tran_locks;
SELECT * FROM sys.dm_os_waiting_tasks;

-- 2. 查询最近发生的死锁
SELECT 
    event_date,
    CAST(event_data AS XML) AS DeadlockGraph
FROM sys.fn_xe_file_target_read_file(
    'system_health*.xel',
    NULL, NULL, NULL
)
WHERE event_data LIKE '%deadlock%';

-- 3. 创建死锁监控扩展事件
CREATE EVENT SESSION [Deadlock_Monitor] 
ON SERVER 
ADD EVENT sqlserver.xml_deadlock_report
ADD TARGET package0.event_file(SET filename=N'C:\Logs\Deadlock_Monitor.xel')
WITH (STARTUP_STATE=ON);
GO

2.2 SQL Server Profiler 跟踪死锁

新建跟踪 选择事件:Deadlock graph、Lock:Deadlock、Lock:Deadlock Chain 保存跟踪结果分析

2.3 实时监控脚本

-- 实时监控死锁
SELECT 
    DATEADD(ms, -1 * (SELECT cpu_ticks/(cpu_ticks/ms_ticks) 
    FROM sys.dm_os_sys_info), 
    DATEADD(ms, -1 * ([timestamp]), GETDATE())) AS [Deadlock_Time],
    CAST(event_data AS XML) AS [Deadlock_Graph]
FROM (
    SELECT XEvent.query('.') AS [event_data], XEvent.value('(@timestamp)[1]', 'bigint') AS [timestamp]
    FROM (
        SELECT CAST(target_data AS XML) AS TargetData
        FROM sys.dm_xe_session_targets st
        JOIN sys.dm_xe_sessions s ON s.address = st.event_session_address
        WHERE s.name = 'system_health'
          AND st.target_name = 'ring_buffer'
    ) AS Data
    CROSS APPLY TargetData.nodes('RingBufferTarget/event[@name="xml_deadlock_report"]') AS XEventData(XEvent)
) AS tab
ORDER BY Deadlock_Time DESC;

三、死锁分析方法

3.1 解析死锁图XML

-- 解析死锁图信息
WITH DeadlockData AS (
    SELECT 
        event_date,
        CAST(event_data AS XML) AS DeadlockGraph
    FROM sys.fn_xe_file_target_read_file(
        'C:\Logs\Deadlock_Monitor*.xel',
        NULL, NULL, NULL
    )
    WHERE object_name = 'xml_deadlock_report'
)
SELECT 
    event_date,
    DeadlockGraph.value('(/event/@timestamp)[1]', 'datetime') AS [Timestamp],
    DeadlockGraph.value('(/event/data[@name="xml_report"]/value/deadlock/process-list/process/@lastbatchstarted)[1]', 'datetime') AS BatchStarted,
    DeadlockGraph.value('(/event/data[@name="xml_report"]/value/deadlock/process-list/process/@lasttranstarted)[1]', 'datetime') AS TranStarted,
    DeadlockGraph.query('/event/data[@name="xml_report"]/value/deadlock') AS DeadlockDetails
FROM DeadlockData;

3.2 分析死锁原因的关键信息

死锁受害者:被选为牺牲品的事务 等待资源:每个进程正在等待的锁 持有资源:每个进程当前持有的锁 执行的SQL语句 隔离级别 事务开始时间

四、常见死锁场景及解决方案

4.1 场景一:更新顺序不一致

-- 事务1
BEGIN TRANSACTION;
UPDATE TableA SET Col1 = 1 WHERE ID = 1;
UPDATE TableB SET Col2 = 2 WHERE ID = 1;
COMMIT;

-- 事务2(死锁发生)
BEGIN TRANSACTION;
UPDATE TableB SET Col2 = 2 WHERE ID = 1;  -- 先更新TableB
UPDATE TableA SET Col1 = 1 WHERE ID = 1;  -- 再更新TableA
COMMIT;

解决方案

-- 方案1:统一更新顺序
CREATE PROCEDURE UpdateBothTables
    @TableAId INT,
    @TableBId INT
AS
BEGIN
    SET NOCOUNT ON;
    BEGIN TRANSACTION;
    BEGIN TRY
        UPDATE TableA SET Col1 = 1 WHERE ID = @TableAId;
        UPDATE TableB SET Col2 = 2 WHERE ID = @TableBId;
        COMMIT;
    END TRY
    BEGIN CATCH
        ROLLBACK;
        THROW;
    END CATCH
END;

-- 方案2:使用表提示减少锁争用
UPDATE TableA WITH (ROWLOCK) SET Col1 = 1 WHERE ID = 1;

4.2 场景二:缺失索引导致的表扫描锁

-- 没有索引的查询会导致全表扫描,增加锁冲突
UPDATE Orders SET Status = 'Completed' 
WHERE CustomerID = 100 AND OrderDate > '2024-01-01';

解决方案

-- 创建合适的索引
CREATE INDEX IX_Orders_CustomerID_OrderDate 
ON Orders(CustomerID, OrderDate) 
INCLUDE (Status);

-- 使用索引提示
UPDATE Orders WITH (INDEX(IX_Orders_CustomerID_OrderDate))
SET Status = 'Completed' 
WHERE CustomerID = 100 AND OrderDate > '2024-01-01';

4.3 场景三:读写锁争用

-- 事务1:读操作
BEGIN TRANSACTION;
SELECT * FROM Products WITH (HOLDLOCK) WHERE CategoryID = 1;
-- 长时间持有锁...
WAITFOR DELAY '00:00:10';
COMMIT;

-- 事务2:写操作(死锁)
BEGIN TRANSACTION;
UPDATE Products SET Price = Price * 1.1 WHERE CategoryID = 1;
COMMIT;

解决方案

-- 方案1:优化事务隔离级别
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SELECT * FROM Products WHERE CategoryID = 1;

-- 方案2:使用快照隔离
ALTER DATABASE YourDatabase 
SET ALLOW_SNAPSHOT_ISOLATION ON;

SET TRANSACTION ISOLATION LEVEL SNAPSHOT;
SELECT * FROM Products WHERE CategoryID = 1;

-- 方案3:使用NOLOCK提示(谨慎使用)
SELECT * FROM Products WITH (NOLOCK) WHERE CategoryID = 1;

4.4 场景四:外键约束导致的锁升级

-- 父表
DELETE FROM Categories WHERE CategoryID = 1;
-- 同时有事务在子表插入
INSERT INTO Products (CategoryID, ProductName) VALUES (1, 'New Product');

解决方案

-- 1. 检查外键约束的索引
EXEC sp_helpindex 'Products';

-- 2. 在外键列上创建索引
CREATE INDEX IX_Products_CategoryID ON Products(CategoryID);

-- 3. 考虑禁用约束检查(仅在必要时)
ALTER TABLE Products NOCHECK CONSTRAINT FK_Products_Categories;
-- 执行操作...
ALTER TABLE Products CHECK CONSTRAINT FK_Products_Categories;

五、死锁预防策略

5.1 应用程序层预防

// C# 示例:重试逻辑
public void ExecuteWithRetry(Action action, int maxRetries = 3)
{
    int retryCount = 0;
    while (retryCount < maxRetries)
    {
        try
        {
            action();
            return;
        }
        catch (SqlException ex) when (ex.Number == 1205) // 死锁错误号
        {
            retryCount++;
            if (retryCount >= maxRetries)
                throw;

            // 指数退避
            int delay = (int)Math.Pow(2, retryCount) * 100;
            Thread.Sleep(delay + new Random().Next(100));
        }
    }
}

5.2 数据库设计优化

-- 1. 规范化设计,减少冗余
-- 2. 合理设计索引
-- 3. 使用覆盖索引减少书签查找

-- 覆盖索引示例
CREATE INDEX IX_Orders_Covering 
ON Orders(CustomerID, OrderDate) 
INCLUDE (TotalAmount, Status);

5.3 事务优化

-- 1. 保持事务简短
BEGIN TRANSACTION;
-- 只包含必要的操作
COMMIT;

-- 2. 按相同顺序访问对象
-- 3. 避免用户交互在事务中
-- 4. 使用低隔离级别
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;

-- 5. 使用行版本控制
ALTER DATABASE YourDatabase 
SET READ_COMMITTED_SNAPSHOT ON;

六、死锁处理工具和脚本

6.1 自动死锁监控和报警

-- 创建死锁监控表
CREATE TABLE DeadlockLog (
    Id INT IDENTITY(1,1) PRIMARY KEY,
    DeadlockTime DATETIME DEFAULT GETDATE(),
    DeadlockGraph XML,
    Processed BIT DEFAULT 0
);

-- 创建监控作业
CREATE PROCEDURE MonitorDeadlocks
AS
BEGIN
    INSERT INTO DeadlockLog (DeadlockGraph)
    SELECT CAST(event_data AS XML)
    FROM sys.fn_xe_file_target_read_file(
        'system_health*.xel',
        NULL, NULL, NULL
    )
    WHERE event_data LIKE '%deadlock%'
      AND CAST(event_data AS XML).value('(/event/@timestamp)[1]', 'datetime') 
          > DATEADD(HOUR, -1, GETDATE());
END;

-- 配置SQL Agent作业定期执行

6.2 死锁分析报告脚本

-- 生成死锁分析报告
DECLARE @StartDate DATETIME = DATEADD(DAY, -7, GETDATE());
DECLARE @EndDate DATETIME = GETDATE();

SELECT 
    DeadlockTime,
    VictimProcess = DeadlockGraph.value('(/deadlock/victim-list/victimProcess/@id)[1]', 'VARCHAR(50)'),
    ProcessCount = DeadlockGraph.value('count(/deadlock/process-list/process)', 'INT'),
    ObjectCount = DeadlockGraph.value('count(/deadlock/resource-list/*)', 'INT'),
    -- 提取涉及的SQL语句
    SQLStatements = STUFF((
        SELECT '; ' + ProcessNode.value('(@currentdbname)[1]', 'VARCHAR(128)') 
               + '.' + ProcessNode.value('(@currentdbname)[1]', 'VARCHAR(128)') 
               + ': ' + ProcessNode.value('(executionStack/frame)[1]', 'VARCHAR(MAX)')
        FROM DeadlockGraph.nodes('/deadlock/process-list/process') AS T(ProcessNode)
        FOR XML PATH('')
    ), 1, 2, ''),
    DeadlockGraph
FROM DeadlockLog
WHERE DeadlockTime BETWEEN @StartDate AND @EndDate
ORDER BY DeadlockTime DESC;

七、高级死锁处理技术

7.1 使用资源锁提示

-- UPDLOCK:更新锁,防止其他事务获取更新锁或排他锁
SELECT * FROM Table WITH (UPDLOCK) WHERE ID = 1;

-- ROWLOCK:强制行级锁
UPDATE Table WITH (ROWLOCK) SET Column = Value WHERE Condition;

-- PAGLOCK:页级锁
-- TABLOCK:表级锁
-- NOLOCK:脏读(谨慎使用)

7.2 锁超时设置

-- 设置锁超时(毫秒)
SET LOCK_TIMEOUT 5000; -- 5秒

-- 在代码中处理超时
BEGIN TRY
    SET LOCK_TIMEOUT 5000;
    -- 执行可能阻塞的操作
END TRY
BEGIN CATCH
    IF ERROR_NUMBER() = 1222 -- 锁超时错误
    BEGIN
        -- 处理超时逻辑
        PRINT '操作超时,请重试';
    END
END CATCH

7.3 使用快照隔离

-- 启用快照隔离
ALTER DATABASE YourDatabase 
SET ALLOW_SNAPSHOT_ISOLATION ON;

-- 在事务中使用
SET TRANSACTION ISOLATION LEVEL SNAPSHOT;
BEGIN TRANSACTION;
-- 操作...
COMMIT;

-- 或启用行版本控制的读已提交
ALTER DATABASE YourDatabase 
SET READ_COMMITTED_SNAPSHOT ON;

八、性能优化建议

8.1 索引优化

-- 1. 分析缺失索引
SELECT 
    migs.avg_total_user_cost * (migs.avg_user_impact / 100.0) * (migs.user_seeks + migs.user_scans) AS improvement_measure,
    'CREATE INDEX [IX_' + OBJECT_NAME(mid.object_id) + '_' + REPLACE(REPLACE(REPLACE(ISNULL(mid.equality_columns, ''), ', ', '_'), '[', ''), ']', '') + 
    CASE WHEN mid.inequality_columns IS NULL THEN '' 
         ELSE '_' + REPLACE(REPLACE(REPLACE(mid.inequality_columns, ', ', '_'), '[', ''), ']', '') END +
    '] ON ' + mid.statement + ' (' + ISNULL(mid.equality_columns, '') +
    CASE WHEN mid.inequality_columns IS NULL THEN '' 
         ELSE ',' + mid.inequality_columns END + ')' +
    ISNULL(' INCLUDE (' + mid.included_columns + ')', '') AS create_index_statement
FROM sys.dm_db_missing_index_groups mig
INNER JOIN sys.dm_db_missing_index_group_stats migs ON migs.group_handle = mig.index_group_handle
INNER JOIN sys.dm_db_missing_index_details mid ON mig.index_handle = mid.index_handle
ORDER BY migs.avg_total_user_cost * migs.avg_user_impact * (migs.user_seeks + migs.user_scans) DESC;

8.2 查询优化

-- 使用执行计划分析
SET STATISTICS IO ON;
SET STATISTICS TIME ON;

-- 分析实际执行计划
SELECT * FROM YourTable WHERE Conditions;

九、应急预案

9.1 紧急处理步骤

识别死锁:使用sp_who2或活动监视器 终止阻塞进程:谨慎使用KILL命令

-- 查找阻塞进程
SELECT 
 blocking.session_id AS BlockingSession,
 blocked.session_id AS BlockedSession,
 wait.wait_type AS WaitType,
 blocking_text.text AS BlockingSQL,
 blocked_text.text AS BlockedSQL
FROM sys.dm_exec_requests blocked
INNER JOIN sys.dm_exec_requests blocking ON blocking.session_id = blocked.blocking_session_id
CROSS APPLY sys.dm_exec_sql_text(blocking.sql_handle) blocking_text
CROSS APPLY sys.dm_exec_sql_text(blocked.sql_handle) blocked_text
INNER JOIN sys.dm_os_waiting_tasks wait ON wait.session_id = blocked.session_id;

-- 终止进程(谨慎操作) KILL [Session_ID];



### 9.2 预防措施检查清单
- [ ] 所有事务是否保持简短
- [ ] 是否按相同顺序访问数据库对象
- [ ] 是否使用了合适的索引
- [ ] 是否选择了适当的隔离级别
- [ ] 是否有应用程序级别的重试逻辑
- [ ] 是否定期分析死锁日志
- [ ] 是否设置了适当的锁超时

## 十、最佳实践总结

1. **设计阶段**:
   - 规范数据库设计
   - 建立合理的索引策略
   - 考虑使用行版本控制

2. **开发阶段**:
   - 统一数据访问顺序
   - 保持事务简短
   - 实现重试机制
   - 使用适当的隔离级别

3. **运维阶段**:
   - 定期监控死锁
   - 分析死锁模式
   - 优化性能瓶颈
   - 定期维护索引

4. **应急响应**:
   - 建立监控警报
   - 准备应急预案
   - 定期演练恢复流程

通过综合运用上述策略和技术,可以有效减少SQL Server中的死锁发生,并在死锁发生时快速定位和解决问题。
相关推荐