diff mbox series

[1/6] wifi: mt76: mt7996: add debugging for MCU command timeouts.

Message ID 20240307192951.3271156-1-greearb@candelatech.com
State New
Headers show
Series [1/6] wifi: mt76: mt7996: add debugging for MCU command timeouts. | expand

Commit Message

Ben Greear March 7, 2024, 7:29 p.m. UTC
From: Ben Greear <greearb@candelatech.com>

Print information about whether the message is the first timeout,
and also print info if we manage to recover after a timeout.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 drivers/net/wireless/mediatek/mt76/mt76.h     |  2 ++
 .../net/wireless/mediatek/mt76/mt7996/mcu.c   | 31 +++++++++++++++++--
 2 files changed, 30 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index b20c34d5a0f7..c341915e4d62 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -829,6 +829,8 @@  struct mt76_dev {
 	struct device *dma_dev;
 
 	struct mt76_mcu mcu;
+	u32 first_failed_mcu_cmd; /* for debugging */
+	u32 last_successful_mcu_cmd; /* for debugging */
 
 	struct net_device napi_dev;
 	struct net_device tx_napi_dev;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
index 699be57309c2..a858a0914bf0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7996/mcu.c
@@ -200,14 +200,39 @@  mt7996_mcu_parse_response(struct mt76_dev *mdev, int cmd,
 	int ret = 0;
 
 	if (!skb) {
-		dev_err(mdev->dev, "Message %08x (seq %d) timeout\n",
-			cmd, seq);
+		const char *first = "Secondary";
+
+		if (!mdev->first_failed_mcu_cmd)
+			first = "Initial";
+
+		dev_err(mdev->dev,
+			"MCU: %s Failure: Message %08x (cid %lx ext_cid: %lx seq %d) timeout.  Last successful cmd: 0x%x\n",
+			first,
+			cmd, FIELD_GET(__MCU_CMD_FIELD_ID, cmd),
+			FIELD_GET(__MCU_CMD_FIELD_EXT_ID, cmd), seq,
+			mdev->last_successful_mcu_cmd);
+
+		if (!mdev->first_failed_mcu_cmd)
+			mdev->first_failed_mcu_cmd = cmd;
 		return -ETIMEDOUT;
 	}
 
+	mdev->last_successful_mcu_cmd = cmd;
+
+	if (mdev->first_failed_mcu_cmd) {
+		dev_err(mdev->dev, "MCU: First success after failure: Message %08x (cid %lx ext_cid: %lx seq %d)\n",
+			cmd, FIELD_GET(__MCU_CMD_FIELD_ID, cmd),
+			FIELD_GET(__MCU_CMD_FIELD_EXT_ID, cmd), seq);
+		mdev->first_failed_mcu_cmd = 0;
+	}
+
 	rxd = (struct mt7996_mcu_rxd *)skb->data;
-	if (seq != rxd->seq)
+	if (seq != rxd->seq) {
+		/* This can happen if the previous request didn't wait (which is normal).
+		 * Quietly return EAGAIN in that case, it is not something to warn about.
+		 */
 		return -EAGAIN;
+	}
 
 	if (cmd == MCU_CMD(PATCH_SEM_CONTROL)) {
 		skb_pull(skb, sizeof(*rxd) - 4);