From f621eeead8d6eca12289dc8f52727cd23d5c5b21 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Mon, 16 Aug 2021 13:27:12 +0300 Subject: [PATCH] habanalabs: add "in device creation" status [ Upstream commit 71731090ab17a208a58020e4b342fdfee280458a ] On init, the disabled state is cleared right before hw_init and that causes the device to report on "Operational" state before the device initialization is finished. Although the char device is not yet exposed to the user at this stage, the sysfs entries are exposed. This can cause errors in monitoring applications that use the sysfs entries. In order to avoid this, a new state "in device creation" is introduced to ne reported when the device is not disabled but is still in init flow. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Signed-off-by: Sasha Levin --- drivers/misc/habanalabs/common/device.c | 3 +++ drivers/misc/habanalabs/common/habanalabs.h | 2 +- .../misc/habanalabs/common/habanalabs_drv.c | 8 ++++++-- drivers/misc/habanalabs/common/sysfs.c | 20 +++++++------------ include/uapi/misc/habanalabs.h | 4 +++- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index ff4cbde289c0b..4e9b677460bad 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) status = HL_DEVICE_STATUS_NEEDS_RESET; else if (hdev->disabled) status = HL_DEVICE_STATUS_MALFUNCTION; + else if (!hdev->init_done) + status = HL_DEVICE_STATUS_IN_DEVICE_CREATION; else status = HL_DEVICE_STATUS_OPERATIONAL; @@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev, case HL_DEVICE_STATUS_NEEDS_RESET: return false; case HL_DEVICE_STATUS_OPERATIONAL: + case HL_DEVICE_STATUS_IN_DEVICE_CREATION: default: return true; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6b3cdd7e068a3..61db72ecec0e0 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1798,7 +1798,7 @@ struct hl_dbg_device_entry { #define HL_STR_MAX 32 -#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1) +#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1) /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards. diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 4194cda2d04c3..536451a9a16c9 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -318,12 +318,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->asic_prop.fw_security_enabled = false; /* Assign status description string */ - strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], - "disabled", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], + "operational", HL_STR_MAX); strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], + "disabled", HL_STR_MAX); strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); + strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], + "in device creation", HL_STR_MAX); hdev->major = hl_major; hdev->reset_on_lockup = reset_on_lockup; diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index db72df282ef8d..34f9f2779962a 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -9,8 +9,7 @@ #include -long hl_get_frequency(struct hl_device *hdev, u32 pll_index, - bool curr) +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct cpucp_packet pkt; u32 used_pll_idx; @@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, return (long) result; } -void hl_set_frequency(struct hl_device *hdev, u32 pll_index, - u64 freq) +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) { struct cpucp_packet pkt; u32 used_pll_idx; @@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hl_device *hdev = dev_get_drvdata(dev); - char *str; + char str[HL_STR_MAX]; - if (atomic_read(&hdev->in_reset)) - str = "In reset"; - else if (hdev->disabled) - str = "Malfunction"; - else if (hdev->needs_reset) - str = "Needs Reset"; - else - str = "Operational"; + strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX); + + /* use uppercase for backward compatibility */ + str[0] = 'A' + (str[0] - 'a'); return sprintf(buf, "%s\n", str); } diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a47a731e45277..b4b681b81df81 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -276,7 +276,9 @@ enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, HL_DEVICE_STATUS_MALFUNCTION, - HL_DEVICE_STATUS_NEEDS_RESET + HL_DEVICE_STATUS_NEEDS_RESET, + HL_DEVICE_STATUS_IN_DEVICE_CREATION, + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION }; /* Opcode for management ioctl