Skip to content

Commit

Permalink
scsi: lpfc: Fix EEH support for NVMe I/O
Browse files Browse the repository at this point in the history
[ Upstream commit 25ac2c9 ]

Injecting errors on the PCI slot while the driver is handling NVMe I/O will
cause crashes and hangs.

There are several rather difficult scenarios occurring. The main issue is
that the adapter can report a PCI error before or simultaneously to the PCI
subsystem reporting the error. Both paths have different entry points and
currently there is no interlock between them. Thus multiple teardown paths
are competing and all heck breaks loose.

Complicating things is the NVMs path. To a large degree, I/O was able to be
shutdown for a full FC port on the SCSI stack. But on NVMe, there isn't a
similar call. At best, it works on a per-controller basis, but even at the
controller level, it's a controller "reset" call. All of which means I/O is
still flowing on different CPUs with reset paths expecting hw access
(mailbox commands) to execute properly.

The following modifications are made:

 - A new flag is set in PCI error entrypoints so the driver can track being
   called by that path.

 - An interlock is added in the SLI hw error path and the PCI error path
   such that only one of the paths proceeds with the teardown logic.

 - RPI cleanup is patched such that RPIs are marked unregistered w/o mbx
   cmds in cases of hw error.

 - If entering the SLI port re-init calls, a case where SLI error teardown
   was quick and beat the PCI calls now reporting error, check whether the
   SLI port is still live on the PCI bus.

 - In the PCI reset code to bring the adapter back, recheck the IRQ
   settings. Different checks for SLI3 vs SLI4.

 - In I/O completions, that may be called as part of the cleanup or
   underway just before the hw error, check the state of the adapter.  If
   in error, shortcut handling that would expect further adapter
   completions as the hw error won't be sending them.

 - In routines waiting on I/O completions, which may have been in progress
   prior to the hw error, detect the device is being torn down and abort
   from their waits and just give up. This points to a larger issue in the
   driver on ref-counting for data structures, as it doesn't have
   ref-counting on q and port structures. We'll do this fix for now as it
   would be a major rework to be done differently.

 - Fix the NVMe cleanup to simulate NVMe I/O completions if I/O is being
   failed back due to hw error.

 - In I/O buf allocation, done at the start of new I/Os, check hw state and
   fail if hw error.

Link: https://lore.kernel.org/r/20210910233159.115896-10-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
jsmart-gh authored and gregkh committed Aug 17, 2022
1 parent 0c29e14 commit eb36ec3
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 28 deletions.
1 change: 1 addition & 0 deletions drivers/scsi/lpfc/lpfc.h
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,7 @@ struct lpfc_hba {
* Firmware supports Forced Link Speed
* capability
*/
#define HBA_PCI_ERR 0x80000 /* The PCI slot is offline */
#define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */
#define HBA_CGN_RSVD1 0x200000 /* Reserved CGN flag */
#define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */
Expand Down
1 change: 1 addition & 0 deletions drivers/scsi/lpfc/lpfc_hbadisc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5349,6 +5349,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)

rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
ndlp->nlp_flag &= ~NLP_UNREG_INP;
mempool_free(mbox, phba->mbox_mem_pool);
acc_plogi = 1;
}
Expand Down
31 changes: 29 additions & 2 deletions drivers/scsi/lpfc/lpfc_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -1606,6 +1606,11 @@ void
lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
{
spin_lock_irq(&phba->hbalock);
if (phba->link_state == LPFC_HBA_ERROR &&
phba->hba_flag & HBA_PCI_ERR) {
spin_unlock_irq(&phba->hbalock);
return;
}
phba->link_state = LPFC_HBA_ERROR;
spin_unlock_irq(&phba->hbalock);

Expand Down Expand Up @@ -1945,7 +1950,6 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
if (pci_channel_offline(phba->pcidev)) {
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"3166 pci channel is offline\n");
lpfc_sli4_offline_eratt(phba);
return;
}

Expand Down Expand Up @@ -3643,6 +3647,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
struct lpfc_vport **vports;
struct Scsi_Host *shost;
int i;
int offline = 0;

if (vport->fc_flag & FC_OFFLINE_MODE)
return;
Expand All @@ -3651,6 +3656,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)

lpfc_linkdown(phba);

offline = pci_channel_offline(phba->pcidev);

/* Issue an unreg_login to all nodes on all vports */
vports = lpfc_create_vport_work_array(phba);
if (vports != NULL) {
Expand All @@ -3673,7 +3680,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
ndlp->nlp_flag &= ~NLP_NPR_ADISC;
spin_unlock_irq(&ndlp->lock);

lpfc_unreg_rpi(vports[i], ndlp);
if (offline) {
spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag &= ~(NLP_UNREG_INP |
NLP_RPI_REGISTERED);
spin_unlock_irq(&ndlp->lock);
} else {
lpfc_unreg_rpi(vports[i], ndlp);
}
/*
* Whenever an SLI4 port goes offline, free the
* RPI. Get a new RPI when the adapter port
Expand Down Expand Up @@ -14070,6 +14084,10 @@ lpfc_pci_resume_one_s3(struct device *dev_d)
return error;
}

/* Init cpu_map array */
lpfc_cpu_map_array_init(phba);
/* Init hba_eq_hdl array */
lpfc_hba_eq_hdl_array_init(phba);
/* Configure and enable interrupt */
intr_mode = lpfc_sli_enable_intr(phba, phba->intr_mode);
if (intr_mode == LPFC_INTR_ERROR) {
Expand Down Expand Up @@ -15023,14 +15041,17 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
lpfc_sli4_prep_dev_for_recover(phba);
return PCI_ERS_RESULT_CAN_RECOVER;
case pci_channel_io_frozen:
phba->hba_flag |= HBA_PCI_ERR;
/* Fatal error, prepare for slot reset */
lpfc_sli4_prep_dev_for_reset(phba);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
phba->hba_flag |= HBA_PCI_ERR;
/* Permanent failure, prepare for device down */
lpfc_sli4_prep_dev_for_perm_failure(phba);
return PCI_ERS_RESULT_DISCONNECT;
default:
phba->hba_flag |= HBA_PCI_ERR;
/* Unknown state, prepare and request slot reset */
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"2825 Unknown PCI error state: x%x\n", state);
Expand Down Expand Up @@ -15074,6 +15095,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)

pci_restore_state(pdev);

phba->hba_flag &= ~HBA_PCI_ERR;
/*
* As the new kernel behavior of pci_restore_state() API call clears
* device saved_state flag, need to save the restored state again.
Expand All @@ -15098,6 +15120,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
return PCI_ERS_RESULT_DISCONNECT;
} else
phba->intr_mode = intr_mode;
lpfc_cpu_affinity_check(phba, phba->cfg_irq_chann);

/* Log the current active interrupt mode */
lpfc_log_intr_mode(phba, phba->intr_mode);
Expand Down Expand Up @@ -15299,6 +15322,10 @@ lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;

if (phba->link_state == LPFC_HBA_ERROR &&
phba->hba_flag & HBA_IOQ_FLUSH)
return PCI_ERS_RESULT_NEED_RESET;

switch (phba->pci_dev_grp) {
case LPFC_PCI_DEV_LP:
rc = lpfc_io_error_detected_s3(pdev, state);
Expand Down
53 changes: 51 additions & 2 deletions drivers/scsi/lpfc/lpfc_nvme.c
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
int cpu;
#endif
int offline = 0;

/* Sanity check on return of outstanding command */
if (!lpfc_ncmd) {
Expand Down Expand Up @@ -1098,11 +1099,12 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
nCmd->transferred_length = 0;
nCmd->rcv_rsplen = 0;
nCmd->status = NVME_SC_INTERNAL;
offline = pci_channel_offline(vport->phba->pcidev);
}
}

/* pick up SLI4 exhange busy condition */
if (bf_get(lpfc_wcqe_c_xb, wcqe))
if (bf_get(lpfc_wcqe_c_xb, wcqe) && !offline)
lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
else
lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
Expand Down Expand Up @@ -2174,13 +2176,21 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
abts_nvme = 0;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
qp = &phba->sli4_hba.hdwq[i];
if (!vport || !vport->localport ||
!qp || !qp->io_wq)
return;

pring = qp->io_wq->pring;
if (!pring)
continue;
pending += pring->txcmplq_cnt;
abts_scsi += qp->abts_scsi_io_bufs;
abts_nvme += qp->abts_nvme_io_bufs;
}
if (!vport || !vport->localport ||
vport->phba->hba_flag & HBA_PCI_ERR)
return;

lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
"6176 Lport x%px Localport x%px wait "
"timed out. Pending %d [%d:%d]. "
Expand Down Expand Up @@ -2220,6 +2230,8 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
return;

localport = vport->localport;
if (!localport)
return;
lport = (struct lpfc_nvme_lport *)localport->private;

lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
Expand Down Expand Up @@ -2536,7 +2548,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
* return values is ignored. The upcall is a courtesy to the
* transport.
*/
if (vport->load_flag & FC_UNLOADING)
if (vport->load_flag & FC_UNLOADING ||
unlikely(vport->phba->hba_flag & HBA_PCI_ERR))
(void)nvme_fc_set_remoteport_devloss(remoteport, 0);

ret = nvme_fc_unregister_remoteport(remoteport);
Expand Down Expand Up @@ -2564,6 +2577,42 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
vport->localport, ndlp->rport, ndlp->nlp_DID);
}

/**
* lpfc_sli4_nvme_pci_offline_aborted - Fast-path process of NVME xri abort
* @phba: pointer to lpfc hba data structure.
* @lpfc_ncmd: The nvme job structure for the request being aborted.
*
* This routine is invoked by the worker thread to process a SLI4 fast-path
* NVME aborted xri. Aborted NVME IO commands are completed to the transport
* here.
**/
void
lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba,
struct lpfc_io_buf *lpfc_ncmd)
{
struct nvmefc_fcp_req *nvme_cmd = NULL;

lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6533 %s nvme_cmd %p tag x%x abort complete and "
"xri released\n", __func__,
lpfc_ncmd->nvmeCmd,
lpfc_ncmd->cur_iocbq.iotag);

/* Aborted NVME commands are required to not complete
* before the abort exchange command fully completes.
* Once completed, it is available via the put list.
*/
if (lpfc_ncmd->nvmeCmd) {
nvme_cmd = lpfc_ncmd->nvmeCmd;
nvme_cmd->transferred_length = 0;
nvme_cmd->rcv_rsplen = 0;
nvme_cmd->status = NVME_SC_INTERNAL;
nvme_cmd->done(nvme_cmd);
lpfc_ncmd->nvmeCmd = NULL;
}
lpfc_release_nvme_buf(phba, lpfc_ncmd);
}

/**
* lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
* @phba: pointer to lpfc hba data structure.
Expand Down
63 changes: 41 additions & 22 deletions drivers/scsi/lpfc/lpfc_scsi.c
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,8 @@ void
lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
struct sli4_wcqe_xri_aborted *axri, int idx)
{
uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
u16 xri = 0;
u16 rxid = 0;
struct lpfc_io_buf *psb, *next_psb;
struct lpfc_sli4_hdw_queue *qp;
unsigned long iflag = 0;
Expand All @@ -504,15 +504,22 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
int rrq_empty = 0;
struct lpfc_sli_ring *pring = phba->sli4_hba.els_wq->pring;
struct scsi_cmnd *cmd;
int offline = 0;

if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
return;

offline = pci_channel_offline(phba->pcidev);
if (!offline) {
xri = bf_get(lpfc_wcqe_xa_xri, axri);
rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
}
qp = &phba->sli4_hba.hdwq[idx];
spin_lock_irqsave(&phba->hbalock, iflag);
spin_lock(&qp->abts_io_buf_list_lock);
list_for_each_entry_safe(psb, next_psb,
&qp->lpfc_abts_io_buf_list, list) {
if (offline)
xri = psb->cur_iocbq.sli4_xritag;
if (psb->cur_iocbq.sli4_xritag == xri) {
list_del_init(&psb->list);
psb->flags &= ~LPFC_SBUF_XBUSY;
Expand All @@ -521,8 +528,15 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
qp->abts_nvme_io_bufs--;
spin_unlock(&qp->abts_io_buf_list_lock);
spin_unlock_irqrestore(&phba->hbalock, iflag);
lpfc_sli4_nvme_xri_aborted(phba, axri, psb);
return;
if (!offline) {
lpfc_sli4_nvme_xri_aborted(phba, axri,
psb);
return;
}
lpfc_sli4_nvme_pci_offline_aborted(phba, psb);
spin_lock_irqsave(&phba->hbalock, iflag);
spin_lock(&qp->abts_io_buf_list_lock);
continue;
}
qp->abts_scsi_io_bufs--;
spin_unlock(&qp->abts_io_buf_list_lock);
Expand All @@ -534,13 +548,13 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,

rrq_empty = list_empty(&phba->active_rrq_list);
spin_unlock_irqrestore(&phba->hbalock, iflag);
if (ndlp) {
if (ndlp && !offline) {
lpfc_set_rrq_active(phba, ndlp,
psb->cur_iocbq.sli4_lxritag, rxid, 1);
lpfc_sli4_abts_err_handler(phba, ndlp, axri);
}

if (phba->cfg_fcp_wait_abts_rsp) {
if (phba->cfg_fcp_wait_abts_rsp || offline) {
spin_lock_irqsave(&psb->buf_lock, iflag);
cmd = psb->pCmd;
psb->pCmd = NULL;
Expand All @@ -567,25 +581,30 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
lpfc_release_scsi_buf_s4(phba, psb);
if (rrq_empty)
lpfc_worker_wake_up(phba);
return;
if (!offline)
return;
spin_lock_irqsave(&phba->hbalock, iflag);
spin_lock(&qp->abts_io_buf_list_lock);
continue;
}
}
spin_unlock(&qp->abts_io_buf_list_lock);
for (i = 1; i <= phba->sli.last_iotag; i++) {
iocbq = phba->sli.iocbq_lookup[i];

if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
(iocbq->iocb_flag & LPFC_IO_LIBDFC))
continue;
if (iocbq->sli4_xritag != xri)
continue;
psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
psb->flags &= ~LPFC_SBUF_XBUSY;
spin_unlock_irqrestore(&phba->hbalock, iflag);
if (!list_empty(&pring->txq))
lpfc_worker_wake_up(phba);
return;
if (!offline) {
for (i = 1; i <= phba->sli.last_iotag; i++) {
iocbq = phba->sli.iocbq_lookup[i];

if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
(iocbq->iocb_flag & LPFC_IO_LIBDFC))
continue;
if (iocbq->sli4_xritag != xri)
continue;
psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
psb->flags &= ~LPFC_SBUF_XBUSY;
spin_unlock_irqrestore(&phba->hbalock, iflag);
if (!list_empty(&pring->txq))
lpfc_worker_wake_up(phba);
return;
}
}
spin_unlock_irqrestore(&phba->hbalock, iflag);
}
Expand Down

0 comments on commit eb36ec3

Please sign in to comment.