Misc driver patches for 5.15-rc1, second round

Here is a second round of misc driver patches for 5.15-rc1.
 
 In here is only updates for the Habanalabs driver.  This request is late
 because the previously-objected-to dma-buf patches are all removed and
 some fixes that you and others found are now included in here as well.
 
 All of these have been in linux-next for well over a week with no
 reports of problems, and they are all self-contained to only this one
 driver.  Full details are in the shortlog.
 
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 -----BEGIN PGP SIGNATURE-----
 
 iG0EABECAC0WIQT0tgzFv3jCIUoxPcsxR9QN2y37KQUCYTtnnQ8cZ3JlZ0Brcm9h
 aC5jb20ACgkQMUfUDdst+yldxgCfeXoRaaCBSzpbejgf48CQRd/m/v0AmgMRVUxB
 kJ58eqBrDMdeExOkvkh8
 =6d/9
 -----END PGP SIGNATURE-----

Merge tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc

Pull habanalabs updates from Greg KH:
 "Here is another round of misc driver patches for 5.15-rc1.

  In here is only updates for the Habanalabs driver. This request is
  late because the previously-objected-to dma-buf patches are all
  removed and some fixes that you and others found are now included in
  here as well.

  All of these have been in linux-next for well over a week with no
  reports of problems, and they are all self-contained to only this one
  driver. Full details are in the shortlog"

* tag 'char-misc-5.15-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (61 commits)
  habanalabs/gaudi: hwmon default card name
  habanalabs: add support for f/w reset
  habanalabs/gaudi: block ICACHE_BASE_ADDERESS_HIGH in TPC
  habanalabs: cannot sleep while holding spinlock
  habanalabs: never copy_from_user inside spinlock
  habanalabs: remove unnecessary device status check
  habanalabs: disable IRQ in user interrupts spinlock
  habanalabs: add "in device creation" status
  habanalabs/gaudi: invalidate PMMU mem cache on init
  habanalabs/gaudi: size should be printed in decimal
  habanalabs/gaudi: define DC POWER for secured PMC
  habanalabs/gaudi: unmask out of bounds SLM access interrupt
  habanalabs: add userptr_lookup node in debugfs
  habanalabs/gaudi: fetch TPC/MME ECC errors from F/W
  habanalabs: modify multi-CS to wait on stream masters
  habanalabs/gaudi: add monitored SOBs to state dump
  habanalabs/gaudi: restore user registers when context opens
  habanalabs/gaudi: increase boot fit timeout
  habanalabs: update to latest firmware headers
  habanalabs/gaudi: minimize number of register reads
  ...
This commit is contained in:
Linus Torvalds 2021-09-10 11:31:47 -07:00
commit 5ffc06ebea
28 changed files with 3880 additions and 796 deletions

View file

@ -276,7 +276,17 @@ enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET,
HL_DEVICE_STATUS_MALFUNCTION,
HL_DEVICE_STATUS_NEEDS_RESET
HL_DEVICE_STATUS_NEEDS_RESET,
HL_DEVICE_STATUS_IN_DEVICE_CREATION,
HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
};
enum hl_server_type {
HL_SERVER_TYPE_UNKNOWN = 0,
HL_SERVER_GAUDI_HLS1 = 1,
HL_SERVER_GAUDI_HLS1H = 2,
HL_SERVER_GAUDI_TYPE1 = 3,
HL_SERVER_GAUDI_TYPE2 = 4
};
/* Opcode for management ioctl
@ -337,17 +347,49 @@ enum hl_device_status {
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
/**
* struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
* @sram_base_address: The first SRAM physical base address that is free to be
* used by the user.
* @dram_base_address: The first DRAM virtual or physical base address that is
* free to be used by the user.
* @dram_size: The DRAM size that is available to the user.
* @sram_size: The SRAM size that is available to the user.
* @num_of_events: The number of events that can be received from the f/w. This
* is needed so the user can what is the size of the h/w events
* array he needs to pass to the kernel when he wants to fetch
* the event counters.
* @device_id: PCI device ID of the ASIC.
* @module_id: Module ID of the ASIC for mezzanine cards in servers
* (From OCP spec).
* @first_available_interrupt_id: The first available interrupt ID for the user
* to be used when it works with user interrupts.
* @server_type: Server type that the Gaudi ASIC is currently installed in.
* The value is according to enum hl_server_type
* @cpld_version: CPLD version on the board.
* @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
* @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
* @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
* @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
* in some ASICs.
* @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
* for Goya/Gaudi only.
* @dram_enabled: Whether the DRAM is enabled.
* @cpucp_version: The CPUCP f/w version.
* @card_name: The card name as passed by the f/w.
* @dram_page_size: The DRAM physical page size.
*/
struct hl_info_hw_ip_info {
__u64 sram_base_address;
__u64 dram_base_address;
__u64 dram_size;
__u32 sram_size;
__u32 num_of_events;
__u32 device_id; /* PCI Device ID */
__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
__u32 device_id;
__u32 module_id;
__u32 reserved;
__u16 first_available_interrupt_id;
__u16 reserved2;
__u16 server_type;
__u32 cpld_version;
__u32 psoc_pci_pll_nr;
__u32 psoc_pci_pll_nf;
@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
__u8 pad[2];
__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
__u64 reserved3;
__u64 reserved2;
__u64 dram_page_size;
};
@ -628,12 +670,21 @@ struct hl_cs_chunk {
__u64 cb_handle;
/* Relevant only when HL_CS_FLAGS_WAIT or
* HL_CS_FLAGS_COLLECTIVE_WAIT is set.
* HL_CS_FLAGS_COLLECTIVE_WAIT is set
* This holds address of array of u64 values that contain
* signal CS sequence numbers. The wait described by this job
* will listen on all those signals (wait event per signal)
* signal CS sequence numbers. The wait described by
* this job will listen on all those signals
* (wait event per signal)
*/
__u64 signal_seq_arr;
/*
* Relevant only when HL_CS_FLAGS_WAIT or
* HL_CS_FLAGS_COLLECTIVE_WAIT is set
* along with HL_CS_FLAGS_ENCAP_SIGNALS.
* This is the CS sequence which has the encapsulated signals.
*/
__u64 encaps_signal_seq;
};
/* Index of queue to put the CB on */
@ -651,6 +702,17 @@ struct hl_cs_chunk {
* Number of entries in signal_seq_arr
*/
__u32 num_signal_seq_arr;
/* Relevant only when HL_CS_FLAGS_WAIT or
* HL_CS_FLAGS_COLLECTIVE_WAIT is set along
* with HL_CS_FLAGS_ENCAP_SIGNALS
* This set the signals range that the user want to wait for
* out of the whole reserved signals range.
* e.g if the signals range is 20, and user don't want
* to wait for signal 8, so he set this offset to 7, then
* he call the API again with 9 and so on till 20.
*/
__u32 encaps_signal_offset;
};
/* HL_CS_CHUNK_FLAGS_* */
@ -678,6 +740,28 @@ struct hl_cs_chunk {
#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200
#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400
/*
* The encapsulated signals CS is merged into the existing CS ioctls.
* In order to use this feature need to follow the below procedure:
* 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY
* the output of this API will be the SOB offset from CFG_BASE.
* this address will be used to patch CB cmds to do the signaling for this
* SOB by incrementing it's value.
* for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
* CS type, note that this might fail if out-of-sync happened to the SOB
* value, in case other signaling request to the same SOB occurred between
* reserve-unreserve calls.
* 2. Use the staged CS to do the encapsulated signaling jobs.
* use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
* along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset
* field. This offset allows app to wait on part of the reserved signals.
* 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag
* to wait for the encapsulated signals.
*/
#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800
#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000
#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000
#define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512
@ -690,10 +774,35 @@ struct hl_cs_in {
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
/* Sequence number of a staged submission CS
* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
*/
__u64 seq;
union {
/*
* Sequence number of a staged submission CS
* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
* HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
*/
__u64 seq;
/*
* Encapsulated signals handle id
* Valid for two flows:
* 1. CS with encapsulated signals:
* when HL_CS_FLAGS_STAGED_SUBMISSION and
* HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
* and HL_CS_FLAGS_ENCAP_SIGNALS are set.
* 2. unreserve signals:
* valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
*/
__u32 encaps_sig_handle_id;
/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
struct {
/* Encapsulated signals number */
__u32 encaps_signals_count;
/* Encapsulated signals queue index (stream) */
__u32 encaps_signals_q_idx;
};
};
/* Number of chunks in restore phase array. Maximum number is
* HL_MAX_JOBS_PER_CS
@ -718,14 +827,31 @@ struct hl_cs_in {
};
struct hl_cs_out {
/*
* seq holds the sequence number of the CS to pass to wait ioctl. All
* values are valid except for 0 and ULLONG_MAX
*/
__u64 seq;
/* HL_CS_STATUS_* */
union {
/*
* seq holds the sequence number of the CS to pass to wait
* ioctl. All values are valid except for 0 and ULLONG_MAX
*/
__u64 seq;
/* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
struct {
/* This is the resereved signal handle id */
__u32 handle_id;
/* This is the signals count */
__u32 count;
};
};
/* HL_CS_STATUS */
__u32 status;
__u32 pad;
/*
* SOB base address offset
* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
*/
__u32 sob_base_addr_offset;
};
union hl_cs_args {
@ -735,11 +861,18 @@ union hl_cs_args {
#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2
#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4
#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32
struct hl_wait_cs_in {
union {
struct {
/* Command submission sequence number */
/*
* In case of wait_cs holds the CS sequence number.
* In case of wait for multi CS hold a user pointer to
* an array of CS sequence numbers
*/
__u64 seq;
/* Absolute timeout to wait for command submission
* in microseconds
@ -767,12 +900,17 @@ struct hl_wait_cs_in {
/* Context ID - Currently not in use */
__u32 ctx_id;
/* HL_WAIT_CS_FLAGS_*
* If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
* interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order
* not to specify an interrupt id ,set mask to all 1s.
*/
__u32 flags;
/* Multi CS API info- valid entries in multi-CS array */
__u8 seq_arr_len;
__u8 pad[7];
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
@ -789,8 +927,15 @@ struct hl_wait_cs_out {
__u32 status;
/* HL_WAIT_CS_STATUS_FLAG* */
__u32 flags;
/* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
/*
* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
* for wait_cs: timestamp of CS completion
* for wait_multi_cs: timestamp of FIRST CS completion
*/
__s64 timestamp_nsec;
/* multi CS completion bitmap */
__u32 cs_completion_map;
__u32 pad;
};
union hl_wait_cs_args {
@ -813,6 +958,7 @@ union hl_wait_cs_args {
#define HL_MEM_CONTIGUOUS 0x1
#define HL_MEM_SHARED 0x2
#define HL_MEM_USERPTR 0x4
#define HL_MEM_FORCE_HINT 0x8
struct hl_mem_in {
union {