Merge tag 'amd-drm-next-5.17-2021-12-30' of ssh://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-5.17-2021-12-30:

amdgpu:
- Suspend/resume fixes
- Fence fix
- Misc code cleanups
- IP discovery fixes
- SRIOV fixes
- RAS fixes
- GMC 8 VRAM detection fix
- FRU fixes for Aldebaran
- Display fixes

amdkfd:
- SVM fixes
- IP discovery fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211230141032.613596-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2021-12-31 10:59:16 +10:00
commit cb6846fbb8
83 changed files with 1819 additions and 752 deletions

View file

@ -2317,6 +2317,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
/* need to do gmc hw init early so we can allocate gpu mem */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
/* Try to reserve bad pages early */
if (amdgpu_sriov_vf(adev))
amdgpu_virt_exchange_data(adev);
r = amdgpu_device_vram_scratch_init(adev);
if (r) {
DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
@ -2348,7 +2352,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
if (amdgpu_sriov_vf(adev))
amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_exchange_data(adev);
r = amdgpu_ib_pool_init(adev);
if (r) {
@ -2615,11 +2619,10 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
/* For XGMI + passthrough configuration on arcturus, enable light SBR */
if (adev->asic_type == CHIP_ARCTURUS &&
amdgpu_passthrough(adev) &&
adev->gmc.xgmi.num_physical_nodes > 1)
smu_set_light_sbr(&adev->smu, true);
/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
adev->asic_type == CHIP_ALDEBARAN ))
smu_handle_passthrough_sbr(&adev->smu, true);
if (adev->gmc.xgmi.num_physical_nodes > 1) {
mutex_lock(&mgpu_info.mutex);
@ -3182,6 +3185,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
switch (asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
case CHIP_HAINAN:
#endif
case CHIP_TOPAZ:
/* chips with no display hardware */
return false;
#if defined(CONFIG_DRM_AMD_DC)
case CHIP_TAHITI:
case CHIP_PITCAIRN:
@ -3573,6 +3582,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (r)
return r;
/* Need to get xgmi info early to decide the reset behavior*/
if (adev->gmc.xgmi.supported) {
r = adev->gfxhub.funcs->get_xgmi_info(adev);
if (r)
return r;
}
/* enable PCIE atomic ops */
if (amdgpu_sriov_vf(adev))
adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
@ -3885,11 +3901,14 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_irq_fini_hw(adev);
ttm_device_clear_dma_mappings(&adev->mman.bdev);
if (adev->mman.initialized)
ttm_device_clear_dma_mappings(&adev->mman.bdev);
amdgpu_gart_dummy_page_fini(adev);
amdgpu_device_unmap_mmio(adev);
if (drm_dev_is_unplugged(adev_to_drm(adev)))
amdgpu_device_unmap_mmio(adev);
}
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
@ -4507,7 +4526,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context)
{
int i, j, r = 0;
int i, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
@ -4529,15 +4548,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
/*clear job fence from fence drv to avoid force_completion
*leave NULL and vm flush fence in fence drv */
for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) {
struct dma_fence *old, **ptr;
amdgpu_fence_driver_clear_job_fences(ring);
ptr = &ring->fence_drv.fences[j];
old = rcu_dereference_protected(*ptr, 1);
if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) {
RCU_INIT_POINTER(*ptr, NULL);
}
}
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}