From ae3c1c2368ba07f621c3e60984f6d61f2800b2e1 Mon Sep 17 00:00:00 2001
From: sbwml
Date: Sun, 7 Jul 2024 21:39:51 +0800
Subject: [PATCH] rockchip: backport panthor gpu driver

* rk3588

Signed-off-by: sbwml
---
 armv8/config-6.6 | 1 -
 image/nanopi-r4s.bootscript | 2 +-
 image/nanopi-r5s.bootscript | 2 +-
 modules.mk | 24 +-
 .../003-rk3568-general-patch-set.patch | 423 --
 .../005-friendlyelec-nanopi-series.patch | 196 -
 ...2-rk356x-add-dwc3-xhci-usb-trb-quirk.patch | 14 +-
 ...dd-Samsung-HDMI-eDP-Combo-PHY-driver.patch | 1084 ++++
 ...chip-vop-Add-rv1126-vop_lite-support.patch | 88 +
 ...chip-dsi-Add-rv1126-MIPI-DSI-support.patch | 60 +
 ...e-cleanup-helper-directly-as-destroy.patch | 71 +
 ...2-Demote-message-in-mod_supported-to.patch | 35 +
 ...ckchip-remove-redundant-of_match_ptr.patch | 53 +
 ...e-devm_platform_get_and_ioremap_reso.patch | 29 +
 ...ockchip-remove-unused-struct-in-vop2.patch | 54 +
 ...kchip-remove-NR_LAYERS-macro-on-vop2.patch | 36 +
 ...kchip-vop-fix-format-bpp-calculation.patch | 57 +
 ...ove-the-unsupported-format-of-cluste.patch | 89 +
 ...op2-Add-more-supported-10bit-formats.patch | 162 +
 ...ame-window-formats-to-show-window-ty.patch | 116 +
 ...fourcc-Add-NV20-and-NV30-YUV-formats.patch | 57 +
 ...p-vop-Add-NV15-NV20-and-NV30-support.patch | 231 +
 ...kchip-vop2-Add-NV20-and-NV30-support.patch | 67 +
 ...k3066_hdmi-Remove-useless-mode_fixup.patch | 40 +
 ..._hdmi-Switch-encoder-hooks-to-atomic.patch | 88 +
 ...rk3066_hdmi-include-drm-drm_atomic.h.patch | 43 +
 ...tput-interface-related-definition-to.patch | 189 +
 ...ckchip-vop2-Use-regcache_sync-to-fix.patch | 60 +
 ...p2-set-half_block_en-bit-in-all-mode.patch | 83 +
 ...-clear-afbc-en-and-transform-bit-for.patch | 36 +
 ...p2-Add-write-mask-for-VP-config-done.patch | 50 +
 ...ckchip-vop2-Set-YUV-RGB-overlay-mode.patch | 95 +
 ...p-vop2-set-bg-dly-and-prescan-dly-at.patch | 70 +
 ...-rockchip-vop2-rename-grf-to-sys_grf.patch | 50 +
 ...ip-vop2-Add-more-endpoint-definition.patch | 28 +
 ...rockchip-vop2-Add-support-for-rk3588.patch | 997 ++++
 ...2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch | 80 +
 ...id-use-regmap_reinit_cache-at-runtim.patch | 59 +
 ...clean-up-some-inconsistent-indenting.patch | 31 +
 ...ckchip-vop2-Drop-superfluous-include.patch | 25 +
 ...p2-Drop-unused-if_dclk_rate-variable.patch | 47 +
 ...ockchip-vop2-add-a-missing-unlock-in.patch | 31 +
 ...e-struct-drm_gpuva_manager-to-struct.patch | 5137 +++++++++++++++++
 ...7-drm-gpuvm-allow-building-as-module.patch | 78 +
 ...rm-nouveau-uvmm-rename-umgr-to-base-.patch | 208 +
 ...ce-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch | 45 +
 ...vm-convert-WARN-to-drm_WARN-variants.patch | 165 +
 ...m-gpuvm-export-drm_gpuvm_range_valid.patch | 61 +
 ...au-make-use-of-drm_gpuvm_range_valid.patch | 66 +
 ...common-dma-resv-per-struct-drm_gpuvm.patch | 205 +
 ...e-use-of-the-GPUVM-s-shared-dma-resv.patch | 140 +
 ...uvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch | 98 +
 ...arately-allocate-struct-nouveau_uvmm.patch | 219 +
 ...reference-count-drm_gpuvm-structures.patch | 221 +
 ...-abstraction-for-a-VM-BO-combination.patch | 1036 ++++
 ...ck-validate-external-evicted-objects.patch | 1052 ++++
 ...veau-use-GPUVM-common-infrastructure.patch | 448 ++
 ...-get-range-of-unmap-from-a-remap-op..patch | 60 +
 ...vm-Fix-deprecated-license-identifier.patch | 41 +
 ...gpuvm-fall-back-to-drm_exec_lock_obj.patch | 142 +
 ...m_gpuvm_bo_put-report-when-the-vm_bo.patch | 59 +
 ...-GPU-scheduler-to-variable-number-of.patch | 405 ++
 ...-sched-Add-drm_sched_wqueue_-helpers.patch | 241 +
 ...-scheduler-to-use-a-work-queue-rathe.patch | 507 ++
 ...ed-Split-free_job-into-own-work-item.patch | 275 +
 ...dd-a-helper-to-queue-TDR-immediately.patch | 70 +
 ...x-from-drm_sched_wakeup_if_can_queue.patch | 70 +
 ...rm-sched-Qualify-drm_sched_wakeup-by.patch | 69 +
 ...d-implement-dynamic-job-flow-control.patch | 612 ++
 ...custom-allocators-to-pgtable-drivers.patch | 129 +
 ...c-drm-gpuvm-Prefer-u32-over-uint32_t.patch | 68 +
 .../034-30-v6.10-drm-panthor-Add-uAPI.patch | 1024 ++++
 ...panthor-Add-GPU-register-definitions.patch | 280 +
 ...panthor-Add-the-device-logical-block.patch | 1013 ++++
 ...rm-panthor-Add-the-GPU-logical-block.patch | 593 ++
 ...10-drm-panthor-Add-GEM-logical-block.patch | 426 ++
 ...anthor-Add-the-devfreq-logical-block.patch | 356 ++
 ...panthor-Add-the-MMU-VM-logical-block.patch | 2975 ++++++++++
 ...drm-panthor-Add-the-FW-logical-block.patch | 1929 +++++++
 ...m-panthor-Add-the-heap-logical-block.patch | 696 +++
 ...thor-Add-the-scheduler-logical-block.patch | 3647 ++++++++++++
 ...anthor-Add-the-driver-frontend-block.patch | 1534 +++++
 ...drm-panthor-Allow-driver-compilation.patch | 104 +
 ...anthor-Fix-panthor_devfreq-kerneldoc.patch | 27 +
 ...xplicitly-include-mm-h-for-the-virt-.patch | 31 +
 ...drm-panthor-Fix-the-CONFIG_PM_n-case.patch | 75 +
 ...10-drm-panthor-Don-t-use-virt_to_pfn.patch | 74 +
 ...spelling-mistake-readyness-readiness.patch | 26 +
 ...IO-page-mmap-for-32-bit-userspace-on.patch | 142 +
 ...panthor-Fix-ordering-in-_irq_suspend.patch | 55 +
 ...-Drop-the-dev_enter-exit-sections-in.patch | 56 +
 ...ix-clang-Wunused-but-set-variable-in.patch | 46 +
 ...thor-Cleanup-unused-variable-cookie-.patch | 40 +
 ...-panthor-Fix-some-kerneldoc-warnings.patch | 58 +
 ...thor-Fix-a-couple-ENOMEM-error-codes.patch | 42 +
 ...r-Fix-error-code-in-panthor_gpu_init.patch | 33 +
 ...ff-by-one-in-panthor_fw_get_cs_iface.patch | 28 +
 ...0-drm-panthor-Don-t-return-NULL-from.patch | 58 +
 ...-NULL-vs-IS_ERR-bug-in-panthor_probe.patch | 28 +
 ...-some-types-in-panthor_sched_suspend.patch | 34 +
 ...gic-v3-add-hackaround-for-rk3568-its.patch | 32 +-
 101 files changed, 31785 insertions(+), 657 deletions(-)
 delete mode 100644 patches-6.6/003-rk3568-general-patch-set.patch
 delete mode 100644 patches-6.6/005-friendlyelec-nanopi-series.patch
 create mode 100644 patches-6.6/032-01-v6.9-phy-rockchip-Add-Samsung-HDMI-eDP-Combo-PHY-driver.patch
 create mode 100644 patches-6.6/033-01-v6.7-drm-rockchip-vop-Add-rv1126-vop_lite-support.patch
 create mode 100644 patches-6.6/033-02-v6.7-drm-rockchip-dsi-Add-rv1126-MIPI-DSI-support.patch
 create mode 100644 patches-6.6/033-03-v6.7-drm-rockchip-vop-Use-cleanup-helper-directly-as-destroy.patch
 create mode 100644 patches-6.6/033-04-v6.7-drm-rockchip-vop2-Demote-message-in-mod_supported-to.patch
 create mode 100644 patches-6.6/033-05-v6.7-drm-rockchip-remove-redundant-of_match_ptr.patch
 create mode 100644 patches-6.6/033-06-v6.7-drm-rockchip-dsi-Use-devm_platform_get_and_ioremap_reso.patch
 create mode 100644 patches-6.6/033-07-v6.7-drm-rockchip-remove-unused-struct-in-vop2.patch
 create mode 100644 patches-6.6/033-08-v6.7-drm-rockchip-remove-NR_LAYERS-macro-on-vop2.patch
 create mode 100644 patches-6.6/033-09-v6.7-drm-rockchip-vop-fix-format-bpp-calculation.patch
 create mode 100644 patches-6.6/033-10-v6.7-drm-rockchip-vop2-remove-the-unsupported-format-of-cluste.patch
 create mode 100644 patches-6.6/033-11-v6.7-drm-rockchip-vop2-Add-more-supported-10bit-formats.patch
 create mode 100644 patches-6.6/033-12-v6.7-drm-rockchip-vop2-rename-window-formats-to-show-window-ty.patch
 create mode 100644
patches-6.6/033-13-v6.7-drm-fourcc-Add-NV20-and-NV30-YUV-formats.patch create mode 100644 patches-6.6/033-14-v6.7-drm-rockchip-vop-Add-NV15-NV20-and-NV30-support.patch create mode 100644 patches-6.6/033-15-v6.8-drm-rockchip-vop2-Add-NV20-and-NV30-support.patch create mode 100644 patches-6.6/033-16-v6.8-drm-rockchip-rk3066_hdmi-Remove-useless-mode_fixup.patch create mode 100644 patches-6.6/033-17-v6.8-drm-rockchip-rk3066_hdmi-Switch-encoder-hooks-to-atomic.patch create mode 100644 patches-6.6/033-18-v6.8-drm-rockchip-rk3066_hdmi-include-drm-drm_atomic.h.patch create mode 100644 patches-6.6/033-19-v6.8-drm-rockchip-move-output-interface-related-definition-to.patch create mode 100644 patches-6.6/033-20-v6.8-Revert-drm-rockchip-vop2-Use-regcache_sync-to-fix.patch create mode 100644 patches-6.6/033-21-v6.8-drm-rockchip-vop2-set-half_block_en-bit-in-all-mode.patch create mode 100644 patches-6.6/033-22-v6.8-drm-rockchip-vop2-clear-afbc-en-and-transform-bit-for.patch create mode 100644 patches-6.6/033-23-v6.8-drm-rockchip-vop2-Add-write-mask-for-VP-config-done.patch create mode 100644 patches-6.6/033-24-v6.8-drm-rockchip-vop2-Set-YUV-RGB-overlay-mode.patch create mode 100644 patches-6.6/033-25-v6.8-drm-rockchip-vop2-set-bg-dly-and-prescan-dly-at.patch create mode 100644 patches-6.6/033-26-v6.8-drm-rockchip-vop2-rename-grf-to-sys_grf.patch create mode 100644 patches-6.6/033-27-v6.8-dt-bindings-rockchip-vop2-Add-more-endpoint-definition.patch create mode 100644 patches-6.6/033-28-v6.8-drm-rockchip-vop2-Add-support-for-rk3588.patch create mode 100644 patches-6.6/033-29-v6.8-drm-rockchip-vop2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch create mode 100644 patches-6.6/033-30-v6.8-drm-rockchip-vop2-Avoid-use-regmap_reinit_cache-at-runtim.patch create mode 100644 patches-6.6/033-31-v6.8-drm-rockchip-vop2-clean-up-some-inconsistent-indenting.patch create mode 100644 patches-6.6/033-32-v6.8-drm-rockchip-vop2-Drop-superfluous-include.patch create mode 100644 patches-6.6/033-33-v6.8-drm-rockchip-vop2-Drop-unused-if_dclk_rate-variable.patch create mode 100644 patches-6.6/033-34-v6.8-drm-rockchip-vop2-add-a-missing-unlock-in.patch create mode 100644 patches-6.6/034-01-v6.7-drm-gpuvm-rename-struct-drm_gpuva_manager-to-struct.patch create mode 100644 patches-6.6/034-02-v6.7-drm-gpuvm-allow-building-as-module.patch create mode 100644 patches-6.6/034-03-v6.7-drm-nouveau-uvmm-rename-umgr-to-base-.patch create mode 100644 patches-6.6/034-04-v6.7-drm-gpuvm-Dual-licence-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch create mode 100644 patches-6.6/034-05-v6.8-drm-gpuvm-convert-WARN-to-drm_WARN-variants.patch create mode 100644 patches-6.6/034-06-v6.8-drm-gpuvm-export-drm_gpuvm_range_valid.patch create mode 100644 patches-6.6/034-07-v6.8-drm-nouveau-make-use-of-drm_gpuvm_range_valid.patch create mode 100644 patches-6.6/034-08-v6.8-drm-gpuvm-add-common-dma-resv-per-struct-drm_gpuvm.patch create mode 100644 patches-6.6/034-09-v6.8-drm-nouveau-make-use-of-the-GPUVM-s-shared-dma-resv.patch create mode 100644 patches-6.6/034-10-v6.8-drm-gpuvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch create mode 100644 patches-6.6/034-11-v6.8-drm-nouveau-separately-allocate-struct-nouveau_uvmm.patch create mode 100644 patches-6.6/034-12-v6.8-drm-gpuvm-reference-count-drm_gpuvm-structures.patch create mode 100644 patches-6.6/034-13-v6.8-drm-gpuvm-add-an-abstraction-for-a-VM-BO-combination.patch create mode 100644 patches-6.6/034-14-v6.8-drm-gpuvm-track-lock-validate-external-evicted-objects.patch create mode 100644 
patches-6.6/034-15-v6.8-drm-nouveau-use-GPUVM-common-infrastructure.patch create mode 100644 patches-6.6/034-16-v6.8-drm-gpuvm-Helper-to-get-range-of-unmap-from-a-remap-op..patch create mode 100644 patches-6.6/034-17-v6.8-drm-gpuvm-Fix-deprecated-license-identifier.patch create mode 100644 patches-6.6/034-18-v6.8-drm-gpuvm-fall-back-to-drm_exec_lock_obj.patch create mode 100644 patches-6.6/034-19-v6.8-drm-gpuvm-Let-drm_gpuvm_bo_put-report-when-the-vm_bo.patch create mode 100644 patches-6.6/034-20-v6.7-drm-sched-Convert-the-GPU-scheduler-to-variable-number-of.patch create mode 100644 patches-6.6/034-21-v6.8-drm-sched-Add-drm_sched_wqueue_-helpers.patch create mode 100644 patches-6.6/034-22-v6.8-drm-sched-Convert-drm-scheduler-to-use-a-work-queue-rathe.patch create mode 100644 patches-6.6/034-23-v6.8-drm-sched-Split-free_job-into-own-work-item.patch create mode 100644 patches-6.6/034-24-v6.8-drm-sched-Add-a-helper-to-queue-TDR-immediately.patch create mode 100644 patches-6.6/034-25-v6.8-drm-sched-Drop-suffix-from-drm_sched_wakeup_if_can_queue.patch create mode 100644 patches-6.6/034-26-v6.8-drm-sched-Qualify-drm_sched_wakeup-by.patch create mode 100644 patches-6.6/034-27-v6.8-drm-sched-implement-dynamic-job-flow-control.patch create mode 100644 patches-6.6/034-28-v6.8-iommu-Allow-passing-custom-allocators-to-pgtable-drivers.patch create mode 100644 patches-6.6/034-29-v6.9-drm-exec-drm-gpuvm-Prefer-u32-over-uint32_t.patch create mode 100644 patches-6.6/034-30-v6.10-drm-panthor-Add-uAPI.patch create mode 100644 patches-6.6/034-31-v6.10-drm-panthor-Add-GPU-register-definitions.patch create mode 100644 patches-6.6/034-32-v6.10-drm-panthor-Add-the-device-logical-block.patch create mode 100644 patches-6.6/034-33-v6.10-drm-panthor-Add-the-GPU-logical-block.patch create mode 100644 patches-6.6/034-34-v6.10-drm-panthor-Add-GEM-logical-block.patch create mode 100644 patches-6.6/034-35-v6.10-drm-panthor-Add-the-devfreq-logical-block.patch create mode 100644 patches-6.6/034-36-v6.10-drm-panthor-Add-the-MMU-VM-logical-block.patch create mode 100644 patches-6.6/034-37-v6.10-drm-panthor-Add-the-FW-logical-block.patch create mode 100644 patches-6.6/034-38-v6.10-drm-panthor-Add-the-heap-logical-block.patch create mode 100644 patches-6.6/034-39-v6.10-drm-panthor-Add-the-scheduler-logical-block.patch create mode 100644 patches-6.6/034-40-v6.10-drm-panthor-Add-the-driver-frontend-block.patch create mode 100644 patches-6.6/034-41-v6.10-drm-panthor-Allow-driver-compilation.patch create mode 100644 patches-6.6/034-42-v6.10-drm-panthor-Fix-panthor_devfreq-kerneldoc.patch create mode 100644 patches-6.6/034-43-v6.10-drm-panthor-Explicitly-include-mm-h-for-the-virt-.patch create mode 100644 patches-6.6/034-44-v6.10-drm-panthor-Fix-the-CONFIG_PM_n-case.patch create mode 100644 patches-6.6/034-45-v6.10-drm-panthor-Don-t-use-virt_to_pfn.patch create mode 100644 patches-6.6/034-46-v6.10-drm-panthor-Fix-spelling-mistake-readyness-readiness.patch create mode 100644 patches-6.6/034-47-v6.10-drm-panthor-Fix-IO-page-mmap-for-32-bit-userspace-on.patch create mode 100644 patches-6.6/034-48-v6.10-drm-panthor-Fix-ordering-in-_irq_suspend.patch create mode 100644 patches-6.6/034-49-v6.10-drm-panthor-Drop-the-dev_enter-exit-sections-in.patch create mode 100644 patches-6.6/034-50-v6.10-drm-panthor-Fix-clang-Wunused-but-set-variable-in.patch create mode 100644 patches-6.6/034-51-v6.10-drm-panthor-Cleanup-unused-variable-cookie-.patch create mode 100644 patches-6.6/034-52-v6.10-drm-panthor-Fix-some-kerneldoc-warnings.patch create mode 
100644 patches-6.6/034-53-v6.10-drm-panthor-Fix-a-couple-ENOMEM-error-codes.patch create mode 100644 patches-6.6/034-54-v6.10-drm-panthor-Fix-error-code-in-panthor_gpu_init.patch create mode 100644 patches-6.6/034-55-v6.10-drm-panthor-Fix-off-by-one-in-panthor_fw_get_cs_iface.patch create mode 100644 patches-6.6/034-56-v6.10-drm-panthor-Don-t-return-NULL-from.patch create mode 100644 patches-6.6/034-57-v6.10-drm-panthor-Fix-NULL-vs-IS_ERR-bug-in-panthor_probe.patch create mode 100644 patches-6.6/034-58-v6.10-drm-panthor-clean-up-some-types-in-panthor_sched_suspend.patch diff --git a/armv8/config-6.6 b/armv8/config-6.6 index 7fd040c..ec72bb0 100644 --- a/armv8/config-6.6 +++ b/armv8/config-6.6 @@ -765,7 +765,6 @@ CONFIG_USB_ULPI_BUS=y CONFIG_USB_ULPI_VIEWPORT=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_PLATFORM=y -CONFIG_VENDOR_FRIENDLYELEC=y # CONFIG_VIRTIO_MENU is not set CONFIG_VIRTUALIZATION=y CONFIG_VMAP_STACK=y diff --git a/image/nanopi-r4s.bootscript b/image/nanopi-r4s.bootscript index 2909aee..fc67b29 100644 --- a/image/nanopi-r4s.bootscript +++ b/image/nanopi-r4s.bootscript @@ -1,6 +1,6 @@ part uuid mmc ${devnum}:2 uuid -setenv bootargs "console=ttyS2,1500000 earlycon=uart8250,mmio32,0xff1a0000 root=PARTUUID=${uuid} rw rootwait mitigations=off" +setenv bootargs "coherent_pool=2M console=ttyS2,1500000 earlycon=uart8250,mmio32,0xff1a0000 root=PARTUUID=${uuid} rw rootwait mitigations=off" load mmc ${devnum}:1 ${kernel_addr_r} kernel.img diff --git a/image/nanopi-r5s.bootscript b/image/nanopi-r5s.bootscript index 0cea211..27b55c5 100644 --- a/image/nanopi-r5s.bootscript +++ b/image/nanopi-r5s.bootscript @@ -1,6 +1,6 @@ part uuid mmc ${devnum}:2 uuid -setenv bootargs "console=tty1 console=ttyS2,1500000 earlycon=uart8250,mmio32,0xfe660000 root=PARTUUID=${uuid} rw rootwait mitigations=off pcie_aspm=off" +setenv bootargs "coherent_pool=2M console=tty1 console=ttyS2,1500000 earlycon=uart8250,mmio32,0xfe660000 root=PARTUUID=${uuid} rw rootwait mitigations=off pcie_aspm=off" load mmc ${devnum}:1 ${kernel_addr_r} kernel.img diff --git a/modules.mk b/modules.mk index af5de3d..d69241f 100644 --- a/modules.mk +++ b/modules.mk @@ -5,7 +5,8 @@ define KernelPackage/drm-rockchip SUBMENU:=$(VIDEO_MENU) TITLE:=Rockchip DRM support - DEPENDS:=@TARGET_rockchip +kmod-backlight +kmod-drm-kms-helper +kmod-multimedia-input + DEPENDS:=@TARGET_rockchip +kmod-backlight +kmod-drm-kms-helper \ + +kmod-multimedia-input +kmod-drm-display-helper KCONFIG:= \ CONFIG_DRM_ROCKCHIP \ CONFIG_DRM_LOAD_EDID_FIRMWARE=y \ @@ -14,36 +15,35 @@ define KernelPackage/drm-rockchip CONFIG_DRM_BRIDGE=y \ CONFIG_HDMI=y \ CONFIG_PHY_ROCKCHIP_INNO_HDMI \ + CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX \ CONFIG_DRM_DW_HDMI \ CONFIG_DRM_DW_HDMI_CEC \ - CONFIG_ROCKCHIP_VOP=y \ - CONFIG_ROCKCHIP_VOP2=y \ CONFIG_ROCKCHIP_ANALOGIX_DP=n \ CONFIG_ROCKCHIP_CDN_DP=n \ CONFIG_ROCKCHIP_DW_HDMI=y \ - CONFIG_ROCKCHIP_INNO_HDMI=y \ CONFIG_ROCKCHIP_DW_MIPI_DSI=y \ + CONFIG_ROCKCHIP_INNO_HDMI=y \ CONFIG_ROCKCHIP_LVDS=y \ CONFIG_ROCKCHIP_RGB=n \ CONFIG_ROCKCHIP_RK3066_HDMI=n \ + CONFIG_ROCKCHIP_VOP=y \ + CONFIG_ROCKCHIP_VOP2=y \ CONFIG_DRM_DP_AUX_BUS \ - CONFIG_DRM_DW_HDMI_GP_AUDIO=n \ + CONFIG_DRM_GEM_DMA_HELPER \ CONFIG_DRM_PANEL=y \ CONFIG_DRM_PANEL_BRIDGE=y \ - CONFIG_DRM_PANEL_SIMPLE \ - CONFIG_MEDIA_CEC_RC=y + CONFIG_DRM_PANEL_SIMPLE FILES:= \ $(LINUX_DIR)/drivers/gpu/drm/bridge/synopsys/dw-hdmi.ko \ $(LINUX_DIR)/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.ko \ $(LINUX_DIR)/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.ko \ - 
$(LINUX_DIR)/drivers/media/cec/core/cec.ko \ $(LINUX_DIR)/drivers/phy/rockchip/phy-rockchip-inno-hdmi.ko \ - $(LINUX_DIR)/drivers/gpu/drm/display/drm_dp_aux_bus.ko@le5.15 \ + $(LINUX_DIR)/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.ko \ $(LINUX_DIR)/drivers/gpu/drm/drm_dma_helper.ko \ - $(LINUX_DIR)/drivers/gpu/drm/display/drm_display_helper.ko \ $(LINUX_DIR)/drivers/gpu/drm/panel/panel-simple.ko \ - $(LINUX_DIR)/drivers/gpu/drm/rockchip/rockchipdrm.ko - AUTOLOAD:=$(call AutoProbe,rockchipdrm phy-rockchip-inno-hdmi dw-hdmi-cec) + $(LINUX_DIR)/drivers/gpu/drm/rockchip/rockchipdrm.ko \ + $(LINUX_DIR)/drivers/media/cec/core/cec.ko + AUTOLOAD:=$(call AutoProbe,rockchipdrm phy-rockchip-inno-hdmi phy-rockchip-samsung-hdptx dw-hdmi-cec) endef define KernelPackage/drm-rockchip/description diff --git a/patches-6.6/003-rk3568-general-patch-set.patch b/patches-6.6/003-rk3568-general-patch-set.patch deleted file mode 100644 index 71440e7..0000000 --- a/patches-6.6/003-rk3568-general-patch-set.patch +++ /dev/null @@ -1,423 +0,0 @@ ---- a/drivers/clk/rockchip/clk-half-divider.c -+++ b/drivers/clk/rockchip/clk-half-divider.c -@@ -166,7 +166,7 @@ struct clk *rockchip_clk_register_halfdi - unsigned long flags, - spinlock_t *lock) - { -- struct clk_hw *hw = ERR_PTR(-ENOMEM); -+ struct clk_hw *hw; - struct clk_mux *mux = NULL; - struct clk_gate *gate = NULL; - struct clk_divider *div = NULL; ---- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c -+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c -@@ -92,74 +92,70 @@ static struct rockchip_hdmi *to_rockchip - - static const struct dw_hdmi_mpll_config rockchip_mpll_cfg[] = { - { -- 27000000, { -- { 0x00b3, 0x0000}, -- { 0x2153, 0x0000}, -- { 0x40f3, 0x0000} -+ 30666000, { -+ { 0x00b3, 0x0000 }, -+ { 0x2153, 0x0000 }, -+ { 0x40f3, 0x0000 }, - }, - }, { -- 36000000, { -- { 0x00b3, 0x0000}, -- { 0x2153, 0x0000}, -- { 0x40f3, 0x0000} -+ 36800000, { -+ { 0x00b3, 0x0000 }, -+ { 0x2153, 0x0000 }, -+ { 0x40a2, 0x0001 }, - }, - }, { -- 40000000, { -- { 0x00b3, 0x0000}, -- { 0x2153, 0x0000}, -- { 0x40f3, 0x0000} -+ 46000000, { -+ { 0x00b3, 0x0000 }, -+ { 0x2142, 0x0001 }, -+ { 0x40a2, 0x0001 }, - }, - }, { -- 54000000, { -- { 0x0072, 0x0001}, -- { 0x2142, 0x0001}, -- { 0x40a2, 0x0001}, -+ 61333000, { -+ { 0x0072, 0x0001 }, -+ { 0x2142, 0x0001 }, -+ { 0x40a2, 0x0001 }, - }, - }, { -- 65000000, { -- { 0x0072, 0x0001}, -- { 0x2142, 0x0001}, -- { 0x40a2, 0x0001}, -+ 73600000, { -+ { 0x0072, 0x0001 }, -+ { 0x2142, 0x0001 }, -+ { 0x4061, 0x0002 }, - }, - }, { -- 66000000, { -- { 0x013e, 0x0003}, -- { 0x217e, 0x0002}, -- { 0x4061, 0x0002} -+ 92000000, { -+ { 0x0072, 0x0001 }, -+ { 0x2145, 0x0002 }, -+ { 0x4061, 0x0002 }, - }, - }, { -- 74250000, { -- { 0x0072, 0x0001}, -- { 0x2145, 0x0002}, -- { 0x4061, 0x0002} -+ 122666000, { -+ { 0x0051, 0x0002 }, -+ { 0x2145, 0x0002 }, -+ { 0x4061, 0x0002 }, - }, - }, { -- 83500000, { -- { 0x0072, 0x0001}, -+ 147200000, { -+ { 0x0051, 0x0002 }, -+ { 0x2145, 0x0002 }, -+ { 0x4064, 0x0003 }, - }, - }, { -- 108000000, { -- { 0x0051, 0x0002}, -- { 0x2145, 0x0002}, -- { 0x4061, 0x0002} -+ 184000000, { -+ { 0x0051, 0x0002 }, -+ { 0x214c, 0x0003 }, -+ { 0x4064, 0x0003 }, - }, - }, { -- 106500000, { -- { 0x0051, 0x0002}, -- { 0x2145, 0x0002}, -- { 0x4061, 0x0002} -- }, -- }, { -- 146250000, { -- { 0x0051, 0x0002}, -- { 0x2145, 0x0002}, -- { 0x4061, 0x0002} -+ 226666000, { -+ { 0x0040, 0x0003 }, -+ { 0x214c, 0x0003 }, -+ { 0x4064, 0x0003 }, - }, - }, { -- 148500000, { -- { 0x0051, 0x0003}, -- { 0x214c, 0x0003}, -- { 0x4064, 0x0003} -+ 272000000, { 
-+ { 0x0040, 0x0003 }, -+ { 0x214c, 0x0003 }, -+ { 0x5a64, 0x0003 }, - }, - }, { - 340000000, { -@@ -168,10 +164,16 @@ static const struct dw_hdmi_mpll_config - { 0x5a64, 0x0003 }, - }, - }, { -+ 600000000, { -+ { 0x1a40, 0x0003 }, -+ { 0x3b4c, 0x0003 }, -+ { 0x5a64, 0x0003 }, -+ }, -+ }, { - ~0UL, { -- { 0x00a0, 0x000a }, -- { 0x2001, 0x000f }, -- { 0x4002, 0x000f }, -+ { 0x0000, 0x0000 }, -+ { 0x0000, 0x0000 }, -+ { 0x0000, 0x0000 }, - }, - } - }; -@@ -179,20 +181,6 @@ static const struct dw_hdmi_mpll_config - static const struct dw_hdmi_curr_ctrl rockchip_cur_ctr[] = { - /* pixelclk bpp8 bpp10 bpp12 */ - { -- 40000000, { 0x0018, 0x0018, 0x0018 }, -- }, { -- 65000000, { 0x0028, 0x0028, 0x0028 }, -- }, { -- 66000000, { 0x0038, 0x0038, 0x0038 }, -- }, { -- 74250000, { 0x0028, 0x0038, 0x0038 }, -- }, { -- 83500000, { 0x0028, 0x0038, 0x0038 }, -- }, { -- 146250000, { 0x0038, 0x0038, 0x0038 }, -- }, { -- 148500000, { 0x0000, 0x0038, 0x0038 }, -- }, { - 600000000, { 0x0000, 0x0000, 0x0000 }, - }, { - ~0UL, { 0x0000, 0x0000, 0x0000}, -@@ -204,6 +192,7 @@ static const struct dw_hdmi_phy_config r - { 74250000, 0x8009, 0x0004, 0x0272}, - { 148500000, 0x802b, 0x0004, 0x028d}, - { 297000000, 0x8039, 0x0005, 0x028d}, -+ { 594000000, 0x8039, 0x0000, 0x019d}, - { ~0UL, 0x0000, 0x0000, 0x0000} - }; - -@@ -249,42 +238,6 @@ static int rockchip_hdmi_parse_dt(struct - return 0; - } - --static enum drm_mode_status --dw_hdmi_rockchip_mode_valid(struct dw_hdmi *dw_hdmi, void *data, -- const struct drm_display_info *info, -- const struct drm_display_mode *mode) --{ -- struct rockchip_hdmi *hdmi = data; -- const struct dw_hdmi_mpll_config *mpll_cfg = rockchip_mpll_cfg; -- int pclk = mode->clock * 1000; -- bool exact_match = hdmi->plat_data->phy_force_vendor; -- int i; -- -- if (hdmi->ref_clk) { -- int rpclk = clk_round_rate(hdmi->ref_clk, pclk); -- -- if (abs(rpclk - pclk) > pclk / 1000) -- return MODE_NOCLOCK; -- } -- -- for (i = 0; mpll_cfg[i].mpixelclock != (~0UL); i++) { -- /* -- * For vendor specific phys force an exact match of the pixelclock -- * to preserve the original behaviour of the driver. -- */ -- if (exact_match && pclk == mpll_cfg[i].mpixelclock) -- return MODE_OK; -- /* -- * The Synopsys phy can work with pixelclocks up to the value given -- * in the corresponding mpll_cfg entry. 
-- */ -- if (!exact_match && pclk <= mpll_cfg[i].mpixelclock) -- return MODE_OK; -- } -- -- return MODE_BAD; --} -- - static void dw_hdmi_rockchip_encoder_disable(struct drm_encoder *encoder) - { - } -@@ -450,7 +403,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3228_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -467,7 +419,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3288_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -487,7 +438,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3328_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -505,7 +455,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3399_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -518,7 +467,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3568_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -625,6 +573,14 @@ static int dw_hdmi_rockchip_bind(struct - } - - if (hdmi->chip_data == &rk3568_chip_data) { -+ regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1, -+ HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK | -+ RK3568_HDMI_SCLIN_MSK, -+ RK3568_HDMI_SDAIN_MSK | -+ RK3568_HDMI_SCLIN_MSK)); -+ } -+ -+ if (hdmi->chip_data == &rk3568_chip_data) { - regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1, - HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK | - RK3568_HDMI_SCLIN_MSK, ---- a/drivers/usb/dwc3/dwc3-of-simple.c -+++ b/drivers/usb/dwc3/dwc3-of-simple.c -@@ -30,12 +30,18 @@ struct dwc3_of_simple { - bool need_reset; - }; - -+struct dwc3_of_simple_data { -+ bool need_reset; -+}; -+ - static int dwc3_of_simple_probe(struct platform_device *pdev) - { - struct dwc3_of_simple *simple; - struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; - -+ const struct dwc3_of_simple_data *data = of_device_get_match_data(dev); -+ - int ret; - - simple = devm_kzalloc(dev, sizeof(*simple), GFP_KERNEL); -@@ -49,8 +55,8 @@ static int dwc3_of_simple_probe(struct p - * Some controllers need to toggle the usb3-otg reset before trying to - * initialize the PHY, otherwise the PHY times out. 
- */ -- if (of_device_is_compatible(np, "rockchip,rk3399-dwc3")) -- simple->need_reset = true; -+ if (data->need_reset) -+ simple->need_reset = data->need_reset; - - simple->resets = of_reset_control_array_get(np, false, true, - true); -@@ -168,12 +174,34 @@ static const struct dev_pm_ops dwc3_of_s - dwc3_of_simple_runtime_resume, NULL) - }; - -+static const struct dwc3_of_simple_data dwc3_of_simple_data_rk3399 = { -+ .need_reset = true, -+}; -+ - static const struct of_device_id of_dwc3_simple_match[] = { -- { .compatible = "rockchip,rk3399-dwc3" }, -- { .compatible = "sprd,sc9860-dwc3" }, -- { .compatible = "allwinner,sun50i-h6-dwc3" }, -- { .compatible = "hisilicon,hi3670-dwc3" }, -- { .compatible = "intel,keembay-dwc3" }, -+ { -+ .compatible = "allwinner,sun50i-h6-dwc3", -+ }, -+ { -+ .compatible = "cavium,octeon-7130-usb-uctl", -+ }, -+ { -+ .compatible = "hisilicon,hi3670-dwc3", -+ }, -+ { -+ .compatible = "intel,keembay-dwc3", -+ }, -+ { -+ .compatible = "rockchip,rk3399-dwc3", -+ .data = &dwc3_of_simple_data_rk3399, -+ }, -+ { -+ .compatible = "rockchip,rk3568-dwc3", -+ .data = &dwc3_of_simple_data_rk3399, -+ }, -+ { -+ .compatible = "sprd,sc9860-dwc3", -+ }, - { /* Sentinel */ } - }; - MODULE_DEVICE_TABLE(of, of_dwc3_simple_match); ---- a/kernel/dma/pool.c -+++ b/kernel/dma/pool.c -@@ -191,11 +191,10 @@ static int __init dma_atomic_pool_init(v - /* - * If coherent_pool was not used on the command line, default the pool - * sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER. -+ * Use 2MiB as default pool size. - */ - if (!atomic_pool_size) { -- unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K); -- pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES); -- atomic_pool_size = max_t(size_t, pages << PAGE_SHIFT, SZ_128K); -+ atomic_pool_size = SZ_2M; - } - INIT_WORK(&atomic_pool_work, atomic_pool_work_fn); - ---- a/sound/soc/codecs/rt5651.c -+++ b/sound/soc/codecs/rt5651.c -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - - #include "rl6231.h" - #include "rt5651.h" -@@ -1511,6 +1512,7 @@ static int rt5651_set_dai_pll(struct snd - static int rt5651_set_bias_level(struct snd_soc_component *component, - enum snd_soc_bias_level level) - { -+ struct rt5651_priv *rt5651 = snd_soc_component_get_drvdata(component); - switch (level) { - case SND_SOC_BIAS_PREPARE: - if (SND_SOC_BIAS_STANDBY == snd_soc_component_get_bias_level(component)) { -@@ -1518,6 +1520,13 @@ static int rt5651_set_bias_level(struct - snd_soc_component_update_bits(component, RT5651_D_MISC, - 0xc00, 0xc00); - } -+ if (!IS_ERR(rt5651->mclk)){ -+ if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_ON) { -+ clk_disable_unprepare(rt5651->mclk); -+ } else { -+ clk_prepare_enable(rt5651->mclk); -+ } -+ } - break; - case SND_SOC_BIAS_STANDBY: - if (SND_SOC_BIAS_OFF == snd_soc_component_get_bias_level(component)) { -@@ -2059,6 +2068,13 @@ static int rt5651_probe(struct snd_soc_c - { - struct rt5651_priv *rt5651 = snd_soc_component_get_drvdata(component); - -+ /* Check if MCLK provided */ -+ rt5651->mclk = devm_clk_get(component->dev, "mclk"); -+ if (PTR_ERR(rt5651->mclk) == -EPROBE_DEFER){ -+ dev_err(component->dev, "unable to get mclk\n"); -+ return -EPROBE_DEFER; -+ } -+ - rt5651->component = component; - - snd_soc_component_update_bits(component, RT5651_PWR_ANLG1, ---- a/sound/soc/codecs/rt5651.h -+++ b/sound/soc/codecs/rt5651.h -@@ -2097,6 +2097,7 @@ struct rt5651_priv { - - int dmic_en; - bool hp_mute; -+ struct clk *mclk; - }; - - #endif /* __RT5651_H__ */ diff --git 
a/patches-6.6/005-friendlyelec-nanopi-series.patch b/patches-6.6/005-friendlyelec-nanopi-series.patch deleted file mode 100644 index 03257e2..0000000 --- a/patches-6.6/005-friendlyelec-nanopi-series.patch +++ /dev/null @@ -1,196 +0,0 @@ -From e134dcd7dda9048f4ac2cab96322a8a7f08a9d22 Mon Sep 17 00:00:00 2001 -From: sbwml <984419930@qq.com> -Date: Sat, 12 Nov 2022 10:24:30 +0800 -Subject: [PATCH] friendlyelec-nanopi-series - ---- - drivers/soc/Kconfig | 1 + - drivers/soc/Makefile | 1 + - drivers/soc/friendlyelec/Kconfig | 11 +++ - drivers/soc/friendlyelec/Makefile | 1 + - drivers/soc/friendlyelec/board.c | 143 ++++++++++++++++++++++++++++++ - 5 files changed, 157 insertions(+) - create mode 100644 drivers/soc/friendlyelec/Kconfig - create mode 100644 drivers/soc/friendlyelec/Makefile - create mode 100644 drivers/soc/friendlyelec/board.c - ---- a/drivers/soc/Kconfig -+++ b/drivers/soc/Kconfig -@@ -31,5 +31,6 @@ source "drivers/soc/ti/Kconfig" - source "drivers/soc/ux500/Kconfig" - source "drivers/soc/versatile/Kconfig" - source "drivers/soc/xilinx/Kconfig" -+source "drivers/soc/friendlyelec/Kconfig" - - endmenu ---- a/drivers/soc/Makefile -+++ b/drivers/soc/Makefile -@@ -35,3 +35,4 @@ obj-y += ti/ - obj-$(CONFIG_ARCH_U8500) += ux500/ - obj-$(CONFIG_PLAT_VERSATILE) += versatile/ - obj-y += xilinx/ -+obj-$(CONFIG_VENDOR_FRIENDLYELEC) += friendlyelec/ ---- /dev/null -+++ b/drivers/soc/friendlyelec/Kconfig -@@ -0,0 +1,11 @@ -+# -+# Machine drivers -+# -+ -+if ARCH_ROCKCHIP -+ -+config VENDOR_FRIENDLYELEC -+ bool "FriendlyElec board based on Rockchip SoCs" -+ default n -+ -+endif ---- /dev/null -+++ b/drivers/soc/friendlyelec/Makefile -@@ -0,0 +1 @@ -+obj-$(CONFIG_VENDOR_FRIENDLYELEC) += board.o ---- /dev/null -+++ b/drivers/soc/friendlyelec/board.c -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) Guangzhou FriendlyELEC Computer Tech. Co., Ltd. -+ * (http://www.friendlyarm.com) -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define BOARD_MANF "FriendlyELEC Computer Tech. Co., Ltd." 
-+ -+static const char *board_mach; -+static const char *board_name; -+static u32 board_rev; -+static u32 board_serial_high, board_serial_low; -+ -+static ssize_t board_sys_info_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ char *s = buf; -+ -+ s += sprintf(s, "Hardware\t: %s\n", board_mach); -+ s += sprintf(s, "Revision\t: %04x\n", board_rev); -+ s += sprintf(s, "Serial\t\t: %08x%08x\n", -+ board_serial_high, board_serial_low); -+ s += sprintf(s, "\nModel\t\t: %s\n", board_name); -+ s += sprintf(s, "Manufacturer\t: %s\n", BOARD_MANF); -+ -+ return (s - buf); -+} -+ -+static struct device_attribute board_attr_info = -+ __ATTR(info, S_IRUGO, board_sys_info_show, NULL); -+ -+static int rockchip_cpuinfo_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct nvmem_cell *cell; -+ unsigned char *efuse_buf, buf[16]; -+ size_t len; -+ int i; -+ -+ cell = nvmem_cell_get(dev, "id"); -+ if (IS_ERR(cell)) { -+ dev_err(dev, "failed to get id cell: %ld\n", PTR_ERR(cell)); -+ return PTR_ERR(cell); -+ } -+ -+ efuse_buf = nvmem_cell_read(cell, &len); -+ nvmem_cell_put(cell); -+ -+ if (len != 16) { -+ kfree(efuse_buf); -+ dev_err(dev, "invalid id len: %zu\n", len); -+ return -EINVAL; -+ } -+ -+ for (i = 0; i < 8; i++) { -+ buf[i] = efuse_buf[1 + (i << 1)]; -+ buf[i + 8] = efuse_buf[i << 1]; -+ } -+ -+ kfree(efuse_buf); -+ -+ board_serial_low = crc32(0, buf, 8); -+ board_serial_high = crc32(board_serial_low, buf + 8, 8); -+ -+ dev_info(dev, "Serial\t\t: %08x%08x\n", -+ board_serial_high, board_serial_low); -+ -+ return 0; -+} -+ -+static int board_sys_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ struct device_node *root; -+ -+ root = of_find_node_by_path("/"); -+ -+ of_property_read_u32(np, "hwrev", &board_rev); -+ -+ if (of_property_read_string(np, "machine", &board_mach)) -+ of_property_read_string(root, "compatible", &board_mach); -+ -+ if (of_property_read_string(np, "model", &board_name)) -+ of_property_read_string(root, "model", &board_name); -+ -+ of_node_put(root); -+ -+ rockchip_cpuinfo_probe(pdev); -+ -+ device_create_file(&pdev->dev, &board_attr_info); -+ -+ return 0; -+} -+ -+static const struct of_device_id board_sys_of_match[] = { -+ { .compatible = "friendlyelec,board" }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, board_sys_of_match); -+ -+static struct platform_driver board_sys_driver = { -+ .probe = board_sys_probe, -+ .driver = { -+ .name = "friendlyelec-board", -+ .of_match_table = board_sys_of_match, -+ }, -+}; -+ -+static int __init board_sys_init(void) -+{ -+ return platform_driver_register(&board_sys_driver); -+} -+late_initcall(board_sys_init); -+ -+MODULE_AUTHOR("support@friendlyarm.com"); -+MODULE_DESCRIPTION("FriendlyElec NanoPi Series Machine Driver"); -+MODULE_LICENSE("GPL v2"); diff --git a/patches-6.6/012-rk356x-add-dwc3-xhci-usb-trb-quirk.patch b/patches-6.6/012-rk356x-add-dwc3-xhci-usb-trb-quirk.patch index ca8f2e1..a11c727 100644 --- a/patches-6.6/012-rk356x-add-dwc3-xhci-usb-trb-quirk.patch +++ b/patches-6.6/012-rk356x-add-dwc3-xhci-usb-trb-quirk.patch @@ -18,7 +18,7 @@ --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c -@@ -1560,6 +1560,8 @@ static void dwc3_get_properties(struct d +@@ -1578,6 +1578,8 @@ static void dwc3_get_properties(struct d "snps,dis-del-phy-power-chg-quirk"); dwc->dis_tx_ipgap_linecheck_quirk = device_property_read_bool(dev, "snps,dis-tx-ipgap-linecheck-quirk"); @@ -39,7 +39,7 @@ * @resume_hs_terminations: Set if we enable quirk for fixing 
improper crc * generation after resume from suspend. * @ulpi_ext_vbus_drv: Set to confiure the upli chip to drives CPEN pin -@@ -1332,6 +1335,7 @@ struct dwc3 { +@@ -1333,6 +1336,7 @@ struct dwc3 { unsigned dis_u2_freeclk_exists_quirk:1; unsigned dis_del_phy_power_chg_quirk:1; unsigned dis_tx_ipgap_linecheck_quirk:1; @@ -73,7 +73,7 @@ } --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c -@@ -3606,6 +3606,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd * +@@ -3637,6 +3637,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd * bool more_trbs_coming = true; bool need_zero_pkt = false; bool first_trb = true; @@ -81,7 +81,7 @@ unsigned int num_trbs; unsigned int start_cycle, num_sgs = 0; unsigned int enqd_len, block_len, trb_buff_len, full_len; -@@ -3642,6 +3643,13 @@ int xhci_queue_bulk_tx(struct xhci_hcd * +@@ -3673,6 +3674,13 @@ int xhci_queue_bulk_tx(struct xhci_hcd * if (urb->transfer_flags & URB_ZERO_PACKET && urb_priv->num_tds > 1) need_zero_pkt = true; @@ -95,7 +95,7 @@ td = &urb_priv->td[0]; /* -@@ -3670,6 +3678,13 @@ int xhci_queue_bulk_tx(struct xhci_hcd * +@@ -3701,6 +3709,13 @@ int xhci_queue_bulk_tx(struct xhci_hcd * first_trb = false; if (start_cycle == 0) field |= TRB_CYCLE; @@ -109,7 +109,7 @@ } else field |= ring->cycle_state; -@@ -3678,6 +3693,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd * +@@ -3709,6 +3724,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd * */ if (enqd_len + trb_buff_len < full_len) { field |= TRB_CHAIN; @@ -132,7 +132,7 @@ #define TRB_MAX_BUFF_SIZE (1 << TRB_MAX_BUFF_SHIFT) /* How much data is left before the 64KB boundary? */ #define TRB_BUFF_LEN_UP_TO_BOUNDARY(addr) (TRB_MAX_BUFF_SIZE - \ -@@ -1854,6 +1858,7 @@ struct xhci_hcd { +@@ -1855,6 +1859,7 @@ struct xhci_hcd { #define XHCI_STATE_HALTED (1 << 1) #define XHCI_STATE_REMOVING (1 << 2) unsigned long long quirks; diff --git a/patches-6.6/032-01-v6.9-phy-rockchip-Add-Samsung-HDMI-eDP-Combo-PHY-driver.patch b/patches-6.6/032-01-v6.9-phy-rockchip-Add-Samsung-HDMI-eDP-Combo-PHY-driver.patch new file mode 100644 index 0000000..30e14cd --- /dev/null +++ b/patches-6.6/032-01-v6.9-phy-rockchip-Add-Samsung-HDMI-eDP-Combo-PHY-driver.patch @@ -0,0 +1,1084 @@ +From 553be2830c5f33308483e8118de748a2c69fe593 Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Wed, 14 Feb 2024 13:45:37 +0200 +Subject: [PATCH] phy: rockchip: Add Samsung HDMI/eDP Combo PHY driver + +Add driver for the HDMI/eDP TX Combo PHY found on Rockchip RK3588 SoC. + +The PHY is based on a Samsung IP block and supports HDMI 2.1 TMDS, FRL +and eDP links. The maximum data rate is 12Gbps (FRL), while the minimum +is 250Mbps (TMDS). + +Only the TMDS link is currently supported. + +Co-developed-by: Algea Cao +Signed-off-by: Algea Cao +Tested-by: Heiko Stuebner +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20240214-phy-hdptx-v4-2-e7974f46c1a7@collabora.com +Signed-off-by: Vinod Koul +--- + drivers/phy/rockchip/Kconfig | 8 + + drivers/phy/rockchip/Makefile | 1 + + .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 1028 +++++++++++++++++ + 3 files changed, 1037 insertions(+) + create mode 100644 drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c + +--- a/drivers/phy/rockchip/Kconfig ++++ b/drivers/phy/rockchip/Kconfig +@@ -83,6 +83,14 @@ config PHY_ROCKCHIP_PCIE + help + Enable this to support the Rockchip PCIe PHY. 
+ ++config PHY_ROCKCHIP_SAMSUNG_HDPTX ++ tristate "Rockchip Samsung HDMI/eDP Combo PHY driver" ++ depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF ++ select GENERIC_PHY ++ help ++ Enable this to support the Rockchip HDMI/eDP Combo PHY ++ with Samsung IP block. ++ + config PHY_ROCKCHIP_SNPS_PCIE3 + tristate "Rockchip Snps PCIe3 PHY Driver" + depends on (ARCH_ROCKCHIP && OF) || COMPILE_TEST +--- a/drivers/phy/rockchip/Makefile ++++ b/drivers/phy/rockchip/Makefile +@@ -8,6 +8,7 @@ obj-$(CONFIG_PHY_ROCKCHIP_INNO_HDMI) += + obj-$(CONFIG_PHY_ROCKCHIP_INNO_USB2) += phy-rockchip-inno-usb2.o + obj-$(CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY) += phy-rockchip-naneng-combphy.o + obj-$(CONFIG_PHY_ROCKCHIP_PCIE) += phy-rockchip-pcie.o ++obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX) += phy-rockchip-samsung-hdptx.o + obj-$(CONFIG_PHY_ROCKCHIP_SNPS_PCIE3) += phy-rockchip-snps-pcie3.o + obj-$(CONFIG_PHY_ROCKCHIP_TYPEC) += phy-rockchip-typec.o + obj-$(CONFIG_PHY_ROCKCHIP_USB) += phy-rockchip-usb.o +--- /dev/null ++++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +@@ -0,0 +1,1028 @@ ++// SPDX-License-Identifier: GPL-2.0+ ++/* ++ * Copyright (c) 2021-2022 Rockchip Electronics Co., Ltd. ++ * Copyright (c) 2024 Collabora Ltd. ++ * ++ * Author: Algea Cao ++ * Author: Cristian Ciocaltea ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define GRF_HDPTX_CON0 0x00 ++#define HDPTX_I_PLL_EN BIT(7) ++#define HDPTX_I_BIAS_EN BIT(6) ++#define HDPTX_I_BGR_EN BIT(5) ++#define GRF_HDPTX_STATUS 0x80 ++#define HDPTX_O_PLL_LOCK_DONE BIT(3) ++#define HDPTX_O_PHY_CLK_RDY BIT(2) ++#define HDPTX_O_PHY_RDY BIT(1) ++#define HDPTX_O_SB_RDY BIT(0) ++ ++#define HDTPX_REG(_n, _min, _max) \ ++ ( \ ++ BUILD_BUG_ON_ZERO((0x##_n) < (0x##_min)) + \ ++ BUILD_BUG_ON_ZERO((0x##_n) > (0x##_max)) + \ ++ ((0x##_n) * 4) \ ++ ) ++ ++#define CMN_REG(n) HDTPX_REG(n, 0000, 00a7) ++#define SB_REG(n) HDTPX_REG(n, 0100, 0129) ++#define LNTOP_REG(n) HDTPX_REG(n, 0200, 0229) ++#define LANE_REG(n) HDTPX_REG(n, 0300, 062d) ++ ++/* CMN_REG(0008) */ ++#define LCPLL_EN_MASK BIT(6) ++#define LCPLL_LCVCO_MODE_EN_MASK BIT(4) ++/* CMN_REG(001e) */ ++#define LCPLL_PI_EN_MASK BIT(5) ++#define LCPLL_100M_CLK_EN_MASK BIT(0) ++/* CMN_REG(0025) */ ++#define LCPLL_PMS_IQDIV_RSTN BIT(4) ++/* CMN_REG(0028) */ ++#define LCPLL_SDC_FRAC_EN BIT(2) ++#define LCPLL_SDC_FRAC_RSTN BIT(0) ++/* CMN_REG(002d) */ ++#define LCPLL_SDC_N_MASK GENMASK(3, 1) ++/* CMN_REG(002e) */ ++#define LCPLL_SDC_NUMBERATOR_MASK GENMASK(5, 0) ++/* CMN_REG(002f) */ ++#define LCPLL_SDC_DENOMINATOR_MASK GENMASK(7, 2) ++#define LCPLL_SDC_NDIV_RSTN BIT(0) ++/* CMN_REG(003d) */ ++#define ROPLL_LCVCO_EN BIT(4) ++/* CMN_REG(004e) */ ++#define ROPLL_PI_EN BIT(5) ++/* CMN_REG(005c) */ ++#define ROPLL_PMS_IQDIV_RSTN BIT(5) ++/* CMN_REG(005e) */ ++#define ROPLL_SDM_EN_MASK BIT(6) ++#define ROPLL_SDM_FRAC_EN_RBR BIT(3) ++#define ROPLL_SDM_FRAC_EN_HBR BIT(2) ++#define ROPLL_SDM_FRAC_EN_HBR2 BIT(1) ++#define ROPLL_SDM_FRAC_EN_HBR3 BIT(0) ++/* CMN_REG(0064) */ ++#define ROPLL_SDM_NUM_SIGN_RBR_MASK BIT(3) ++/* CMN_REG(0069) */ ++#define ROPLL_SDC_N_RBR_MASK GENMASK(2, 0) ++/* CMN_REG(0074) */ ++#define ROPLL_SDC_NDIV_RSTN BIT(2) ++#define ROPLL_SSC_EN BIT(0) ++/* CMN_REG(0081) */ ++#define OVRD_PLL_CD_CLK_EN BIT(8) ++#define PLL_CD_HSCLK_EAST_EN BIT(0) ++/* CMN_REG(0086) */ ++#define PLL_PCG_POSTDIV_SEL_MASK GENMASK(7, 4) ++#define PLL_PCG_CLK_SEL_MASK GENMASK(3, 1) ++#define PLL_PCG_CLK_EN BIT(0) ++/* CMN_REG(0087) */ ++#define 
PLL_FRL_MODE_EN BIT(3) ++#define PLL_TX_HS_CLK_EN BIT(2) ++/* CMN_REG(0089) */ ++#define LCPLL_ALONE_MODE BIT(1) ++/* CMN_REG(0097) */ ++#define DIG_CLK_SEL BIT(1) ++#define ROPLL_REF BIT(1) ++#define LCPLL_REF 0 ++/* CMN_REG(0099) */ ++#define CMN_ROPLL_ALONE_MODE BIT(2) ++#define ROPLL_ALONE_MODE BIT(2) ++/* CMN_REG(009a) */ ++#define HS_SPEED_SEL BIT(0) ++#define DIV_10_CLOCK BIT(0) ++/* CMN_REG(009b) */ ++#define IS_SPEED_SEL BIT(4) ++#define LINK_SYMBOL_CLOCK BIT(4) ++#define LINK_SYMBOL_CLOCK1_2 0 ++ ++/* SB_REG(0102) */ ++#define OVRD_SB_RXTERM_EN_MASK BIT(5) ++#define SB_RXTERM_EN_MASK BIT(4) ++#define ANA_SB_RXTERM_OFFSP_MASK GENMASK(3, 0) ++/* SB_REG(0103) */ ++#define ANA_SB_RXTERM_OFFSN_MASK GENMASK(6, 3) ++#define OVRD_SB_RX_RESCAL_DONE_MASK BIT(1) ++#define SB_RX_RESCAL_DONE_MASK BIT(0) ++/* SB_REG(0104) */ ++#define OVRD_SB_EN_MASK BIT(5) ++#define SB_EN_MASK BIT(4) ++/* SB_REG(0105) */ ++#define OVRD_SB_EARC_CMDC_EN_MASK BIT(6) ++#define SB_EARC_CMDC_EN_MASK BIT(5) ++#define ANA_SB_TX_HLVL_PROG_MASK GENMASK(2, 0) ++/* SB_REG(0106) */ ++#define ANA_SB_TX_LLVL_PROG_MASK GENMASK(6, 4) ++/* SB_REG(0109) */ ++#define ANA_SB_DMRX_AFC_DIV_RATIO_MASK GENMASK(2, 0) ++/* SB_REG(010f) */ ++#define OVRD_SB_VREG_EN_MASK BIT(7) ++#define SB_VREG_EN_MASK BIT(6) ++#define OVRD_SB_VREG_LPF_BYPASS_MASK BIT(5) ++#define SB_VREG_LPF_BYPASS_MASK BIT(4) ++#define ANA_SB_VREG_GAIN_CTRL_MASK GENMASK(3, 0) ++/* SB_REG(0110) */ ++#define ANA_SB_VREG_REF_SEL_MASK BIT(0) ++/* SB_REG(0113) */ ++#define SB_RX_RCAL_OPT_CODE_MASK GENMASK(5, 4) ++#define SB_RX_RTERM_CTRL_MASK GENMASK(3, 0) ++/* SB_REG(0114) */ ++#define SB_TG_SB_EN_DELAY_TIME_MASK GENMASK(5, 3) ++#define SB_TG_RXTERM_EN_DELAY_TIME_MASK GENMASK(2, 0) ++/* SB_REG(0115) */ ++#define SB_READY_DELAY_TIME_MASK GENMASK(5, 3) ++#define SB_TG_OSC_EN_DELAY_TIME_MASK GENMASK(2, 0) ++/* SB_REG(0116) */ ++#define AFC_RSTN_DELAY_TIME_MASK GENMASK(6, 4) ++/* SB_REG(0117) */ ++#define FAST_PULSE_TIME_MASK GENMASK(3, 0) ++/* SB_REG(011b) */ ++#define SB_EARC_SIG_DET_BYPASS_MASK BIT(4) ++#define SB_AFC_TOL_MASK GENMASK(3, 0) ++/* SB_REG(011f) */ ++#define SB_PWM_AFC_CTRL_MASK GENMASK(7, 2) ++#define SB_RCAL_RSTN_MASK BIT(1) ++/* SB_REG(0120) */ ++#define SB_EARC_EN_MASK BIT(1) ++#define SB_EARC_AFC_EN_MASK BIT(2) ++/* SB_REG(0123) */ ++#define OVRD_SB_READY_MASK BIT(5) ++#define SB_READY_MASK BIT(4) ++ ++/* LNTOP_REG(0200) */ ++#define PROTOCOL_SEL BIT(2) ++#define HDMI_MODE BIT(2) ++#define HDMI_TMDS_FRL_SEL BIT(1) ++/* LNTOP_REG(0206) */ ++#define DATA_BUS_SEL BIT(0) ++#define DATA_BUS_36_40 BIT(0) ++/* LNTOP_REG(0207) */ ++#define LANE_EN 0xf ++#define ALL_LANE_EN 0xf ++ ++/* LANE_REG(0312) */ ++#define LN0_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN0_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN0_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN0_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0412) */ ++#define LN1_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN1_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN1_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN1_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0512) */ ++#define LN2_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN2_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN2_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN2_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0612) */ ++#define LN3_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN3_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN3_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN3_TX_SER_RATE_SEL_HBR3 BIT(2) ++ ++struct lcpll_config { ++ u32 bit_rate; ++ u8 lcvco_mode_en; ++ u8 pi_en; ++ u8 clk_en_100m; ++ u8 pms_mdiv; ++ u8 pms_mdiv_afc; ++ u8 
pms_pdiv; ++ u8 pms_refdiv; ++ u8 pms_sdiv; ++ u8 pi_cdiv_rstn; ++ u8 pi_cdiv_sel; ++ u8 sdm_en; ++ u8 sdm_rstn; ++ u8 sdc_frac_en; ++ u8 sdc_rstn; ++ u8 sdm_deno; ++ u8 sdm_num_sign; ++ u8 sdm_num; ++ u8 sdc_n; ++ u8 sdc_n2; ++ u8 sdc_num; ++ u8 sdc_deno; ++ u8 sdc_ndiv_rstn; ++ u8 ssc_en; ++ u8 ssc_fm_dev; ++ u8 ssc_fm_freq; ++ u8 ssc_clk_div_sel; ++ u8 cd_tx_ser_rate_sel; ++}; ++ ++struct ropll_config { ++ u32 bit_rate; ++ u8 pms_mdiv; ++ u8 pms_mdiv_afc; ++ u8 pms_pdiv; ++ u8 pms_refdiv; ++ u8 pms_sdiv; ++ u8 pms_iqdiv_rstn; ++ u8 ref_clk_sel; ++ u8 sdm_en; ++ u8 sdm_rstn; ++ u8 sdc_frac_en; ++ u8 sdc_rstn; ++ u8 sdm_clk_div; ++ u8 sdm_deno; ++ u8 sdm_num_sign; ++ u8 sdm_num; ++ u8 sdc_n; ++ u8 sdc_num; ++ u8 sdc_deno; ++ u8 sdc_ndiv_rstn; ++ u8 ssc_en; ++ u8 ssc_fm_dev; ++ u8 ssc_fm_freq; ++ u8 ssc_clk_div_sel; ++ u8 ana_cpp_ctrl; ++ u8 ana_lpf_c_sel; ++ u8 cd_tx_ser_rate_sel; ++}; ++ ++enum rk_hdptx_reset { ++ RST_PHY = 0, ++ RST_APB, ++ RST_INIT, ++ RST_CMN, ++ RST_LANE, ++ RST_ROPLL, ++ RST_LCPLL, ++ RST_MAX ++}; ++ ++struct rk_hdptx_phy { ++ struct device *dev; ++ struct regmap *regmap; ++ struct regmap *grf; ++ ++ struct phy *phy; ++ struct phy_config *phy_cfg; ++ struct clk_bulk_data *clks; ++ int nr_clks; ++ struct reset_control_bulk_data rsts[RST_MAX]; ++}; ++ ++static const struct ropll_config ropll_tmds_cfg[] = { ++ { 5940000, 124, 124, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 3712500, 155, 155, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 2970000, 124, 124, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1620000, 135, 135, 1, 1, 3, 1, 1, 0, 1, 1, 1, 1, 4, 0, 3, 5, 5, 0x10, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1856250, 155, 155, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1540000, 193, 193, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 193, 1, 32, 2, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1485000, 0x7b, 0x7b, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 0, 3, 5, 5, ++ 0x10, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1462500, 122, 122, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 244, 1, 16, 2, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1190000, 149, 149, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 149, 1, 16, 2, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1065000, 89, 89, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 89, 1, 16, 1, 0, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1080000, 135, 135, 1, 1, 5, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 855000, 214, 214, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 214, 1, 16, 2, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 835000, 105, 105, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 42, 1, 16, 1, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 928125, 155, 155, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 742500, 124, 124, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5, ++ 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 270000, 0x5a, 0x5a, 1, 1, 0xf, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 251750, 84, 
84, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 168, 1, 16, 4, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_cmn_init_seq[] = { ++ REG_SEQ0(CMN_REG(0009), 0x0c), ++ REG_SEQ0(CMN_REG(000a), 0x83), ++ REG_SEQ0(CMN_REG(000b), 0x06), ++ REG_SEQ0(CMN_REG(000c), 0x20), ++ REG_SEQ0(CMN_REG(000d), 0xb8), ++ REG_SEQ0(CMN_REG(000e), 0x0f), ++ REG_SEQ0(CMN_REG(000f), 0x0f), ++ REG_SEQ0(CMN_REG(0010), 0x04), ++ REG_SEQ0(CMN_REG(0011), 0x00), ++ REG_SEQ0(CMN_REG(0012), 0x26), ++ REG_SEQ0(CMN_REG(0013), 0x22), ++ REG_SEQ0(CMN_REG(0014), 0x24), ++ REG_SEQ0(CMN_REG(0015), 0x77), ++ REG_SEQ0(CMN_REG(0016), 0x08), ++ REG_SEQ0(CMN_REG(0017), 0x00), ++ REG_SEQ0(CMN_REG(0018), 0x04), ++ REG_SEQ0(CMN_REG(0019), 0x48), ++ REG_SEQ0(CMN_REG(001a), 0x01), ++ REG_SEQ0(CMN_REG(001b), 0x00), ++ REG_SEQ0(CMN_REG(001c), 0x01), ++ REG_SEQ0(CMN_REG(001d), 0x64), ++ REG_SEQ0(CMN_REG(001f), 0x00), ++ REG_SEQ0(CMN_REG(0026), 0x53), ++ REG_SEQ0(CMN_REG(0029), 0x01), ++ REG_SEQ0(CMN_REG(0030), 0x00), ++ REG_SEQ0(CMN_REG(0031), 0x20), ++ REG_SEQ0(CMN_REG(0032), 0x30), ++ REG_SEQ0(CMN_REG(0033), 0x0b), ++ REG_SEQ0(CMN_REG(0034), 0x23), ++ REG_SEQ0(CMN_REG(0035), 0x00), ++ REG_SEQ0(CMN_REG(0038), 0x00), ++ REG_SEQ0(CMN_REG(0039), 0x00), ++ REG_SEQ0(CMN_REG(003a), 0x00), ++ REG_SEQ0(CMN_REG(003b), 0x00), ++ REG_SEQ0(CMN_REG(003c), 0x80), ++ REG_SEQ0(CMN_REG(003e), 0x0c), ++ REG_SEQ0(CMN_REG(003f), 0x83), ++ REG_SEQ0(CMN_REG(0040), 0x06), ++ REG_SEQ0(CMN_REG(0041), 0x20), ++ REG_SEQ0(CMN_REG(0042), 0xb8), ++ REG_SEQ0(CMN_REG(0043), 0x00), ++ REG_SEQ0(CMN_REG(0044), 0x46), ++ REG_SEQ0(CMN_REG(0045), 0x24), ++ REG_SEQ0(CMN_REG(0046), 0xff), ++ REG_SEQ0(CMN_REG(0047), 0x00), ++ REG_SEQ0(CMN_REG(0048), 0x44), ++ REG_SEQ0(CMN_REG(0049), 0xfa), ++ REG_SEQ0(CMN_REG(004a), 0x08), ++ REG_SEQ0(CMN_REG(004b), 0x00), ++ REG_SEQ0(CMN_REG(004c), 0x01), ++ REG_SEQ0(CMN_REG(004d), 0x64), ++ REG_SEQ0(CMN_REG(004e), 0x14), ++ REG_SEQ0(CMN_REG(004f), 0x00), ++ REG_SEQ0(CMN_REG(0050), 0x00), ++ REG_SEQ0(CMN_REG(005d), 0x0c), ++ REG_SEQ0(CMN_REG(005f), 0x01), ++ REG_SEQ0(CMN_REG(006b), 0x04), ++ REG_SEQ0(CMN_REG(0073), 0x30), ++ REG_SEQ0(CMN_REG(0074), 0x00), ++ REG_SEQ0(CMN_REG(0075), 0x20), ++ REG_SEQ0(CMN_REG(0076), 0x30), ++ REG_SEQ0(CMN_REG(0077), 0x08), ++ REG_SEQ0(CMN_REG(0078), 0x0c), ++ REG_SEQ0(CMN_REG(0079), 0x00), ++ REG_SEQ0(CMN_REG(007b), 0x00), ++ REG_SEQ0(CMN_REG(007c), 0x00), ++ REG_SEQ0(CMN_REG(007d), 0x00), ++ REG_SEQ0(CMN_REG(007e), 0x00), ++ REG_SEQ0(CMN_REG(007f), 0x00), ++ REG_SEQ0(CMN_REG(0080), 0x00), ++ REG_SEQ0(CMN_REG(0081), 0x09), ++ REG_SEQ0(CMN_REG(0082), 0x04), ++ REG_SEQ0(CMN_REG(0083), 0x24), ++ REG_SEQ0(CMN_REG(0084), 0x20), ++ REG_SEQ0(CMN_REG(0085), 0x03), ++ REG_SEQ0(CMN_REG(0086), 0x01), ++ REG_SEQ0(CMN_REG(0087), 0x0c), ++ REG_SEQ0(CMN_REG(008a), 0x55), ++ REG_SEQ0(CMN_REG(008b), 0x25), ++ REG_SEQ0(CMN_REG(008c), 0x2c), ++ REG_SEQ0(CMN_REG(008d), 0x22), ++ REG_SEQ0(CMN_REG(008e), 0x14), ++ REG_SEQ0(CMN_REG(008f), 0x20), ++ REG_SEQ0(CMN_REG(0090), 0x00), ++ REG_SEQ0(CMN_REG(0091), 0x00), ++ REG_SEQ0(CMN_REG(0092), 0x00), ++ REG_SEQ0(CMN_REG(0093), 0x00), ++ REG_SEQ0(CMN_REG(009a), 0x11), ++ REG_SEQ0(CMN_REG(009b), 0x10), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_cmn_init_seq[] = { ++ REG_SEQ0(CMN_REG(0008), 0x00), ++ REG_SEQ0(CMN_REG(0011), 0x01), ++ REG_SEQ0(CMN_REG(0017), 0x20), ++ REG_SEQ0(CMN_REG(001e), 0x14), ++ REG_SEQ0(CMN_REG(0020), 0x00), ++ REG_SEQ0(CMN_REG(0021), 0x00), ++ REG_SEQ0(CMN_REG(0022), 0x11), ++ REG_SEQ0(CMN_REG(0023), 0x00), ++ 
REG_SEQ0(CMN_REG(0024), 0x00), ++ REG_SEQ0(CMN_REG(0025), 0x53), ++ REG_SEQ0(CMN_REG(0026), 0x00), ++ REG_SEQ0(CMN_REG(0027), 0x00), ++ REG_SEQ0(CMN_REG(0028), 0x01), ++ REG_SEQ0(CMN_REG(002a), 0x00), ++ REG_SEQ0(CMN_REG(002b), 0x00), ++ REG_SEQ0(CMN_REG(002c), 0x00), ++ REG_SEQ0(CMN_REG(002d), 0x00), ++ REG_SEQ0(CMN_REG(002e), 0x04), ++ REG_SEQ0(CMN_REG(002f), 0x00), ++ REG_SEQ0(CMN_REG(0030), 0x20), ++ REG_SEQ0(CMN_REG(0031), 0x30), ++ REG_SEQ0(CMN_REG(0032), 0x0b), ++ REG_SEQ0(CMN_REG(0033), 0x23), ++ REG_SEQ0(CMN_REG(0034), 0x00), ++ REG_SEQ0(CMN_REG(003d), 0x40), ++ REG_SEQ0(CMN_REG(0042), 0x78), ++ REG_SEQ0(CMN_REG(004e), 0x34), ++ REG_SEQ0(CMN_REG(005c), 0x25), ++ REG_SEQ0(CMN_REG(005e), 0x4f), ++ REG_SEQ0(CMN_REG(0074), 0x04), ++ REG_SEQ0(CMN_REG(0081), 0x01), ++ REG_SEQ0(CMN_REG(0087), 0x04), ++ REG_SEQ0(CMN_REG(0089), 0x00), ++ REG_SEQ0(CMN_REG(0095), 0x00), ++ REG_SEQ0(CMN_REG(0097), 0x02), ++ REG_SEQ0(CMN_REG(0099), 0x04), ++ REG_SEQ0(CMN_REG(009b), 0x00), ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_sb_init_seq[] = { ++ REG_SEQ0(SB_REG(0114), 0x00), ++ REG_SEQ0(SB_REG(0115), 0x00), ++ REG_SEQ0(SB_REG(0116), 0x00), ++ REG_SEQ0(SB_REG(0117), 0x00), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lntop_highbr_seq[] = { ++ REG_SEQ0(LNTOP_REG(0201), 0x00), ++ REG_SEQ0(LNTOP_REG(0202), 0x00), ++ REG_SEQ0(LNTOP_REG(0203), 0x0f), ++ REG_SEQ0(LNTOP_REG(0204), 0xff), ++ REG_SEQ0(LNTOP_REG(0205), 0xff), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lntop_lowbr_seq[] = { ++ REG_SEQ0(LNTOP_REG(0201), 0x07), ++ REG_SEQ0(LNTOP_REG(0202), 0xc1), ++ REG_SEQ0(LNTOP_REG(0203), 0xf0), ++ REG_SEQ0(LNTOP_REG(0204), 0x7c), ++ REG_SEQ0(LNTOP_REG(0205), 0x1f), ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_lane_init_seq[] = { ++ REG_SEQ0(LANE_REG(0303), 0x0c), ++ REG_SEQ0(LANE_REG(0307), 0x20), ++ REG_SEQ0(LANE_REG(030a), 0x17), ++ REG_SEQ0(LANE_REG(030b), 0x77), ++ REG_SEQ0(LANE_REG(030c), 0x77), ++ REG_SEQ0(LANE_REG(030d), 0x77), ++ REG_SEQ0(LANE_REG(030e), 0x38), ++ REG_SEQ0(LANE_REG(0310), 0x03), ++ REG_SEQ0(LANE_REG(0311), 0x0f), ++ REG_SEQ0(LANE_REG(0316), 0x02), ++ REG_SEQ0(LANE_REG(031b), 0x01), ++ REG_SEQ0(LANE_REG(031f), 0x15), ++ REG_SEQ0(LANE_REG(0320), 0xa0), ++ REG_SEQ0(LANE_REG(0403), 0x0c), ++ REG_SEQ0(LANE_REG(0407), 0x20), ++ REG_SEQ0(LANE_REG(040a), 0x17), ++ REG_SEQ0(LANE_REG(040b), 0x77), ++ REG_SEQ0(LANE_REG(040c), 0x77), ++ REG_SEQ0(LANE_REG(040d), 0x77), ++ REG_SEQ0(LANE_REG(040e), 0x38), ++ REG_SEQ0(LANE_REG(0410), 0x03), ++ REG_SEQ0(LANE_REG(0411), 0x0f), ++ REG_SEQ0(LANE_REG(0416), 0x02), ++ REG_SEQ0(LANE_REG(041b), 0x01), ++ REG_SEQ0(LANE_REG(041f), 0x15), ++ REG_SEQ0(LANE_REG(0420), 0xa0), ++ REG_SEQ0(LANE_REG(0503), 0x0c), ++ REG_SEQ0(LANE_REG(0507), 0x20), ++ REG_SEQ0(LANE_REG(050a), 0x17), ++ REG_SEQ0(LANE_REG(050b), 0x77), ++ REG_SEQ0(LANE_REG(050c), 0x77), ++ REG_SEQ0(LANE_REG(050d), 0x77), ++ REG_SEQ0(LANE_REG(050e), 0x38), ++ REG_SEQ0(LANE_REG(0510), 0x03), ++ REG_SEQ0(LANE_REG(0511), 0x0f), ++ REG_SEQ0(LANE_REG(0516), 0x02), ++ REG_SEQ0(LANE_REG(051b), 0x01), ++ REG_SEQ0(LANE_REG(051f), 0x15), ++ REG_SEQ0(LANE_REG(0520), 0xa0), ++ REG_SEQ0(LANE_REG(0603), 0x0c), ++ REG_SEQ0(LANE_REG(0607), 0x20), ++ REG_SEQ0(LANE_REG(060a), 0x17), ++ REG_SEQ0(LANE_REG(060b), 0x77), ++ REG_SEQ0(LANE_REG(060c), 0x77), ++ REG_SEQ0(LANE_REG(060d), 0x77), ++ REG_SEQ0(LANE_REG(060e), 0x38), ++ REG_SEQ0(LANE_REG(0610), 0x03), ++ REG_SEQ0(LANE_REG(0611), 0x0f), ++ REG_SEQ0(LANE_REG(0616), 0x02), ++ REG_SEQ0(LANE_REG(061b), 0x01), ++ 
REG_SEQ0(LANE_REG(061f), 0x15), ++ REG_SEQ0(LANE_REG(0620), 0xa0), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lane_init_seq[] = { ++ REG_SEQ0(LANE_REG(0312), 0x00), ++ REG_SEQ0(LANE_REG(031e), 0x00), ++ REG_SEQ0(LANE_REG(0412), 0x00), ++ REG_SEQ0(LANE_REG(041e), 0x00), ++ REG_SEQ0(LANE_REG(0512), 0x00), ++ REG_SEQ0(LANE_REG(051e), 0x00), ++ REG_SEQ0(LANE_REG(0612), 0x00), ++ REG_SEQ0(LANE_REG(061e), 0x08), ++ REG_SEQ0(LANE_REG(0303), 0x2f), ++ REG_SEQ0(LANE_REG(0403), 0x2f), ++ REG_SEQ0(LANE_REG(0503), 0x2f), ++ REG_SEQ0(LANE_REG(0603), 0x2f), ++ REG_SEQ0(LANE_REG(0305), 0x03), ++ REG_SEQ0(LANE_REG(0405), 0x03), ++ REG_SEQ0(LANE_REG(0505), 0x03), ++ REG_SEQ0(LANE_REG(0605), 0x03), ++ REG_SEQ0(LANE_REG(0306), 0x1c), ++ REG_SEQ0(LANE_REG(0406), 0x1c), ++ REG_SEQ0(LANE_REG(0506), 0x1c), ++ REG_SEQ0(LANE_REG(0606), 0x1c), ++}; ++ ++static bool rk_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg) ++{ ++ switch (reg) { ++ case 0x0000 ... 0x029c: ++ case 0x0400 ... 0x04a4: ++ case 0x0800 ... 0x08a4: ++ case 0x0c00 ... 0x0cb4: ++ case 0x1000 ... 0x10b4: ++ case 0x1400 ... 0x14b4: ++ case 0x1800 ... 0x18b4: ++ return true; ++ } ++ ++ return false; ++} ++ ++static const struct regmap_config rk_hdptx_phy_regmap_config = { ++ .reg_bits = 32, ++ .reg_stride = 4, ++ .val_bits = 32, ++ .writeable_reg = rk_hdptx_phy_is_rw_reg, ++ .readable_reg = rk_hdptx_phy_is_rw_reg, ++ .fast_io = true, ++ .max_register = 0x18b4, ++}; ++ ++#define rk_hdptx_multi_reg_write(hdptx, seq) \ ++ regmap_multi_reg_write((hdptx)->regmap, seq, ARRAY_SIZE(seq)) ++ ++static void rk_hdptx_pre_power_up(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ ++ reset_control_assert(hdptx->rsts[RST_APB].rstc); ++ usleep_range(20, 25); ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ ++ reset_control_assert(hdptx->rsts[RST_LANE].rstc); ++ reset_control_assert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_assert(hdptx->rsts[RST_INIT].rstc); ++ ++ val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++} ++ ++static int rk_hdptx_post_enable_lane(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ int ret; ++ ++ reset_control_deassert(hdptx->rsts[RST_LANE].rstc); ++ ++ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | ++ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, ++ (val & HDPTX_O_PHY_RDY) && ++ (val & HDPTX_O_PLL_LOCK_DONE), ++ 100, 5000); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to get PHY lane lock: %d\n", ret); ++ return ret; ++ } ++ ++ dev_dbg(hdptx->dev, "PHY lane locked\n"); ++ ++ return 0; ++} ++ ++static int rk_hdptx_post_enable_pll(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ int ret; ++ ++ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | ++ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ usleep_range(10, 15); ++ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); ++ ++ usleep_range(10, 15); ++ val = HDPTX_I_PLL_EN << 16 | HDPTX_I_PLL_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ usleep_range(10, 15); ++ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); ++ ++ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, ++ val & HDPTX_O_PHY_CLK_RDY, 20, 400); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to get PHY clk ready: %d\n", ret); ++ return ret; ++ } ++ ++ dev_dbg(hdptx->dev, "PHY clk ready\n"); ++ ++ return 0; ++} ++ ++static void rk_hdptx_phy_disable(struct rk_hdptx_phy 
*hdptx) ++{ ++ u32 val; ++ ++ /* reset phy and apb, or phy locked flag may keep 1 */ ++ reset_control_assert(hdptx->rsts[RST_PHY].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_PHY].rstc); ++ ++ reset_control_assert(hdptx->rsts[RST_APB].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ ++ regmap_write(hdptx->regmap, LANE_REG(0300), 0x82); ++ regmap_write(hdptx->regmap, SB_REG(010f), 0xc1); ++ regmap_write(hdptx->regmap, SB_REG(0110), 0x1); ++ regmap_write(hdptx->regmap, LANE_REG(0301), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0401), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0501), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0601), 0x80); ++ ++ reset_control_assert(hdptx->rsts[RST_LANE].rstc); ++ reset_control_assert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_assert(hdptx->rsts[RST_INIT].rstc); ++ ++ val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++} ++ ++static bool rk_hdptx_phy_clk_pll_calc(unsigned int data_rate, ++ struct ropll_config *cfg) ++{ ++ const unsigned int fout = data_rate / 2, fref = 24000; ++ unsigned long k = 0, lc, k_sub, lc_sub; ++ unsigned int fvco, sdc; ++ u32 mdiv, sdiv, n = 8; ++ ++ if (fout > 0xfffffff) ++ return false; ++ ++ for (sdiv = 16; sdiv >= 1; sdiv--) { ++ if (sdiv % 2 && sdiv != 1) ++ continue; ++ ++ fvco = fout * sdiv; ++ ++ if (fvco < 2000000 || fvco > 4000000) ++ continue; ++ ++ mdiv = DIV_ROUND_UP(fvco, fref); ++ if (mdiv < 20 || mdiv > 255) ++ continue; ++ ++ if (fref * mdiv - fvco) { ++ for (sdc = 264000; sdc <= 750000; sdc += fref) ++ if (sdc * n > fref * mdiv) ++ break; ++ ++ if (sdc > 750000) ++ continue; ++ ++ rational_best_approximation(fref * mdiv - fvco, ++ sdc / 16, ++ GENMASK(6, 0), ++ GENMASK(7, 0), ++ &k, &lc); ++ ++ rational_best_approximation(sdc * n - fref * mdiv, ++ sdc, ++ GENMASK(6, 0), ++ GENMASK(7, 0), ++ &k_sub, &lc_sub); ++ } ++ ++ break; ++ } ++ ++ if (sdiv < 1) ++ return false; ++ ++ if (cfg) { ++ cfg->pms_mdiv = mdiv; ++ cfg->pms_mdiv_afc = mdiv; ++ cfg->pms_pdiv = 1; ++ cfg->pms_refdiv = 1; ++ cfg->pms_sdiv = sdiv - 1; ++ ++ cfg->sdm_en = k > 0 ? 
1 : 0; ++ if (cfg->sdm_en) { ++ cfg->sdm_deno = lc; ++ cfg->sdm_num_sign = 1; ++ cfg->sdm_num = k; ++ cfg->sdc_n = n - 3; ++ cfg->sdc_num = k_sub; ++ cfg->sdc_deno = lc_sub; ++ } ++ } ++ ++ return true; ++} ++ ++static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, ++ unsigned int rate) ++{ ++ const struct ropll_config *cfg = NULL; ++ struct ropll_config rc = {0}; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++) ++ if (rate == ropll_tmds_cfg[i].bit_rate) { ++ cfg = &ropll_tmds_cfg[i]; ++ break; ++ } ++ ++ if (!cfg) { ++ if (rk_hdptx_phy_clk_pll_calc(rate, &rc)) { ++ cfg = &rc; ++ } else { ++ dev_err(hdptx->dev, "%s cannot find pll cfg\n", __func__); ++ return -EINVAL; ++ } ++ } ++ ++ dev_dbg(hdptx->dev, "mdiv=%u, sdiv=%u, sdm_en=%u, k_sign=%u, k=%u, lc=%u\n", ++ cfg->pms_mdiv, cfg->pms_sdiv + 1, cfg->sdm_en, ++ cfg->sdm_num_sign, cfg->sdm_num, cfg->sdm_deno); ++ ++ rk_hdptx_pre_power_up(hdptx); ++ ++ reset_control_assert(hdptx->rsts[RST_ROPLL].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_ROPLL].rstc); ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_cmn_init_seq); ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_cmn_init_seq); ++ ++ regmap_write(hdptx->regmap, CMN_REG(0051), cfg->pms_mdiv); ++ regmap_write(hdptx->regmap, CMN_REG(0055), cfg->pms_mdiv_afc); ++ regmap_write(hdptx->regmap, CMN_REG(0059), ++ (cfg->pms_pdiv << 4) | cfg->pms_refdiv); ++ regmap_write(hdptx->regmap, CMN_REG(005a), cfg->pms_sdiv << 4); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDM_EN_MASK, ++ FIELD_PREP(ROPLL_SDM_EN_MASK, cfg->sdm_en)); ++ if (!cfg->sdm_en) ++ regmap_update_bits(hdptx->regmap, CMN_REG(005e), 0xf, 0); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0064), ROPLL_SDM_NUM_SIGN_RBR_MASK, ++ FIELD_PREP(ROPLL_SDM_NUM_SIGN_RBR_MASK, cfg->sdm_num_sign)); ++ ++ regmap_write(hdptx->regmap, CMN_REG(0060), cfg->sdm_deno); ++ regmap_write(hdptx->regmap, CMN_REG(0065), cfg->sdm_num); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0069), ROPLL_SDC_N_RBR_MASK, ++ FIELD_PREP(ROPLL_SDC_N_RBR_MASK, cfg->sdc_n)); ++ ++ regmap_write(hdptx->regmap, CMN_REG(006c), cfg->sdc_num); ++ regmap_write(hdptx->regmap, CMN_REG(0070), cfg->sdc_deno); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK, ++ FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv)); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN, ++ PLL_PCG_CLK_EN); ++ ++ return rk_hdptx_post_enable_pll(hdptx); ++} ++ ++static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx, ++ unsigned int rate) ++{ ++ u32 val; ++ int ret; ++ ++ ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val); ++ if (ret) ++ return ret; ++ ++ if (!(val & HDPTX_O_PLL_LOCK_DONE)) { ++ ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, rate); ++ if (ret) ++ return ret; ++ } ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_sb_init_seq); ++ ++ regmap_write(hdptx->regmap, LNTOP_REG(0200), 0x06); ++ ++ if (rate >= 3400000) { ++ /* For 1/40 bitrate clk */ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_highbr_seq); ++ } else { ++ /* For 1/10 bitrate clk */ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_lowbr_seq); ++ } ++ ++ regmap_write(hdptx->regmap, LNTOP_REG(0206), 0x07); ++ regmap_write(hdptx->regmap, LNTOP_REG(0207), 0x0f); ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_lane_init_seq); ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lane_init_seq); ++ ++ return rk_hdptx_post_enable_lane(hdptx); ++} ++ ++static int 
rk_hdptx_phy_power_on(struct phy *phy) ++{ ++ struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy); ++ int ret, bus_width = phy_get_bus_width(hdptx->phy); ++ /* ++ * FIXME: Temporary workaround to pass pixel_clk_rate ++ * from the HDMI bridge driver until phy_configure_opts_hdmi ++ * becomes available in the PHY API. ++ */ ++ unsigned int rate = bus_width & 0xfffffff; ++ ++ dev_dbg(hdptx->dev, "%s bus_width=%x rate=%u\n", ++ __func__, bus_width, rate); ++ ++ ret = pm_runtime_resume_and_get(hdptx->dev); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to resume phy: %d\n", ret); ++ return ret; ++ } ++ ++ ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate); ++ if (ret) ++ pm_runtime_put(hdptx->dev); ++ ++ return ret; ++} ++ ++static int rk_hdptx_phy_power_off(struct phy *phy) ++{ ++ struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy); ++ u32 val; ++ int ret; ++ ++ ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val); ++ if (ret == 0 && (val & HDPTX_O_PLL_LOCK_DONE)) ++ rk_hdptx_phy_disable(hdptx); ++ ++ pm_runtime_put(hdptx->dev); ++ ++ return ret; ++} ++ ++static const struct phy_ops rk_hdptx_phy_ops = { ++ .power_on = rk_hdptx_phy_power_on, ++ .power_off = rk_hdptx_phy_power_off, ++ .owner = THIS_MODULE, ++}; ++ ++static int rk_hdptx_phy_runtime_suspend(struct device *dev) ++{ ++ struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev); ++ ++ clk_bulk_disable_unprepare(hdptx->nr_clks, hdptx->clks); ++ ++ return 0; ++} ++ ++static int rk_hdptx_phy_runtime_resume(struct device *dev) ++{ ++ struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev); ++ int ret; ++ ++ ret = clk_bulk_prepare_enable(hdptx->nr_clks, hdptx->clks); ++ if (ret) ++ dev_err(hdptx->dev, "Failed to enable clocks: %d\n", ret); ++ ++ return ret; ++} ++ ++static int rk_hdptx_phy_probe(struct platform_device *pdev) ++{ ++ struct phy_provider *phy_provider; ++ struct device *dev = &pdev->dev; ++ struct rk_hdptx_phy *hdptx; ++ void __iomem *regs; ++ int ret; ++ ++ hdptx = devm_kzalloc(dev, sizeof(*hdptx), GFP_KERNEL); ++ if (!hdptx) ++ return -ENOMEM; ++ ++ hdptx->dev = dev; ++ ++ regs = devm_platform_ioremap_resource(pdev, 0); ++ if (IS_ERR(regs)) ++ return dev_err_probe(dev, PTR_ERR(regs), ++ "Failed to ioremap resource\n"); ++ ++ ret = devm_clk_bulk_get_all(dev, &hdptx->clks); ++ if (ret < 0) ++ return dev_err_probe(dev, ret, "Failed to get clocks\n"); ++ if (ret == 0) ++ return dev_err_probe(dev, -EINVAL, "Missing clocks\n"); ++ ++ hdptx->nr_clks = ret; ++ ++ hdptx->regmap = devm_regmap_init_mmio(dev, regs, ++ &rk_hdptx_phy_regmap_config); ++ if (IS_ERR(hdptx->regmap)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->regmap), ++ "Failed to init regmap\n"); ++ ++ hdptx->rsts[RST_PHY].id = "phy"; ++ hdptx->rsts[RST_APB].id = "apb"; ++ hdptx->rsts[RST_INIT].id = "init"; ++ hdptx->rsts[RST_CMN].id = "cmn"; ++ hdptx->rsts[RST_LANE].id = "lane"; ++ hdptx->rsts[RST_ROPLL].id = "ropll"; ++ hdptx->rsts[RST_LCPLL].id = "lcpll"; ++ ++ ret = devm_reset_control_bulk_get_exclusive(dev, RST_MAX, hdptx->rsts); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to get resets\n"); ++ ++ hdptx->grf = syscon_regmap_lookup_by_phandle(dev->of_node, ++ "rockchip,grf"); ++ if (IS_ERR(hdptx->grf)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->grf), ++ "Could not get GRF syscon\n"); ++ ++ hdptx->phy = devm_phy_create(dev, NULL, &rk_hdptx_phy_ops); ++ if (IS_ERR(hdptx->phy)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->phy), ++ "Failed to create HDMI PHY\n"); ++ ++ platform_set_drvdata(pdev, hdptx); ++ phy_set_drvdata(hdptx->phy, hdptx); ++ phy_set_bus_width(hdptx->phy, 8); ++ 
++ ret = devm_pm_runtime_enable(dev); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to enable runtime PM\n"); ++ ++ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); ++ if (IS_ERR(phy_provider)) ++ return dev_err_probe(dev, PTR_ERR(phy_provider), ++ "Failed to register PHY provider\n"); ++ ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); ++ ++ return 0; ++} ++ ++static const struct dev_pm_ops rk_hdptx_phy_pm_ops = { ++ RUNTIME_PM_OPS(rk_hdptx_phy_runtime_suspend, ++ rk_hdptx_phy_runtime_resume, NULL) ++}; ++ ++static const struct of_device_id rk_hdptx_phy_of_match[] = { ++ { .compatible = "rockchip,rk3588-hdptx-phy", }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, rk_hdptx_phy_of_match); ++ ++static struct platform_driver rk_hdptx_phy_driver = { ++ .probe = rk_hdptx_phy_probe, ++ .driver = { ++ .name = "rockchip-hdptx-phy", ++ .pm = &rk_hdptx_phy_pm_ops, ++ .of_match_table = rk_hdptx_phy_of_match, ++ }, ++}; ++module_platform_driver(rk_hdptx_phy_driver); ++ ++MODULE_AUTHOR("Algea Cao "); ++MODULE_AUTHOR("Cristian Ciocaltea "); ++MODULE_DESCRIPTION("Samsung HDMI/eDP Transmitter Combo PHY Driver"); ++MODULE_LICENSE("GPL"); diff --git a/patches-6.6/033-01-v6.7-drm-rockchip-vop-Add-rv1126-vop_lite-support.patch b/patches-6.6/033-01-v6.7-drm-rockchip-vop-Add-rv1126-vop_lite-support.patch new file mode 100644 index 0000000..b2ce0c3 --- /dev/null +++ b/patches-6.6/033-01-v6.7-drm-rockchip-vop-Add-rv1126-vop_lite-support.patch @@ -0,0 +1,88 @@ +From 3c3cfcb93f6e6e1cede0cdfe3ec24f16ee108929 Mon Sep 17 00:00:00 2001 +From: Jagan Teki +Date: Mon, 31 Jul 2023 16:30:04 +0530 +Subject: [PATCH] drm/rockchip: vop: Add rv1126 vop_lite support + +RV1126 VOP_LITE supports the video output processing ofMIPI DSI, +RGB display interfaces with max output resolution of 1920x1080. + +Add support for rv1126 vop. 
+ +Signed-off-by: Jagan Teki +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230731110012.2913742-7-jagan@edgeble.ai +--- + drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 55 +++++++++++++++++++++ + 1 file changed, 55 insertions(+) + +--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +@@ -1120,6 +1120,59 @@ static const struct vop_data rk3328_vop + .max_output = { 4096, 2160 }, + }; + ++static const struct vop_common rv1126_common = { ++ .standby = VOP_REG_SYNC(PX30_SYS_CTRL2, 0x1, 1), ++ .out_mode = VOP_REG(PX30_DSP_CTRL2, 0xf, 16), ++ .dsp_blank = VOP_REG(PX30_DSP_CTRL2, 0x1, 14), ++ .dither_down_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 8), ++ .dither_down_sel = VOP_REG(PX30_DSP_CTRL2, 0x1, 7), ++ .dither_down_mode = VOP_REG(PX30_DSP_CTRL2, 0x1, 6), ++ .cfg_done = VOP_REG_SYNC(PX30_REG_CFG_DONE, 0x1, 0), ++ .dither_up = VOP_REG(PX30_DSP_CTRL2, 0x1, 2), ++ .dsp_lut_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 5), ++ .gate_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 0), ++}; ++ ++static const struct vop_modeset rv1126_modeset = { ++ .htotal_pw = VOP_REG(PX30_DSP_HTOTAL_HS_END, 0x0fff0fff, 0), ++ .hact_st_end = VOP_REG(PX30_DSP_HACT_ST_END, 0x0fff0fff, 0), ++ .vtotal_pw = VOP_REG(PX30_DSP_VTOTAL_VS_END, 0x0fff0fff, 0), ++ .vact_st_end = VOP_REG(PX30_DSP_VACT_ST_END, 0x0fff0fff, 0), ++}; ++ ++static const struct vop_output rv1126_output = { ++ .rgb_dclk_pol = VOP_REG(PX30_DSP_CTRL0, 0x1, 1), ++ .rgb_pin_pol = VOP_REG(PX30_DSP_CTRL0, 0x7, 2), ++ .rgb_en = VOP_REG(PX30_DSP_CTRL0, 0x1, 0), ++ .mipi_dclk_pol = VOP_REG(PX30_DSP_CTRL0, 0x1, 25), ++ .mipi_pin_pol = VOP_REG(PX30_DSP_CTRL0, 0x7, 26), ++ .mipi_en = VOP_REG(PX30_DSP_CTRL0, 0x1, 24), ++}; ++ ++static const struct vop_misc rv1126_misc = { ++ .global_regdone_en = VOP_REG(PX30_SYS_CTRL2, 0x1, 13), ++}; ++ ++static const struct vop_win_data rv1126_vop_win_data[] = { ++ { .base = 0x00, .phy = &px30_win0_data, ++ .type = DRM_PLANE_TYPE_OVERLAY }, ++ { .base = 0x00, .phy = &px30_win2_data, ++ .type = DRM_PLANE_TYPE_PRIMARY }, ++}; ++ ++static const struct vop_data rv1126_vop = { ++ .version = VOP_VERSION(2, 0xb), ++ .intr = &px30_intr, ++ .common = &rv1126_common, ++ .modeset = &rv1126_modeset, ++ .output = &rv1126_output, ++ .misc = &rv1126_misc, ++ .win = rv1126_vop_win_data, ++ .win_size = ARRAY_SIZE(rv1126_vop_win_data), ++ .max_output = { 1920, 1080 }, ++ .lut_size = 1024, ++}; ++ + static const struct of_device_id vop_driver_dt_match[] = { + { .compatible = "rockchip,rk3036-vop", + .data = &rk3036_vop }, +@@ -1147,6 +1200,8 @@ static const struct of_device_id vop_dri + .data = &rk3228_vop }, + { .compatible = "rockchip,rk3328-vop", + .data = &rk3328_vop }, ++ { .compatible = "rockchip,rv1126-vop", ++ .data = &rv1126_vop }, + {}, + }; + MODULE_DEVICE_TABLE(of, vop_driver_dt_match); diff --git a/patches-6.6/033-02-v6.7-drm-rockchip-dsi-Add-rv1126-MIPI-DSI-support.patch b/patches-6.6/033-02-v6.7-drm-rockchip-dsi-Add-rv1126-MIPI-DSI-support.patch new file mode 100644 index 0000000..83571e2 --- /dev/null +++ b/patches-6.6/033-02-v6.7-drm-rockchip-dsi-Add-rv1126-MIPI-DSI-support.patch @@ -0,0 +1,60 @@ +From 11fdb231f4127bf60839a63a8c7ed640ebe4751a Mon Sep 17 00:00:00 2001 +From: Jagan Teki +Date: Mon, 31 Jul 2023 16:30:06 +0530 +Subject: [PATCH] drm/rockchip: dsi: Add rv1126 MIPI DSI support + +RV1126 MIPI DSI supports V1.2 DPHY with 4 lanes and 1Gbps transfer +rate for lane. + +Add support for it. 
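A quick aside on the GRF write idiom used in the hunk below (an editorial sketch assuming the usual Rockchip GRF semantics, not part of the upstream commit): the upper 16 bits of a GRF register act as a per-bit write-enable mask, which is exactly what HIWORD_UPDATE() packs together with the new value.

    /* Sketch only, reusing the macros added below: clear FORCETXSTOPMODE,
     * TURNDISABLE and FORCERXMODE while leaving all other bits untouched. */
    u32 lanecfg1 = HIWORD_UPDATE(0, RV1126_DSI_TURNDISABLE |
                                    RV1126_DSI_FORCERXMODE |
                                    RV1126_DSI_FORCETXSTOPMODE);
    /* -> 0x00f50000: write-enable mask in bits [31:16], new (zero) value
     * in bits [15:0] */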
+ +Signed-off-by: Jagan Teki +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230731110012.2913742-9-jagan@edgeble.ai +--- + .../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 20 +++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c ++++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +@@ -198,6 +198,11 @@ + #define RK3568_DSI1_TURNDISABLE BIT(2) + #define RK3568_DSI1_FORCERXMODE BIT(0) + ++#define RV1126_GRF_DSIPHY_CON 0x10220 ++#define RV1126_DSI_FORCETXSTOPMODE (0xf << 4) ++#define RV1126_DSI_TURNDISABLE BIT(2) ++#define RV1126_DSI_FORCERXMODE BIT(0) ++ + #define HIWORD_UPDATE(val, mask) (val | (mask) << 16) + + enum { +@@ -1651,6 +1656,18 @@ static const struct rockchip_dw_dsi_chip + { /* sentinel */ } + }; + ++static const struct rockchip_dw_dsi_chip_data rv1126_chip_data[] = { ++ { ++ .reg = 0xffb30000, ++ .lanecfg1_grf_reg = RV1126_GRF_DSIPHY_CON, ++ .lanecfg1 = HIWORD_UPDATE(0, RV1126_DSI_TURNDISABLE | ++ RV1126_DSI_FORCERXMODE | ++ RV1126_DSI_FORCETXSTOPMODE), ++ .max_data_lanes = 4, ++ }, ++ { /* sentinel */ } ++}; ++ + static const struct of_device_id dw_mipi_dsi_rockchip_dt_ids[] = { + { + .compatible = "rockchip,px30-mipi-dsi", +@@ -1664,6 +1681,9 @@ static const struct of_device_id dw_mipi + }, { + .compatible = "rockchip,rk3568-mipi-dsi", + .data = &rk3568_chip_data, ++ }, { ++ .compatible = "rockchip,rv1126-mipi-dsi", ++ .data = &rv1126_chip_data, + }, + { /* sentinel */ } + }; diff --git a/patches-6.6/033-03-v6.7-drm-rockchip-vop-Use-cleanup-helper-directly-as-destroy.patch b/patches-6.6/033-03-v6.7-drm-rockchip-vop-Use-cleanup-helper-directly-as-destroy.patch new file mode 100644 index 0000000..96cca08 --- /dev/null +++ b/patches-6.6/033-03-v6.7-drm-rockchip-vop-Use-cleanup-helper-directly-as-destroy.patch @@ -0,0 +1,71 @@ +From 800f7c332df7cd9614c416fd005a6bb53f96f13c Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Wed, 21 Jun 2023 22:33:18 +0000 +Subject: [PATCH] drm/rockchip: vop: Use cleanup helper directly as destroy + funcs + +vop_plane_destroy and vop_crtc_destroy are plain wrappers around +drm_plane_cleanup and drm_crtc_cleanup. Use them directly as plane and +crtc funcs to closer match VOP2 driver. 
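Why the substitution is safe (an editorial note, not taken from the commit): drm_plane_cleanup() and drm_crtc_cleanup() already have the exact prototypes the respective .destroy callbacks expect, so the one-line wrappers added nothing. A minimal sketch of the resulting funcs table, with example_plane_funcs being a made-up name for illustration:

    static const struct drm_plane_funcs example_plane_funcs = {
            /* drm_plane_cleanup(struct drm_plane *) matches .destroy's
             * prototype, so the helper can be referenced directly */
            .destroy = drm_plane_cleanup,
    };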
+ +Signed-off-by: Jonas Karlman +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230621223311.2239547-3-jonas@kwiboo.se +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 16 +++------------- + 1 file changed, 3 insertions(+), 13 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +@@ -773,11 +773,6 @@ out: + } + } + +-static void vop_plane_destroy(struct drm_plane *plane) +-{ +- drm_plane_cleanup(plane); +-} +- + static inline bool rockchip_afbc(u64 modifier) + { + return modifier == ROCKCHIP_AFBC_MOD; +@@ -1139,7 +1134,7 @@ static const struct drm_plane_helper_fun + static const struct drm_plane_funcs vop_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, +- .destroy = vop_plane_destroy, ++ .destroy = drm_plane_cleanup, + .reset = drm_atomic_helper_plane_reset, + .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +@@ -1610,11 +1605,6 @@ static const struct drm_crtc_helper_func + .atomic_disable = vop_crtc_atomic_disable, + }; + +-static void vop_crtc_destroy(struct drm_crtc *crtc) +-{ +- drm_crtc_cleanup(crtc); +-} +- + static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc) + { + struct rockchip_crtc_state *rockchip_state; +@@ -1722,7 +1712,7 @@ vop_crtc_verify_crc_source(struct drm_cr + static const struct drm_crtc_funcs vop_crtc_funcs = { + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, +- .destroy = vop_crtc_destroy, ++ .destroy = drm_crtc_cleanup, + .reset = vop_crtc_reset, + .atomic_duplicate_state = vop_crtc_duplicate_state, + .atomic_destroy_state = vop_crtc_destroy_state, +@@ -1973,7 +1963,7 @@ static void vop_destroy_crtc(struct vop + */ + list_for_each_entry_safe(plane, tmp, &drm_dev->mode_config.plane_list, + head) +- vop_plane_destroy(plane); ++ drm_plane_cleanup(plane); + + /* + * Destroy CRTC after vop_plane_destroy() since vop_disable_plane() diff --git a/patches-6.6/033-04-v6.7-drm-rockchip-vop2-Demote-message-in-mod_supported-to.patch b/patches-6.6/033-04-v6.7-drm-rockchip-vop2-Demote-message-in-mod_supported-to.patch new file mode 100644 index 0000000..f0811b3 --- /dev/null +++ b/patches-6.6/033-04-v6.7-drm-rockchip-vop2-Demote-message-in-mod_supported-to.patch @@ -0,0 +1,35 @@ +From eb23cffdd7f085149799e5eda12a9aff792cc34d Mon Sep 17 00:00:00 2001 +From: Michael Tretter +Date: Mon, 9 Oct 2023 12:37:53 +0200 +Subject: [PATCH] drm/rockchip: vop2: Demote message in mod_supported to + drm_dbg_kms + +Checking if a modifier is supported by a plane is normal behavior. It is +normal that a plane may not support certain modifiers. Failing the check +doesn't justify an error message in the kernel log and may mislead +users. + +Demote the error message to drm_dbg_kms to only print the message if the +respective debug messages are enabled. This is similar to the behavior +in rockchip_drm_vop.c. 
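A usage note, not part of the upstream commit: drm_dbg_kms() output is gated by the DRM_UT_KMS bit of the drm.debug module parameter, so after this change the message only reaches the log when KMS debugging is explicitly enabled, whereas drm_err() always prints.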
+ +Signed-off-by: Michael Tretter +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231009103753.830458-1-m.tretter@pengutronix.de +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -469,8 +469,8 @@ static bool rockchip_vop2_mod_supported( + return true; + + if (!rockchip_afbc(plane, modifier)) { +- drm_err(vop2->drm, "Unsupported format modifier 0x%llx\n", +- modifier); ++ drm_dbg_kms(vop2->drm, "Unsupported format modifier 0x%llx\n", ++ modifier); + + return false; + } diff --git a/patches-6.6/033-05-v6.7-drm-rockchip-remove-redundant-of_match_ptr.patch b/patches-6.6/033-05-v6.7-drm-rockchip-remove-redundant-of_match_ptr.patch new file mode 100644 index 0000000..41d6e9d --- /dev/null +++ b/patches-6.6/033-05-v6.7-drm-rockchip-remove-redundant-of_match_ptr.patch @@ -0,0 +1,53 @@ +From 63a06c9fe30bf84d1ab6f07d0e408bd1d4ccaf85 Mon Sep 17 00:00:00 2001 +From: Zhu Wang +Date: Mon, 31 Jul 2023 20:53:04 +0800 +Subject: [PATCH] drm/rockchip: remove redundant of_match_ptr + +The driver depends on CONFIG_OF, so it is not necessary to use +of_match_ptr here. + +Even for drivers that do not depend on CONFIG_OF, it's almost always +better to leave out the of_match_ptr(), since the only thing it can +possibly do is to save a few bytes of .text if a driver can be used both +with and without it. Hence we remove of_match_ptr. + +Signed-off-by: Zhu Wang +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230731125304.87059-1-wangzhu9@huawei.com +--- + drivers/gpu/drm/rockchip/cdn-dp-core.c | 2 +- + drivers/gpu/drm/rockchip/rockchip_lvds.c | 2 +- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c ++++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c +@@ -1261,7 +1261,7 @@ struct platform_driver cdn_dp_driver = { + .driver = { + .name = "cdn-dp", + .owner = THIS_MODULE, +- .of_match_table = of_match_ptr(cdn_dp_dt_ids), ++ .of_match_table = cdn_dp_dt_ids, + .pm = &cdn_dp_pm_ops, + }, + }; +--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c ++++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c +@@ -751,6 +751,6 @@ struct platform_driver rockchip_lvds_dri + .remove_new = rockchip_lvds_remove, + .driver = { + .name = "rockchip-lvds", +- .of_match_table = of_match_ptr(rockchip_lvds_dt_ids), ++ .of_match_table = rockchip_lvds_dt_ids, + }, + }; +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -274,6 +274,6 @@ struct platform_driver vop2_platform_dri + .remove_new = vop2_remove, + .driver = { + .name = "rockchip-vop2", +- .of_match_table = of_match_ptr(vop2_dt_match), ++ .of_match_table = vop2_dt_match, + }, + }; diff --git a/patches-6.6/033-06-v6.7-drm-rockchip-dsi-Use-devm_platform_get_and_ioremap_reso.patch b/patches-6.6/033-06-v6.7-drm-rockchip-dsi-Use-devm_platform_get_and_ioremap_reso.patch new file mode 100644 index 0000000..b7c34d9 --- /dev/null +++ b/patches-6.6/033-06-v6.7-drm-rockchip-dsi-Use-devm_platform_get_and_ioremap_reso.patch @@ -0,0 +1,29 @@ +From 253a1d33e5cfdf62525f5d6ed2bf03acbadd1582 Mon Sep 17 00:00:00 2001 +From: Yang Li +Date: Fri, 21 Apr 2023 16:13:03 +0800 +Subject: [PATCH] drm/rockchip: dsi: Use + devm_platform_get_and_ioremap_resource() + +Convert platform_get_resource(), devm_ioremap_resource() to a single 
+call to devm_platform_get_and_ioremap_resource(), as this is exactly +what this function does. + +Signed-off-by: Yang Li +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230421081303.122452-1-yang.lee@linux.alibaba.com +--- + drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c ++++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +@@ -1358,8 +1358,7 @@ static int dw_mipi_dsi_rockchip_probe(st + if (!dsi) + return -ENOMEM; + +- res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +- dsi->base = devm_ioremap_resource(dev, res); ++ dsi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(dsi->base)) { + DRM_DEV_ERROR(dev, "Unable to get dsi registers\n"); + return PTR_ERR(dsi->base); diff --git a/patches-6.6/033-07-v6.7-drm-rockchip-remove-unused-struct-in-vop2.patch b/patches-6.6/033-07-v6.7-drm-rockchip-remove-unused-struct-in-vop2.patch new file mode 100644 index 0000000..d38f249 --- /dev/null +++ b/patches-6.6/033-07-v6.7-drm-rockchip-remove-unused-struct-in-vop2.patch @@ -0,0 +1,54 @@ +From ac1c11c23fc51c1ba51a3ed586df40ffe6b1de35 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Fri, 13 Oct 2023 20:20:36 +0800 +Subject: [PATCH] drm/rockchip: remove unused struct in vop2 + +These structs are undefined and un used. + +Fixes: 604be85547ce ("drm/rockchip: Add VOP2 driver") +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231013122036.1594090-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 -- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 3 --- + 2 files changed, 5 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -160,7 +160,6 @@ struct vop2_video_port { + struct vop2 *vop2; + struct clk *dclk; + unsigned int id; +- const struct vop2_video_port_regs *regs; + const struct vop2_video_port_data *data; + + struct completion dsp_hold_completion; +@@ -2275,7 +2274,6 @@ static int vop2_create_crtcs(struct vop2 + vp = &vop2->vps[i]; + vp->vop2 = vop2; + vp->id = vp_data->id; +- vp->regs = vp_data->regs; + vp->data = vp_data; + + snprintf(dclk_name, sizeof(dclk_name), "dclk_vp%d", vp->id); +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -134,16 +134,13 @@ struct vop2_video_port_data { + u16 cubic_lut_len; + struct vop_rect max_output; + const u8 pre_scan_max_dly[4]; +- const struct vop2_video_port_regs *regs; + unsigned int offset; + }; + + struct vop2_data { + u8 nr_vps; +- const struct vop2_ctrl *ctrl; + const struct vop2_win_data *win; + const struct vop2_video_port_data *vp; +- const struct vop_csc_table *csc_table; + struct vop_rect max_input; + struct vop_rect max_output; + diff --git a/patches-6.6/033-08-v6.7-drm-rockchip-remove-NR_LAYERS-macro-on-vop2.patch b/patches-6.6/033-08-v6.7-drm-rockchip-remove-NR_LAYERS-macro-on-vop2.patch new file mode 100644 index 0000000..b4499db --- /dev/null +++ b/patches-6.6/033-08-v6.7-drm-rockchip-remove-NR_LAYERS-macro-on-vop2.patch @@ -0,0 +1,36 @@ +From dc00748adcf03d754bf43035c668bc5b20fb6597 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Fri, 13 Oct 2023 20:20:51 +0800 +Subject: [PATCH] drm/rockchip: remove NR_LAYERS macro on vop2 + +There are 8 layers on rk3588, so a fix defined macro is +not appropriate. 
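Rough arithmetic behind the change (an editorial sketch assuming all video ports are in use, not part of the upstream commit):

    rk356x: win_size = 6, 3 video ports  ->  6 / 3 = 2 layers per port
    rk3588: win_size = 8, 4 video ports  ->  8 / 4 = 2 layers per port
            (the old fixed NR_LAYERS = 6 would give 6 / 4 = 1 here)

Deriving nlayers from vop2_data->win_size keeps the result correct on both generations.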
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231013122051.1594164-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -2252,8 +2252,6 @@ static struct vop2_video_port *find_vp_w + return NULL; + } + +-#define NR_LAYERS 6 +- + static int vop2_create_crtcs(struct vop2 *vop2) + { + const struct vop2_data *vop2_data = vop2->data; +@@ -2372,7 +2370,7 @@ static int vop2_create_crtcs(struct vop2 + struct vop2_video_port *vp = &vop2->vps[i]; + + if (vp->crtc.port) +- vp->nlayers = NR_LAYERS / nvps; ++ vp->nlayers = vop2_data->win_size / nvps; + } + + return 0; diff --git a/patches-6.6/033-09-v6.7-drm-rockchip-vop-fix-format-bpp-calculation.patch b/patches-6.6/033-09-v6.7-drm-rockchip-vop-fix-format-bpp-calculation.patch new file mode 100644 index 0000000..7793309 --- /dev/null +++ b/patches-6.6/033-09-v6.7-drm-rockchip-vop-fix-format-bpp-calculation.patch @@ -0,0 +1,57 @@ +From 45ad07c7053df0b67e13d8deb574920d11651fb2 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Wed, 18 Oct 2023 17:42:10 +0800 +Subject: [PATCH] drm/rockchip: vop: fix format bpp calculation + +We can't rely on cpp for bpp calculation as the cpp of +some formats(DRM_FORMAT_YUV420_8BIT/10BIT, etc) is zero. + +Signed-off-by: Andy Yan +Acked-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231018094210.2475771-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -282,6 +282,20 @@ static void vop2_win_disable(struct vop2 + vop2_win_write(win, VOP2_WIN_CLUSTER_ENABLE, 0); + } + ++static u32 vop2_get_bpp(const struct drm_format_info *format) ++{ ++ switch (format->format) { ++ case DRM_FORMAT_YUV420_8BIT: ++ return 12; ++ case DRM_FORMAT_YUV420_10BIT: ++ return 15; ++ case DRM_FORMAT_VUY101010: ++ return 30; ++ default: ++ return drm_format_info_bpp(format, 0); ++ } ++} ++ + static enum vop2_data_format vop2_convert_format(u32 format) + { + switch (format) { +@@ -482,7 +496,7 @@ static u32 vop2_afbc_transform_offset(st + { + struct drm_rect *src = &pstate->src; + struct drm_framebuffer *fb = pstate->fb; +- u32 bpp = fb->format->cpp[0] * 8; ++ u32 bpp = vop2_get_bpp(fb->format); + u32 vir_width = (fb->pitches[0] << 3) / bpp; + u32 width = drm_rect_width(src) >> 16; + u32 height = drm_rect_height(src) >> 16; +@@ -1082,7 +1096,7 @@ static void vop2_plane_atomic_update(str + struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode; + struct vop2 *vop2 = win->vop2; + struct drm_framebuffer *fb = pstate->fb; +- u32 bpp = fb->format->cpp[0] * 8; ++ u32 bpp = vop2_get_bpp(fb->format); + u32 actual_w, actual_h, dsp_w, dsp_h; + u32 act_info, dsp_info; + u32 format; diff --git a/patches-6.6/033-10-v6.7-drm-rockchip-vop2-remove-the-unsupported-format-of-cluste.patch b/patches-6.6/033-10-v6.7-drm-rockchip-vop2-remove-the-unsupported-format-of-cluste.patch new file mode 100644 index 0000000..efc718b --- /dev/null +++ b/patches-6.6/033-10-v6.7-drm-rockchip-vop2-remove-the-unsupported-format-of-cluste.patch @@ -0,0 +1,89 @@ +From 01d5a75370a60c3a8d691347ae6ebb2a9f8dc44a Mon Sep 17 00:00:00 2001 +From: Andy Yan 
+Date: Wed, 18 Oct 2023 17:42:39 +0800 +Subject: [PATCH] drm/rockchip: vop2: remove the unsupported format of cluster + window + +The cluster window on vop2 doesn't support linear yuv +format(NV12/16/24), it only support afbc based yuv +format(DRM_FORMAT_YUV420_8BIT/10BIT), which will be +added in next patch. + +Fixes: 604be85547ce ("drm/rockchip: Add VOP2 driver") +Signed-off-by: Andy Yan +Acked-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231018094239.2475851-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 24 +------------------- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 3 --- + 2 files changed, 1 insertion(+), 26 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -342,10 +342,6 @@ static enum vop2_afbc_format vop2_conver + case DRM_FORMAT_RGB565: + case DRM_FORMAT_BGR565: + return VOP2_AFBC_FMT_RGB565; +- case DRM_FORMAT_NV12: +- return VOP2_AFBC_FMT_YUV420; +- case DRM_FORMAT_NV16: +- return VOP2_AFBC_FMT_YUV422; + default: + return VOP2_AFBC_FMT_INVALID; + } +@@ -366,25 +362,9 @@ static bool vop2_win_rb_swap(u32 format) + } + } + +-static bool vop2_afbc_rb_swap(u32 format) +-{ +- switch (format) { +- case DRM_FORMAT_NV24: +- return true; +- default: +- return false; +- } +-} +- + static bool vop2_afbc_uv_swap(u32 format) + { +- switch (format) { +- case DRM_FORMAT_NV12: +- case DRM_FORMAT_NV16: +- return true; +- default: +- return false; +- } ++ return false; + } + + static bool vop2_win_uv_swap(u32 format) +@@ -1234,7 +1214,6 @@ static void vop2_plane_atomic_update(str + drm_err(vop2->drm, "vp%d %s stride[%d] not 64 pixel aligned\n", + vp->id, win->data->name, stride); + +- rb_swap = vop2_afbc_rb_swap(fb->format->format); + uv_swap = vop2_afbc_uv_swap(fb->format->format); + /* + * This is a workaround for crazy IC design, Cluster +@@ -1251,7 +1230,6 @@ static void vop2_plane_atomic_update(str + if (vop2_cluster_window(win)) + vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1); + vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format); +- vop2_win_write(win, VOP2_WIN_AFBC_RB_SWAP, rb_swap); + vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap); + vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -24,9 +24,6 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, +- DRM_FORMAT_NV12, +- DRM_FORMAT_NV16, +- DRM_FORMAT_NV24, + }; + + static const uint32_t formats_win_full_10bit_yuyv[] = { diff --git a/patches-6.6/033-11-v6.7-drm-rockchip-vop2-Add-more-supported-10bit-formats.patch b/patches-6.6/033-11-v6.7-drm-rockchip-vop2-Add-more-supported-10bit-formats.patch new file mode 100644 index 0000000..660845e --- /dev/null +++ b/patches-6.6/033-11-v6.7-drm-rockchip-vop2-Add-more-supported-10bit-formats.patch @@ -0,0 +1,162 @@ +From bfd8a5c228fa3bb97884f77529c09e8745da08b9 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Wed, 18 Oct 2023 17:43:18 +0800 +Subject: [PATCH] drm/rockchip: vop2: Add more supported 10bit formats + +Add 10 bit RGB and AFBC based YUV format supported +by vop2. 
+ +Signed-off-by: Andy Yan +Acked-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231018094318.2476081-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 45 +++++++++++++++++++- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 22 +++++++--- + 2 files changed, 61 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -299,6 +299,11 @@ static u32 vop2_get_bpp(const struct drm + static enum vop2_data_format vop2_convert_format(u32 format) + { + switch (format) { ++ case DRM_FORMAT_XRGB2101010: ++ case DRM_FORMAT_ARGB2101010: ++ case DRM_FORMAT_XBGR2101010: ++ case DRM_FORMAT_ABGR2101010: ++ return VOP2_FMT_XRGB101010; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_XBGR8888: +@@ -311,10 +316,19 @@ static enum vop2_data_format vop2_conver + case DRM_FORMAT_BGR565: + return VOP2_FMT_RGB565; + case DRM_FORMAT_NV12: ++ case DRM_FORMAT_NV21: ++ case DRM_FORMAT_YUV420_8BIT: + return VOP2_FMT_YUV420SP; ++ case DRM_FORMAT_NV15: ++ case DRM_FORMAT_YUV420_10BIT: ++ return VOP2_FMT_YUV420SP_10; + case DRM_FORMAT_NV16: ++ case DRM_FORMAT_NV61: + return VOP2_FMT_YUV422SP; ++ case DRM_FORMAT_Y210: ++ return VOP2_FMT_YUV422SP_10; + case DRM_FORMAT_NV24: ++ case DRM_FORMAT_NV42: + return VOP2_FMT_YUV444SP; + case DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: +@@ -331,6 +345,11 @@ static enum vop2_data_format vop2_conver + static enum vop2_afbc_format vop2_convert_afbc_format(u32 format) + { + switch (format) { ++ case DRM_FORMAT_XRGB2101010: ++ case DRM_FORMAT_ARGB2101010: ++ case DRM_FORMAT_XBGR2101010: ++ case DRM_FORMAT_ABGR2101010: ++ return VOP2_AFBC_FMT_ARGB2101010; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_XBGR8888: +@@ -342,6 +361,17 @@ static enum vop2_afbc_format vop2_conver + case DRM_FORMAT_RGB565: + case DRM_FORMAT_BGR565: + return VOP2_AFBC_FMT_RGB565; ++ case DRM_FORMAT_YUV420_8BIT: ++ return VOP2_AFBC_FMT_YUV420; ++ case DRM_FORMAT_YUV420_10BIT: ++ return VOP2_AFBC_FMT_YUV420_10BIT; ++ case DRM_FORMAT_YVYU: ++ case DRM_FORMAT_YUYV: ++ case DRM_FORMAT_VYUY: ++ case DRM_FORMAT_UYVY: ++ return VOP2_AFBC_FMT_YUV422; ++ case DRM_FORMAT_Y210: ++ return VOP2_AFBC_FMT_YUV422_10BIT; + default: + return VOP2_AFBC_FMT_INVALID; + } +@@ -352,6 +382,8 @@ static enum vop2_afbc_format vop2_conver + static bool vop2_win_rb_swap(u32 format) + { + switch (format) { ++ case DRM_FORMAT_XBGR2101010: ++ case DRM_FORMAT_ABGR2101010: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_BGR888: +@@ -364,7 +396,15 @@ static bool vop2_win_rb_swap(u32 format) + + static bool vop2_afbc_uv_swap(u32 format) + { +- return false; ++ switch (format) { ++ case DRM_FORMAT_YUYV: ++ case DRM_FORMAT_Y210: ++ case DRM_FORMAT_YUV420_8BIT: ++ case DRM_FORMAT_YUV420_10BIT: ++ return true; ++ default: ++ return false; ++ } + } + + static bool vop2_win_uv_swap(u32 format) +@@ -373,6 +413,9 @@ static bool vop2_win_uv_swap(u32 format) + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV16: + case DRM_FORMAT_NV24: ++ case DRM_FORMAT_NV15: ++ case DRM_FORMAT_YUYV: ++ case DRM_FORMAT_UYVY: + return true; + default: + return false; +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -16,6 +16,10 @@ + #include "rockchip_drm_vop2.h" + + static const uint32_t formats_win_full_10bit[] = { ++ DRM_FORMAT_XRGB2101010, ++ DRM_FORMAT_ARGB2101010, ++ DRM_FORMAT_XBGR2101010, 
++ DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, +@@ -24,6 +28,10 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, ++ DRM_FORMAT_YUV420_8BIT, /* yuv420_8bit non-Linear mode only */ ++ DRM_FORMAT_YUV420_10BIT, /* yuv420_10bit non-Linear mode only */ ++ DRM_FORMAT_YUYV, /* yuv422_8bit non-Linear mode only*/ ++ DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */ + }; + + static const uint32_t formats_win_full_10bit_yuyv[] = { +@@ -35,11 +43,15 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, +- DRM_FORMAT_NV12, +- DRM_FORMAT_NV16, +- DRM_FORMAT_NV24, +- DRM_FORMAT_YVYU, +- DRM_FORMAT_VYUY, ++ DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV21, /* yuv420_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */ ++ DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV61, /* yuv422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV42, /* yuv444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */ ++ DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ + }; + + static const uint32_t formats_win_lite[] = { diff --git a/patches-6.6/033-12-v6.7-drm-rockchip-vop2-rename-window-formats-to-show-window-ty.patch b/patches-6.6/033-12-v6.7-drm-rockchip-vop2-rename-window-formats-to-show-window-ty.patch new file mode 100644 index 0000000..8d73568 --- /dev/null +++ b/patches-6.6/033-12-v6.7-drm-rockchip-vop2-rename-window-formats-to-show-window-ty.patch @@ -0,0 +1,116 @@ +From 215737e37d07ade8952048339e37aec6c6f82223 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Wed, 18 Oct 2023 17:43:39 +0800 +Subject: [PATCH] drm/rockchip: vop2: rename window formats to show window type + using them + +formats_win_full_10bit is for cluster window, +formats_win_full_10bit_yuyv is for rk356x esmart, rk3588 esmart window +will support more format. +formats_win_lite is for smart window. 
+ +Rename it based the windows type may let meaning is clearer + +Signed-off-by: Andy Yan +Acked-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231018094339.2476142-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 30 ++++++++++---------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -15,7 +15,7 @@ + + #include "rockchip_drm_vop2.h" + +-static const uint32_t formats_win_full_10bit[] = { ++static const uint32_t formats_cluster[] = { + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_XBGR2101010, +@@ -34,7 +34,7 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */ + }; + +-static const uint32_t formats_win_full_10bit_yuyv[] = { ++static const uint32_t formats_rk356x_esmart[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, +@@ -54,7 +54,7 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ + }; + +-static const uint32_t formats_win_lite[] = { ++static const uint32_t formats_smart[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, +@@ -153,8 +153,8 @@ static const struct vop2_win_data rk3568 + .name = "Smart0-win0", + .phys_id = ROCKCHIP_VOP2_SMART0, + .base = 0x1c00, +- .formats = formats_win_lite, +- .nformats = ARRAY_SIZE(formats_win_lite), ++ .formats = formats_smart, ++ .nformats = ARRAY_SIZE(formats_smart), + .format_modifiers = format_modifiers, + .layer_sel_id = 3, + .supported_rotations = DRM_MODE_REFLECT_Y, +@@ -165,8 +165,8 @@ static const struct vop2_win_data rk3568 + }, { + .name = "Smart1-win0", + .phys_id = ROCKCHIP_VOP2_SMART1, +- .formats = formats_win_lite, +- .nformats = ARRAY_SIZE(formats_win_lite), ++ .formats = formats_smart, ++ .nformats = ARRAY_SIZE(formats_smart), + .format_modifiers = format_modifiers, + .base = 0x1e00, + .layer_sel_id = 7, +@@ -178,8 +178,8 @@ static const struct vop2_win_data rk3568 + }, { + .name = "Esmart1-win0", + .phys_id = ROCKCHIP_VOP2_ESMART1, +- .formats = formats_win_full_10bit_yuyv, +- .nformats = ARRAY_SIZE(formats_win_full_10bit_yuyv), ++ .formats = formats_rk356x_esmart, ++ .nformats = ARRAY_SIZE(formats_rk356x_esmart), + .format_modifiers = format_modifiers, + .base = 0x1a00, + .layer_sel_id = 6, +@@ -191,8 +191,8 @@ static const struct vop2_win_data rk3568 + }, { + .name = "Esmart0-win0", + .phys_id = ROCKCHIP_VOP2_ESMART0, +- .formats = formats_win_full_10bit_yuyv, +- .nformats = ARRAY_SIZE(formats_win_full_10bit_yuyv), ++ .formats = formats_rk356x_esmart, ++ .nformats = ARRAY_SIZE(formats_rk356x_esmart), + .format_modifiers = format_modifiers, + .base = 0x1800, + .layer_sel_id = 2, +@@ -205,8 +205,8 @@ static const struct vop2_win_data rk3568 + .name = "Cluster0-win0", + .phys_id = ROCKCHIP_VOP2_CLUSTER0, + .base = 0x1000, +- .formats = formats_win_full_10bit, +- .nformats = ARRAY_SIZE(formats_win_full_10bit), ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), + .format_modifiers = format_modifiers_afbc, + .layer_sel_id = 0, + .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | +@@ -220,8 +220,8 @@ static const struct vop2_win_data rk3568 + .name = "Cluster1-win0", + .phys_id = ROCKCHIP_VOP2_CLUSTER1, + .base = 0x1200, +- .formats = formats_win_full_10bit, +- .nformats = ARRAY_SIZE(formats_win_full_10bit), ++ .formats = formats_cluster, ++ 
.nformats = ARRAY_SIZE(formats_cluster), + .format_modifiers = format_modifiers_afbc, + .layer_sel_id = 1, + .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | diff --git a/patches-6.6/033-13-v6.7-drm-fourcc-Add-NV20-and-NV30-YUV-formats.patch b/patches-6.6/033-13-v6.7-drm-fourcc-Add-NV20-and-NV30-YUV-formats.patch new file mode 100644 index 0000000..43b0d5a --- /dev/null +++ b/patches-6.6/033-13-v6.7-drm-fourcc-Add-NV20-and-NV30-YUV-formats.patch @@ -0,0 +1,57 @@ +From 728c15b4b5f3369cbde73d5e0f14701ab370f985 Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Mon, 23 Oct 2023 17:37:14 +0000 +Subject: [PATCH] drm/fourcc: Add NV20 and NV30 YUV formats + +DRM_FORMAT_NV20 and DRM_FORMAT_NV30 formats is the 2x1 and non-subsampled +variant of NV15, a 10-bit 2-plane YUV format that has no padding between +components. Instead, luminance and chrominance samples are grouped into 4s +so that each group is packed into an integer number of bytes: + +YYYY = UVUV = 4 * 10 bits = 40 bits = 5 bytes + +The '20' and '30' suffix refers to the optimum effective bits per pixel +which is achieved when the total number of luminance samples is a multiple +of 4. + +V2: Added NV30 format + +Signed-off-by: Jonas Karlman +Reviewed-by: Sandy Huang +Reviewed-by: Christopher Obbard +Tested-by: Christopher Obbard +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231023173718.188102-2-jonas@kwiboo.se +--- + drivers/gpu/drm/drm_fourcc.c | 8 ++++++++ + include/uapi/drm/drm_fourcc.h | 2 ++ + 2 files changed, 10 insertions(+) + +--- a/drivers/gpu/drm/drm_fourcc.c ++++ b/drivers/gpu/drm/drm_fourcc.c +@@ -299,6 +299,14 @@ const struct drm_format_info *__drm_form + .num_planes = 2, .char_per_block = { 5, 5, 0 }, + .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, + .vsub = 2, .is_yuv = true }, ++ { .format = DRM_FORMAT_NV20, .depth = 0, ++ .num_planes = 2, .char_per_block = { 5, 5, 0 }, ++ .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, ++ .vsub = 1, .is_yuv = true }, ++ { .format = DRM_FORMAT_NV30, .depth = 0, ++ .num_planes = 2, .char_per_block = { 5, 5, 0 }, ++ .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 1, ++ .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_Q410, .depth = 0, + .num_planes = 3, .char_per_block = { 2, 2, 2 }, + .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1, +--- a/include/uapi/drm/drm_fourcc.h ++++ b/include/uapi/drm/drm_fourcc.h +@@ -323,6 +323,8 @@ extern "C" { + * index 1 = Cr:Cb plane, [39:0] Cr1:Cb1:Cr0:Cb0 little endian + */ + #define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane */ ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') /* 2x1 subsampled Cr:Cb plane */ ++#define DRM_FORMAT_NV30 fourcc_code('N', 'V', '3', '0') /* non-subsampled Cr:Cb plane */ + + /* + * 2 plane YCbCr MSB aligned diff --git a/patches-6.6/033-14-v6.7-drm-rockchip-vop-Add-NV15-NV20-and-NV30-support.patch b/patches-6.6/033-14-v6.7-drm-rockchip-vop-Add-NV15-NV20-and-NV30-support.patch new file mode 100644 index 0000000..38c9d2b --- /dev/null +++ b/patches-6.6/033-14-v6.7-drm-rockchip-vop-Add-NV15-NV20-and-NV30-support.patch @@ -0,0 +1,231 @@ +From d4b384228562848e4b76b608a5876c92160e993c Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Mon, 23 Oct 2023 17:37:15 +0000 +Subject: [PATCH] drm/rockchip: vop: Add NV15, NV20 and NV30 support + +Add support for displaying 10-bit 4:2:0 and 4:2:2 formats produced by +the Rockchip Video Decoder on RK322X, RK3288, RK3328 and RK3399. 
+Also add support for 10-bit 4:4:4 format while at it. + +V5: Use drm_format_info_min_pitch() for correct bpp + Add missing NV21, NV61 and NV42 formats +V4: Rework RK3328/RK3399 win0/1 data to not affect RK3368 +V2: Added NV30 support + +Signed-off-by: Jonas Karlman +Reviewed-by: Sandy Huang +Reviewed-by: Christopher Obbard +Tested-by: Christopher Obbard +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231023173718.188102-3-jonas@kwiboo.se +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 36 ++++++++--- + drivers/gpu/drm/rockchip/rockchip_drm_vop.h | 1 + + drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 66 +++++++++++++++++---- + 3 files changed, 86 insertions(+), 17 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +@@ -280,6 +280,18 @@ static bool has_uv_swapped(uint32_t form + } + } + ++static bool is_fmt_10(uint32_t format) ++{ ++ switch (format) { ++ case DRM_FORMAT_NV15: ++ case DRM_FORMAT_NV20: ++ case DRM_FORMAT_NV30: ++ return true; ++ default: ++ return false; ++ } ++} ++ + static enum vop_data_format vop_convert_format(uint32_t format) + { + switch (format) { +@@ -295,12 +307,15 @@ static enum vop_data_format vop_convert_ + case DRM_FORMAT_BGR565: + return VOP_FMT_RGB565; + case DRM_FORMAT_NV12: ++ case DRM_FORMAT_NV15: + case DRM_FORMAT_NV21: + return VOP_FMT_YUV420SP; + case DRM_FORMAT_NV16: ++ case DRM_FORMAT_NV20: + case DRM_FORMAT_NV61: + return VOP_FMT_YUV422SP; + case DRM_FORMAT_NV24: ++ case DRM_FORMAT_NV30: + case DRM_FORMAT_NV42: + return VOP_FMT_YUV444SP; + default: +@@ -947,7 +962,12 @@ static void vop_plane_atomic_update(stru + dsp_sty = dest->y1 + crtc->mode.vtotal - crtc->mode.vsync_start; + dsp_st = dsp_sty << 16 | (dsp_stx & 0xffff); + +- offset = (src->x1 >> 16) * fb->format->cpp[0]; ++ if (fb->format->char_per_block[0]) ++ offset = drm_format_info_min_pitch(fb->format, 0, ++ src->x1 >> 16); ++ else ++ offset = (src->x1 >> 16) * fb->format->cpp[0]; ++ + offset += (src->y1 >> 16) * fb->pitches[0]; + dma_addr = rk_obj->dma_addr + offset + fb->offsets[0]; + +@@ -973,6 +993,7 @@ static void vop_plane_atomic_update(stru + } + + VOP_WIN_SET(vop, win, format, format); ++ VOP_WIN_SET(vop, win, fmt_10, is_fmt_10(fb->format->format)); + VOP_WIN_SET(vop, win, yrgb_vir, DIV_ROUND_UP(fb->pitches[0], 4)); + VOP_WIN_SET(vop, win, yrgb_mst, dma_addr); + VOP_WIN_YUV2YUV_SET(vop, win_yuv2yuv, y2r_en, is_yuv); +@@ -982,15 +1003,16 @@ static void vop_plane_atomic_update(stru + (new_state->rotation & DRM_MODE_REFLECT_X) ? 
1 : 0); + + if (is_yuv) { +- int hsub = fb->format->hsub; +- int vsub = fb->format->vsub; +- int bpp = fb->format->cpp[1]; +- + uv_obj = fb->obj[1]; + rk_uv_obj = to_rockchip_obj(uv_obj); + +- offset = (src->x1 >> 16) * bpp / hsub; +- offset += (src->y1 >> 16) * fb->pitches[1] / vsub; ++ if (fb->format->char_per_block[1]) ++ offset = drm_format_info_min_pitch(fb->format, 1, ++ src->x1 >> 16); ++ else ++ offset = (src->x1 >> 16) * fb->format->cpp[1]; ++ offset /= fb->format->hsub; ++ offset += (src->y1 >> 16) * fb->pitches[1] / fb->format->vsub; + + dma_addr = rk_uv_obj->dma_addr + offset + fb->offsets[1]; + VOP_WIN_SET(vop, win, uv_vir, DIV_ROUND_UP(fb->pitches[1], 4)); +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +@@ -186,6 +186,7 @@ struct vop_win_phy { + struct vop_reg enable; + struct vop_reg gate; + struct vop_reg format; ++ struct vop_reg fmt_10; + struct vop_reg rb_swap; + struct vop_reg uv_swap; + struct vop_reg act_info; +--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +@@ -53,6 +53,26 @@ static const uint32_t formats_win_full[] + DRM_FORMAT_NV42, + }; + ++static const uint32_t formats_win_full_10[] = { ++ DRM_FORMAT_XRGB8888, ++ DRM_FORMAT_ARGB8888, ++ DRM_FORMAT_XBGR8888, ++ DRM_FORMAT_ABGR8888, ++ DRM_FORMAT_RGB888, ++ DRM_FORMAT_BGR888, ++ DRM_FORMAT_RGB565, ++ DRM_FORMAT_BGR565, ++ DRM_FORMAT_NV12, ++ DRM_FORMAT_NV21, ++ DRM_FORMAT_NV16, ++ DRM_FORMAT_NV61, ++ DRM_FORMAT_NV24, ++ DRM_FORMAT_NV42, ++ DRM_FORMAT_NV15, ++ DRM_FORMAT_NV20, ++ DRM_FORMAT_NV30, ++}; ++ + static const uint64_t format_modifiers_win_full[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID, +@@ -627,11 +647,12 @@ static const struct vop_scl_regs rk3288_ + + static const struct vop_win_phy rk3288_win01_data = { + .scl = &rk3288_win_full_scl, +- .data_formats = formats_win_full, +- .nformats = ARRAY_SIZE(formats_win_full), ++ .data_formats = formats_win_full_10, ++ .nformats = ARRAY_SIZE(formats_win_full_10), + .format_modifiers = format_modifiers_win_full, + .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0), + .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1), ++ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4), + .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12), + .uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15), + .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0), +@@ -936,13 +957,38 @@ static const struct vop_win_yuv2yuv_data + + }; + +-static const struct vop_win_phy rk3399_win01_data = { ++static const struct vop_win_phy rk3399_win0_data = { + .scl = &rk3288_win_full_scl, +- .data_formats = formats_win_full, +- .nformats = ARRAY_SIZE(formats_win_full), ++ .data_formats = formats_win_full_10, ++ .nformats = ARRAY_SIZE(formats_win_full_10), + .format_modifiers = format_modifiers_win_full_afbc, + .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0), + .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1), ++ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4), ++ .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12), ++ .uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15), ++ .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21), ++ .y_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 22), ++ .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0), ++ .dsp_info = VOP_REG(RK3288_WIN0_DSP_INFO, 0x0fff0fff, 0), ++ .dsp_st = VOP_REG(RK3288_WIN0_DSP_ST, 0x1fff1fff, 0), ++ .yrgb_mst = VOP_REG(RK3288_WIN0_YRGB_MST, 0xffffffff, 0), ++ .uv_mst = VOP_REG(RK3288_WIN0_CBR_MST, 0xffffffff, 0), ++ .yrgb_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 0), ++ .uv_vir = 
VOP_REG(RK3288_WIN0_VIR, 0x3fff, 16), ++ .src_alpha_ctl = VOP_REG(RK3288_WIN0_SRC_ALPHA_CTRL, 0xff, 0), ++ .dst_alpha_ctl = VOP_REG(RK3288_WIN0_DST_ALPHA_CTRL, 0xff, 0), ++ .channel = VOP_REG(RK3288_WIN0_CTRL2, 0xff, 0), ++}; ++ ++static const struct vop_win_phy rk3399_win1_data = { ++ .scl = &rk3288_win_full_scl, ++ .data_formats = formats_win_full_10, ++ .nformats = ARRAY_SIZE(formats_win_full_10), ++ .format_modifiers = format_modifiers_win_full, ++ .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0), ++ .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1), ++ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4), + .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12), + .uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15), + .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21), +@@ -965,9 +1011,9 @@ static const struct vop_win_phy rk3399_w + * AFBC on the primary plane. + */ + static const struct vop_win_data rk3399_vop_win_data[] = { +- { .base = 0x00, .phy = &rk3399_win01_data, ++ { .base = 0x00, .phy = &rk3399_win0_data, + .type = DRM_PLANE_TYPE_PRIMARY }, +- { .base = 0x40, .phy = &rk3368_win01_data, ++ { .base = 0x40, .phy = &rk3399_win1_data, + .type = DRM_PLANE_TYPE_OVERLAY }, + { .base = 0x00, .phy = &rk3368_win23_data, + .type = DRM_PLANE_TYPE_OVERLAY }, +@@ -1099,11 +1145,11 @@ static const struct vop_intr rk3328_vop_ + }; + + static const struct vop_win_data rk3328_vop_win_data[] = { +- { .base = 0xd0, .phy = &rk3368_win01_data, ++ { .base = 0xd0, .phy = &rk3399_win1_data, + .type = DRM_PLANE_TYPE_PRIMARY }, +- { .base = 0x1d0, .phy = &rk3368_win01_data, ++ { .base = 0x1d0, .phy = &rk3399_win1_data, + .type = DRM_PLANE_TYPE_OVERLAY }, +- { .base = 0x2d0, .phy = &rk3368_win01_data, ++ { .base = 0x2d0, .phy = &rk3399_win1_data, + .type = DRM_PLANE_TYPE_CURSOR }, + }; + diff --git a/patches-6.6/033-15-v6.8-drm-rockchip-vop2-Add-NV20-and-NV30-support.patch b/patches-6.6/033-15-v6.8-drm-rockchip-vop2-Add-NV20-and-NV30-support.patch new file mode 100644 index 0000000..ead6b64 --- /dev/null +++ b/patches-6.6/033-15-v6.8-drm-rockchip-vop2-Add-NV20-and-NV30-support.patch @@ -0,0 +1,67 @@ +From 5fc6aa7db080fd90ef00846aac04e8a211088132 Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Wed, 25 Oct 2023 21:32:46 +0000 +Subject: [PATCH] drm/rockchip: vop2: Add NV20 and NV30 support + +Add support for the 10-bit 4:2:2 and 4:4:4 formats NV20 and NV30. + +These formats can be tested using modetest [1]: + + modetest -P @:1920x1080@ + +e.g. 
on a ROCK 3 Model A (rk3568): + + modetest -P 43@67:1920x1080@NV20 -F tiles,tiles + modetest -P 43@67:1920x1080@NV30 -F smpte,smpte + +[1] https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/329 + +Signed-off-by: Jonas Karlman +Reviewed-by: Christopher Obbard +Tested-by: Christopher Obbard +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231025213248.2641962-1-jonas@kwiboo.se +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 5 +++++ + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 2 ++ + 2 files changed, 7 insertions(+) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -325,11 +325,14 @@ static enum vop2_data_format vop2_conver + case DRM_FORMAT_NV16: + case DRM_FORMAT_NV61: + return VOP2_FMT_YUV422SP; ++ case DRM_FORMAT_NV20: + case DRM_FORMAT_Y210: + return VOP2_FMT_YUV422SP_10; + case DRM_FORMAT_NV24: + case DRM_FORMAT_NV42: + return VOP2_FMT_YUV444SP; ++ case DRM_FORMAT_NV30: ++ return VOP2_FMT_YUV444SP_10; + case DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: + return VOP2_FMT_VYUY422; +@@ -414,6 +417,8 @@ static bool vop2_win_uv_swap(u32 format) + case DRM_FORMAT_NV16: + case DRM_FORMAT_NV24: + case DRM_FORMAT_NV15: ++ case DRM_FORMAT_NV20: ++ case DRM_FORMAT_NV30: + case DRM_FORMAT_YUYV: + case DRM_FORMAT_UYVY: + return true; +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -48,8 +48,10 @@ static const uint32_t formats_rk356x_esm + DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */ + DRM_FORMAT_NV61, /* yuv422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */ + DRM_FORMAT_NV42, /* yuv444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */ + DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ + }; diff --git a/patches-6.6/033-16-v6.8-drm-rockchip-rk3066_hdmi-Remove-useless-mode_fixup.patch b/patches-6.6/033-16-v6.8-drm-rockchip-rk3066_hdmi-Remove-useless-mode_fixup.patch new file mode 100644 index 0000000..861e58d --- /dev/null +++ b/patches-6.6/033-16-v6.8-drm-rockchip-rk3066_hdmi-Remove-useless-mode_fixup.patch @@ -0,0 +1,40 @@ +From 1044f4a31734eef000f42cdaaf35bb2f76286be5 Mon Sep 17 00:00:00 2001 +From: Johan Jonker +Date: Thu, 2 Nov 2023 14:41:48 +0100 +Subject: [PATCH] drm/rockchip: rk3066_hdmi: Remove useless mode_fixup + +The mode_fixup implementation doesn't do anything, so we can simply +remove it. 
+ +Signed-off-by: Johan Jonker +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/5649ac03-db92-42a9-d86a-76dfa1af7c64@gmail.com +--- + drivers/gpu/drm/rockchip/rk3066_hdmi.c | 9 --------- + 1 file changed, 9 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -434,14 +434,6 @@ static void rk3066_hdmi_encoder_disable( + rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_A); + } + +-static bool +-rk3066_hdmi_encoder_mode_fixup(struct drm_encoder *encoder, +- const struct drm_display_mode *mode, +- struct drm_display_mode *adj_mode) +-{ +- return true; +-} +- + static int + rk3066_hdmi_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, +@@ -459,7 +451,6 @@ static const + struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = { + .enable = rk3066_hdmi_encoder_enable, + .disable = rk3066_hdmi_encoder_disable, +- .mode_fixup = rk3066_hdmi_encoder_mode_fixup, + .mode_set = rk3066_hdmi_encoder_mode_set, + .atomic_check = rk3066_hdmi_encoder_atomic_check, + }; diff --git a/patches-6.6/033-17-v6.8-drm-rockchip-rk3066_hdmi-Switch-encoder-hooks-to-atomic.patch b/patches-6.6/033-17-v6.8-drm-rockchip-rk3066_hdmi-Switch-encoder-hooks-to-atomic.patch new file mode 100644 index 0000000..1167932 --- /dev/null +++ b/patches-6.6/033-17-v6.8-drm-rockchip-rk3066_hdmi-Switch-encoder-hooks-to-atomic.patch @@ -0,0 +1,88 @@ +From ae3436a5e7c2ef4f92938133bd99f92fc47ea34e Mon Sep 17 00:00:00 2001 +From: Johan Jonker +Date: Thu, 2 Nov 2023 14:42:04 +0100 +Subject: [PATCH] drm/rockchip: rk3066_hdmi: Switch encoder hooks to atomic + +The rk3066_hdmi encoder still uses the non atomic variants +of enable and disable. Convert to their atomic equivalents. +In atomic mode there is no need to save the adjusted mode, +so remove the mode_set function. + +Signed-off-by: Johan Jonker +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/034c3446-d619-f4c3-3aaa-ab51dc19d07f@gmail.com +--- + drivers/gpu/drm/rockchip/rk3066_hdmi.c | 35 +++++++++++++------------- + 1 file changed, 17 insertions(+), 18 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -55,7 +55,6 @@ struct rk3066_hdmi { + unsigned int tmdsclk; + + struct hdmi_data_info hdmi_data; +- struct drm_display_mode previous_mode; + }; + + static struct rk3066_hdmi *encoder_to_rk3066_hdmi(struct drm_encoder *encoder) +@@ -387,21 +386,21 @@ static int rk3066_hdmi_setup(struct rk30 + return 0; + } + +-static void +-rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder, +- struct drm_display_mode *mode, +- struct drm_display_mode *adj_mode) ++static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder, ++ struct drm_atomic_state *state) + { + struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); ++ struct drm_connector_state *conn_state; ++ struct drm_crtc_state *crtc_state; ++ int mux, val; + +- /* Store the display mode for plugin/DPMS poweron events. 
*/ +- drm_mode_copy(&hdmi->previous_mode, adj_mode); +-} ++ conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector); ++ if (WARN_ON(!conn_state)) ++ return; + +-static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder) +-{ +- struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); +- int mux, val; ++ crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); ++ if (WARN_ON(!crtc_state)) ++ return; + + mux = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder); + if (mux) +@@ -414,10 +413,11 @@ static void rk3066_hdmi_encoder_enable(s + DRM_DEV_DEBUG(hdmi->dev, "hdmi encoder enable select: vop%s\n", + (mux) ? "1" : "0"); + +- rk3066_hdmi_setup(hdmi, &hdmi->previous_mode); ++ rk3066_hdmi_setup(hdmi, &crtc_state->adjusted_mode); + } + +-static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder) ++static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder, ++ struct drm_atomic_state *state) + { + struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); + +@@ -449,10 +449,9 @@ rk3066_hdmi_encoder_atomic_check(struct + + static const + struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = { +- .enable = rk3066_hdmi_encoder_enable, +- .disable = rk3066_hdmi_encoder_disable, +- .mode_set = rk3066_hdmi_encoder_mode_set, +- .atomic_check = rk3066_hdmi_encoder_atomic_check, ++ .atomic_check = rk3066_hdmi_encoder_atomic_check, ++ .atomic_enable = rk3066_hdmi_encoder_enable, ++ .atomic_disable = rk3066_hdmi_encoder_disable, + }; + + static enum drm_connector_status diff --git a/patches-6.6/033-18-v6.8-drm-rockchip-rk3066_hdmi-include-drm-drm_atomic.h.patch b/patches-6.6/033-18-v6.8-drm-rockchip-rk3066_hdmi-include-drm-drm_atomic.h.patch new file mode 100644 index 0000000..99acce8 --- /dev/null +++ b/patches-6.6/033-18-v6.8-drm-rockchip-rk3066_hdmi-include-drm-drm_atomic.h.patch @@ -0,0 +1,43 @@ +From f4814c20d14ca168382e8887c768f290e4a2a861 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Wed, 22 Nov 2023 23:18:29 +0100 +Subject: [PATCH] drm/rockchip: rk3066_hdmi: include drm/drm_atomic.h + +Without this header, the newly added code fails to build: + +drivers/gpu/drm/rockchip/rk3066_hdmi.c: In function 'rk3066_hdmi_encoder_enable': +drivers/gpu/drm/rockchip/rk3066_hdmi.c:397:22: error: implicit declaration of function 'drm_atomic_get_new_connector_state'; did you mean 'drm_atomic_helper_connector_reset'? [-Werror=implicit-function-declaration] + 397 | conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + | drm_atomic_helper_connector_reset +drivers/gpu/drm/rockchip/rk3066_hdmi.c:397:20: error: assignment to 'struct drm_connector_state *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion] + 397 | conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector); + | ^ +drivers/gpu/drm/rockchip/rk3066_hdmi.c:401:22: error: implicit declaration of function 'drm_atomic_get_new_crtc_state'; did you mean 'drm_atomic_helper_swap_state'? 
[-Werror=implicit-function-declaration] + 401 | crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + | drm_atomic_helper_swap_state +drivers/gpu/drm/rockchip/rk3066_hdmi.c:401:20: error: assignment to 'struct drm_crtc_state *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion] + 401 | crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + | ^ + +Fixes: ae3436a5e7c2 ("drm/rockchip: rk3066_hdmi: Switch encoder hooks to atomic") +Signed-off-by: Arnd Bergmann +Acked-by: Randy Dunlap +Tested-by: Randy Dunlap # build-tested +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231122221838.3164349-1-arnd@kernel.org +--- + drivers/gpu/drm/rockchip/rk3066_hdmi.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -4,6 +4,7 @@ + * Zheng Yang + */ + ++#include + #include + #include + #include diff --git a/patches-6.6/033-19-v6.8-drm-rockchip-move-output-interface-related-definition-to.patch b/patches-6.6/033-19-v6.8-drm-rockchip-move-output-interface-related-definition-to.patch new file mode 100644 index 0000000..8694228 --- /dev/null +++ b/patches-6.6/033-19-v6.8-drm-rockchip-move-output-interface-related-definition-to.patch @@ -0,0 +1,189 @@ +From 8c8546546f256f834e9c7cab48e5946df340d1a8 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:56:27 +0800 +Subject: [PATCH] drm/rockchip: move output interface related definition to + rockchip_drm_drv.h + +The output interface related definition can shared between +vop and vop2, move them to rockchip_drm_drv.h can avoid duplicated +definition. + +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115627.1784735-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/analogix_dp-rockchip.c | 1 - + drivers/gpu/drm/rockchip/cdn-dp-core.c | 1 - + drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 1 - + drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 1 - + drivers/gpu/drm/rockchip/inno_hdmi.c | 1 - + drivers/gpu/drm/rockchip/rk3066_hdmi.c | 1 - + drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 17 +++++++++++++++++ + drivers/gpu/drm/rockchip/rockchip_drm_vop.h | 12 ------------ + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 16 +--------------- + drivers/gpu/drm/rockchip/rockchip_lvds.c | 1 - + drivers/gpu/drm/rockchip/rockchip_rgb.c | 1 - + 11 files changed, 18 insertions(+), 35 deletions(-) + +--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c ++++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +@@ -30,7 +30,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #define RK3288_GRF_SOC_CON6 0x25c + #define RK3288_EDP_LCDC_SEL BIT(5) +--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c ++++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c +@@ -24,7 +24,6 @@ + + #include "cdn-dp-core.h" + #include "cdn-dp-reg.h" +-#include "rockchip_drm_vop.h" + + static inline struct cdn_dp_device *connector_to_dp(struct drm_connector *connector) + { +--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c ++++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +@@ -26,7 +26,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #define DSI_PHY_RSTZ 0xa0 + #define PHY_DISFORCEPLL 0 +--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c ++++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +@@ -18,7 +18,6 @@ + #include + + #include 
"rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #define RK3228_GRF_SOC_CON2 0x0408 + #define RK3228_HDMI_SDAIN_MSK BIT(14) +--- a/drivers/gpu/drm/rockchip/inno_hdmi.c ++++ b/drivers/gpu/drm/rockchip/inno_hdmi.c +@@ -23,7 +23,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #include "inno_hdmi.h" + +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -18,7 +18,6 @@ + #include "rk3066_hdmi.h" + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #define DEFAULT_PLLA_RATE 30000000 + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +@@ -20,6 +20,23 @@ + #define ROCKCHIP_MAX_CONNECTOR 2 + #define ROCKCHIP_MAX_CRTC 4 + ++/* ++ * display output interface supported by rockchip lcdc ++ */ ++#define ROCKCHIP_OUT_MODE_P888 0 ++#define ROCKCHIP_OUT_MODE_BT1120 0 ++#define ROCKCHIP_OUT_MODE_P666 1 ++#define ROCKCHIP_OUT_MODE_P565 2 ++#define ROCKCHIP_OUT_MODE_BT656 5 ++#define ROCKCHIP_OUT_MODE_S888 8 ++#define ROCKCHIP_OUT_MODE_S888_DUMMY 12 ++#define ROCKCHIP_OUT_MODE_YUV420 14 ++/* for use special outface */ ++#define ROCKCHIP_OUT_MODE_AAAA 15 ++ ++/* output flags */ ++#define ROCKCHIP_OUTPUT_DSI_DUAL BIT(0) ++ + struct drm_device; + struct drm_connector; + struct iommu_domain; +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +@@ -277,18 +277,6 @@ struct vop_data { + /* dst alpha ctrl define */ + #define DST_FACTOR_M0(x) (((x) & 0x7) << 6) + +-/* +- * display output interface supported by rockchip lcdc +- */ +-#define ROCKCHIP_OUT_MODE_P888 0 +-#define ROCKCHIP_OUT_MODE_P666 1 +-#define ROCKCHIP_OUT_MODE_P565 2 +-/* for use special outface */ +-#define ROCKCHIP_OUT_MODE_AAAA 15 +- +-/* output flags */ +-#define ROCKCHIP_OUTPUT_DSI_DUAL BIT(0) +- + enum alpha_mode { + ALPHA_STRAIGHT, + ALPHA_INVERSE, +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -7,10 +7,9 @@ + #ifndef _ROCKCHIP_DRM_VOP2_H + #define _ROCKCHIP_DRM_VOP2_H + +-#include "rockchip_drm_vop.h" +- + #include + #include ++#include "rockchip_drm_vop.h" + + #define VOP_FEATURE_OUTPUT_10BIT BIT(0) + +@@ -166,19 +165,6 @@ struct vop2_data { + #define WB_YRGB_FIFO_FULL_INTR BIT(18) + #define WB_COMPLETE_INTR BIT(19) + +-/* +- * display output interface supported by rockchip lcdc +- */ +-#define ROCKCHIP_OUT_MODE_P888 0 +-#define ROCKCHIP_OUT_MODE_BT1120 0 +-#define ROCKCHIP_OUT_MODE_P666 1 +-#define ROCKCHIP_OUT_MODE_P565 2 +-#define ROCKCHIP_OUT_MODE_BT656 5 +-#define ROCKCHIP_OUT_MODE_S888 8 +-#define ROCKCHIP_OUT_MODE_S888_DUMMY 12 +-#define ROCKCHIP_OUT_MODE_YUV420 14 +-/* for use special outface */ +-#define ROCKCHIP_OUT_MODE_AAAA 15 + + enum vop_csc_format { + CSC_BT601L, +--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c ++++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c +@@ -27,7 +27,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + #include "rockchip_lvds.h" + + #define DISPLAY_OUTPUT_RGB 0 +--- a/drivers/gpu/drm/rockchip/rockchip_rgb.c ++++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c +@@ -19,7 +19,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + #include "rockchip_rgb.h" + + struct rockchip_rgb { diff --git a/patches-6.6/033-20-v6.8-Revert-drm-rockchip-vop2-Use-regcache_sync-to-fix.patch b/patches-6.6/033-20-v6.8-Revert-drm-rockchip-vop2-Use-regcache_sync-to-fix.patch new file mode 100644 index 
0000000..563a67e --- /dev/null +++ b/patches-6.6/033-20-v6.8-Revert-drm-rockchip-vop2-Use-regcache_sync-to-fix.patch @@ -0,0 +1,60 @@ +From 81a06f1d02e588cfa14c5e5953d9dc50b1d404be Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:57:19 +0800 +Subject: [PATCH] Revert "drm/rockchip: vop2: Use regcache_sync() to fix + suspend/resume" + +This reverts commit b63a553e8f5aa6574eeb535a551817a93c426d8c. + +regcache_sync will try to reload the configuration in regcache to +hardware, but the registers of 4 Cluster windows and Esmart1/2/3 on +the upcoming rk3588 can not be set successfully before internal PD +power on. + +Also it's better to keep the hardware register as it is before we really +enable it. + +So let's revert this version, and keep the first version: +commit afa965a45e01 ("drm/rockchip: vop2: fix suspend/resume") + +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115719.1784834-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -217,6 +217,8 @@ struct vop2 { + struct vop2_win win[]; + }; + ++static const struct regmap_config vop2_regmap_config; ++ + static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc) + { + return container_of(crtc, struct vop2_video_port, crtc); +@@ -885,7 +887,11 @@ static void vop2_enable(struct vop2 *vop + return; + } + +- regcache_sync(vop2->map); ++ ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config); ++ if (ret) { ++ drm_err(vop2->drm, "failed to reinit cache: %d\n", ret); ++ return; ++ } + + if (vop2->data->soc_id == 3566) + vop2_writel(vop2, RK3568_OTP_WIN_EN, 1); +@@ -915,8 +921,6 @@ static void vop2_disable(struct vop2 *vo + + pm_runtime_put_sync(vop2->dev); + +- regcache_mark_dirty(vop2->map); +- + clk_disable_unprepare(vop2->aclk); + clk_disable_unprepare(vop2->hclk); + } diff --git a/patches-6.6/033-21-v6.8-drm-rockchip-vop2-set-half_block_en-bit-in-all-mode.patch b/patches-6.6/033-21-v6.8-drm-rockchip-vop2-set-half_block_en-bit-in-all-mode.patch new file mode 100644 index 0000000..b4cd238 --- /dev/null +++ b/patches-6.6/033-21-v6.8-drm-rockchip-vop2-set-half_block_en-bit-in-all-mode.patch @@ -0,0 +1,83 @@ +From bebad6bd4fbdc448ad3b337ad281b813e68f6f53 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:57:30 +0800 +Subject: [PATCH] drm/rockchip: vop2: set half_block_en bit in all mode + +At first we thought the half_block_en bit in AFBCD_CTRL register +only work in afbc mode. But the fact is that it control the line +buffer in all mode(afbc/tile/linear), so we need configure it in +all case. + +As the cluster windows of rk3568 only supports afbc format +so is therefore not affected. 
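As a rough stand-alone illustration of the rule this patch encodes in vop2_half_block_enable() below: the line buffer runs in half-block (8-line) mode unless the plane is rotated by 90 or 270 degrees. The helper name here is hypothetical; the rotation constants are copied from the DRM uAPI:

    #include <stdbool.h>
    #include <stdio.h>

    #define DRM_MODE_ROTATE_90  (1 << 1)  /* include/uapi/drm/drm_mode.h */
    #define DRM_MODE_ROTATE_270 (1 << 3)

    /* 0: full block, 16 lines per tile; 1: half block, 8 lines per tile */
    static bool half_block_en(unsigned int rotation)
    {
        return !(rotation & (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270));
    }

    int main(void)
    {
        printf("no rotation -> half_block_en=%d\n", half_block_en(0));
        printf("rotate 90   -> half_block_en=%d\n",
               half_block_en(DRM_MODE_ROTATE_90));
        return 0;
    }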
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115730.1784893-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 25 ++++++++++++++------ + 1 file changed, 18 insertions(+), 7 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -521,6 +521,18 @@ static bool rockchip_vop2_mod_supported( + return vop2_convert_afbc_format(format) >= 0; + } + ++/* ++ * 0: Full mode, 16 lines for one tail ++ * 1: half block mode, 8 lines one tail ++ */ ++static bool vop2_half_block_enable(struct drm_plane_state *pstate) ++{ ++ if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) ++ return false; ++ else ++ return true; ++} ++ + static u32 vop2_afbc_transform_offset(struct drm_plane_state *pstate, + bool afbc_half_block_en) + { +@@ -1146,6 +1158,7 @@ static void vop2_plane_atomic_update(str + bool rotate_90 = pstate->rotation & DRM_MODE_ROTATE_90; + struct rockchip_gem_object *rk_obj; + unsigned long offset; ++ bool half_block_en; + bool afbc_en; + dma_addr_t yrgb_mst; + dma_addr_t uv_mst; +@@ -1238,6 +1251,7 @@ static void vop2_plane_atomic_update(str + dsp_info = (dsp_h - 1) << 16 | ((dsp_w - 1) & 0xffff); + + format = vop2_convert_format(fb->format->format); ++ half_block_en = vop2_half_block_enable(pstate); + + drm_dbg(vop2->drm, "vp%d update %s[%dx%d->%dx%d@%dx%d] fmt[%p4cc_%s] addr[%pad]\n", + vp->id, win->data->name, actual_w, actual_h, dsp_w, dsp_h, +@@ -1245,6 +1259,9 @@ static void vop2_plane_atomic_update(str + &fb->format->format, + afbc_en ? "AFBC" : "", &yrgb_mst); + ++ if (vop2_cluster_window(win)) ++ vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en); ++ + if (afbc_en) { + u32 stride; + +@@ -1285,13 +1302,7 @@ static void vop2_plane_atomic_update(str + vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap); + vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); +- if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) { +- vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 0); +- transform_offset = vop2_afbc_transform_offset(pstate, false); +- } else { +- vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 1); +- transform_offset = vop2_afbc_transform_offset(pstate, true); +- } ++ transform_offset = vop2_afbc_transform_offset(pstate, half_block_en); + vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst); + vop2_win_write(win, VOP2_WIN_AFBC_PIC_SIZE, act_info); + vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, transform_offset); diff --git a/patches-6.6/033-22-v6.8-drm-rockchip-vop2-clear-afbc-en-and-transform-bit-for.patch b/patches-6.6/033-22-v6.8-drm-rockchip-vop2-clear-afbc-en-and-transform-bit-for.patch new file mode 100644 index 0000000..f129011 --- /dev/null +++ b/patches-6.6/033-22-v6.8-drm-rockchip-vop2-clear-afbc-en-and-transform-bit-for.patch @@ -0,0 +1,36 @@ +From 20529a68307feed00dd3d431d3fff0572616b0f2 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:57:41 +0800 +Subject: [PATCH] drm/rockchip: vop2: clear afbc en and transform bit for + cluster window at linear mode + +The enable bit and transform offset of cluster windows should be +cleared when it work at linear mode, or we may have a iommu fault +issue on rk3588 which cluster windows switch between afbc and linear +mode. + +As the cluster windows of rk3568 only supports afbc format +so is therefore not affected. 
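The hazard is stale window state: a cluster window that last scanned out an AFBC buffer keeps its AFBC enable and transform offset programmed, and a later linear framebuffer on rk3588 is then fetched through the wrong path and faults in the IOMMU. A minimal sketch of the pattern the fix applies, with illustrative field names rather than the driver's register accessors:

    /* Stand-ins for the VOP2_WIN_AFBC_* and VOP2_WIN_YRGB_VIR fields. */
    struct win_state {
        unsigned int afbc_enable;
        unsigned int afbc_transform_offset;
        unsigned int yrgb_vir;  /* linear stride, in 32-bit words */
    };

    static void win_commit_linear(struct win_state *w, unsigned int pitch_bytes)
    {
        /* Clear AFBC state explicitly instead of assuming linear mode ignores it. */
        w->afbc_enable = 0;
        w->afbc_transform_offset = 0;
        w->yrgb_vir = (pitch_bytes + 3) / 4;  /* DIV_ROUND_UP(pitch, 4) */
    }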
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115741.1784954-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1314,6 +1314,11 @@ static void vop2_plane_atomic_update(str + vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_270, rotate_270); + vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_90, rotate_90); + } else { ++ if (vop2_cluster_window(win)) { ++ vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 0); ++ vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, 0); ++ } ++ + vop2_win_write(win, VOP2_WIN_YRGB_VIR, DIV_ROUND_UP(fb->pitches[0], 4)); + } + diff --git a/patches-6.6/033-23-v6.8-drm-rockchip-vop2-Add-write-mask-for-VP-config-done.patch b/patches-6.6/033-23-v6.8-drm-rockchip-vop2-Add-write-mask-for-VP-config-done.patch new file mode 100644 index 0000000..33ca068 --- /dev/null +++ b/patches-6.6/033-23-v6.8-drm-rockchip-vop2-Add-write-mask-for-VP-config-done.patch @@ -0,0 +1,50 @@ +From d1f8face0fc1298c88ef4a0479c3027b46ca2c77 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:57:52 +0800 +Subject: [PATCH] drm/rockchip: vop2: Add write mask for VP config done + +The write mask bit is used to make sure when writing +config done bit for one VP will not overwrite the other. + +Unfortunately, the write mask bit is missing on +rk3566/8, that means when we write to these bits, +it will not take any effect. + +We need this to make the vop work properly after +rk3566/8 variants. + +Signed-off-by: Andy Yan +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115752.1785013-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -268,12 +268,23 @@ static bool vop2_cluster_window(const st + return win->data->feature & WIN_FEATURE_CLUSTER; + } + ++/* ++ * Note: ++ * The write mask function is documented but missing on rk3566/8, writes ++ * to these bits have no effect. For newer soc(rk3588 and following) the ++ * write mask is needed for register writes. ++ * ++ * GLB_CFG_DONE_EN has no write mask bit. ++ * ++ */ + static void vop2_cfg_done(struct vop2_video_port *vp) + { + struct vop2 *vop2 = vp->vop2; ++ u32 val = RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN; + +- regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, +- BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN); ++ val |= BIT(vp->id) | (BIT(vp->id) << 16); ++ ++ regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, val); + } + + static void vop2_win_disable(struct vop2_win *win) diff --git a/patches-6.6/033-24-v6.8-drm-rockchip-vop2-Set-YUV-RGB-overlay-mode.patch b/patches-6.6/033-24-v6.8-drm-rockchip-vop2-Set-YUV-RGB-overlay-mode.patch new file mode 100644 index 0000000..68c6310 --- /dev/null +++ b/patches-6.6/033-24-v6.8-drm-rockchip-vop2-Set-YUV-RGB-overlay-mode.patch @@ -0,0 +1,95 @@ +From dd49ee4614cfb0b1f627c4353b60cecfe998a374 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:58:05 +0800 +Subject: [PATCH] drm/rockchip: vop2: Set YUV/RGB overlay mode + +Set overlay mode register according to the +output mode is yuv or rgb. 
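The input to this bit is simply whether the port's media bus format is a YUV one. A stand-alone sketch of that check, using MEDIA_BUS_FMT_* values from the uAPI header (the exact set of formats the driver tests may differ):

    #include <stdbool.h>
    #include <linux/media-bus-format.h>

    static bool is_yuv_bus_format(unsigned int bus_format)
    {
        switch (bus_format) {
        case MEDIA_BUS_FMT_YUV8_1X24:
        case MEDIA_BUS_FMT_YUV10_1X30:
        case MEDIA_BUS_FMT_UYYVYY8_0_5X24:   /* HDMI YUV420, 8 bit */
        case MEDIA_BUS_FMT_UYYVYY10_0_5X30:  /* HDMI YUV420, 10 bit */
            return true;
        default:
            return false;
        }
    }

When it returns true the port's overlay blends in the YUV domain, which matches the RK3568_OVL_CTRL__YUV_MODE bit the diff below programs per video port.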
+ +Signed-off-by: Andy Yan +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115805.1785073-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 1 + + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 17 ++++++++++++++--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 1 + + 3 files changed, 16 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +@@ -48,6 +48,7 @@ struct rockchip_crtc_state { + int output_bpc; + int output_flags; + bool enable_afbc; ++ bool yuv_overlay; + u32 bus_format; + u32 bus_flags; + int color_space; +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1625,6 +1625,8 @@ static void vop2_crtc_atomic_enable(stru + + vop2->enable_count++; + ++ vcstate->yuv_overlay = is_yuv_output(vcstate->bus_format); ++ + vop2_crtc_enable_irq(vp, VP_INT_POST_BUF_EMPTY); + + polflags = 0; +@@ -1652,7 +1654,7 @@ static void vop2_crtc_atomic_enable(stru + if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode)) + dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP; + +- if (is_yuv_output(vcstate->bus_format)) ++ if (vcstate->yuv_overlay) + dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y; + + vop2_dither_setup(crtc, &dsp_ctrl); +@@ -1961,10 +1963,12 @@ static void vop2_setup_layer_mixer(struc + u16 hdisplay; + u32 bg_dly; + u32 pre_scan_dly; ++ u32 ovl_ctrl; + int i; + struct vop2_video_port *vp0 = &vop2->vps[0]; + struct vop2_video_port *vp1 = &vop2->vps[1]; + struct vop2_video_port *vp2 = &vop2->vps[2]; ++ struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state); + + adjusted_mode = &vp->crtc.state->adjusted_mode; + hsync_len = adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start; +@@ -1977,7 +1981,15 @@ static void vop2_setup_layer_mixer(struc + pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len; + vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly); + +- vop2_writel(vop2, RK3568_OVL_CTRL, 0); ++ ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL); ++ ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD; ++ if (vcstate->yuv_overlay) ++ ovl_ctrl |= RK3568_OVL_CTRL__YUV_MODE(vp->id); ++ else ++ ovl_ctrl &= ~RK3568_OVL_CTRL__YUV_MODE(vp->id); ++ ++ vop2_writel(vop2, RK3568_OVL_CTRL, ovl_ctrl); ++ + port_sel = vop2_readl(vop2, RK3568_OVL_PORT_SEL); + port_sel &= RK3568_OVL_PORT_SEL__SEL_PORT; + +@@ -2051,7 +2063,6 @@ static void vop2_setup_layer_mixer(struc + + vop2_writel(vop2, RK3568_OVL_LAYER_SEL, layer_sel); + vop2_writel(vop2, RK3568_OVL_PORT_SEL, port_sel); +- vop2_writel(vop2, RK3568_OVL_CTRL, RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD); + } + + static void vop2_setup_dly_for_windows(struct vop2 *vop2) +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -401,6 +401,7 @@ enum dst_factor_mode { + #define VOP2_COLOR_KEY_MASK BIT(31) + + #define RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD BIT(28) ++#define RK3568_OVL_CTRL__YUV_MODE(vp) BIT(vp) + + #define RK3568_VP_BG_MIX_CTRL__BG_DLY GENMASK(31, 24) + diff --git a/patches-6.6/033-25-v6.8-drm-rockchip-vop2-set-bg-dly-and-prescan-dly-at.patch b/patches-6.6/033-25-v6.8-drm-rockchip-vop2-set-bg-dly-and-prescan-dly-at.patch new file mode 100644 index 0000000..b26bcf6 --- /dev/null +++ b/patches-6.6/033-25-v6.8-drm-rockchip-vop2-set-bg-dly-and-prescan-dly-at.patch @@ -0,0 +1,70 @@ +From 075a5b3969becb1ebc2f1d4fa1a1fe9163679273 Mon Sep 17 00:00:00 2001 +From: 
Andy Yan +Date: Mon, 11 Dec 2023 19:58:15 +0800 +Subject: [PATCH] drm/rockchip: vop2: set bg dly and prescan dly at + vop2_post_config + +We need to setup background delay cycle and prescan +delay cycle when a mode is enable to avoid trigger +POST_BUF_EMPTY irq on rk3588. + +Note: RK356x has no such requirement. + +Signed-off-by: Andy Yan +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115815.1785131-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 26 ++++++++------------ + 1 file changed, 10 insertions(+), 16 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1462,8 +1462,18 @@ static void vop2_post_config(struct drm_ + u32 top_margin = 100, bottom_margin = 100; + u16 hsize = hdisplay * (left_margin + right_margin) / 200; + u16 vsize = vdisplay * (top_margin + bottom_margin) / 200; ++ u16 hsync_len = mode->crtc_hsync_end - mode->crtc_hsync_start; + u16 hact_end, vact_end; + u32 val; ++ u32 bg_dly; ++ u32 pre_scan_dly; ++ ++ bg_dly = vp->data->pre_scan_max_dly[3]; ++ vop2_writel(vp->vop2, RK3568_VP_BG_MIX_CTRL(vp->id), ++ FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly)); ++ ++ pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len; ++ vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly); + + vsize = rounddown(vsize, 2); + hsize = rounddown(hsize, 2); +@@ -1958,11 +1968,6 @@ static void vop2_setup_layer_mixer(struc + u32 layer_sel = 0; + u32 port_sel; + unsigned int nlayer, ofs; +- struct drm_display_mode *adjusted_mode; +- u16 hsync_len; +- u16 hdisplay; +- u32 bg_dly; +- u32 pre_scan_dly; + u32 ovl_ctrl; + int i; + struct vop2_video_port *vp0 = &vop2->vps[0]; +@@ -1970,17 +1975,6 @@ static void vop2_setup_layer_mixer(struc + struct vop2_video_port *vp2 = &vop2->vps[2]; + struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state); + +- adjusted_mode = &vp->crtc.state->adjusted_mode; +- hsync_len = adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start; +- hdisplay = adjusted_mode->crtc_hdisplay; +- +- bg_dly = vp->data->pre_scan_max_dly[3]; +- vop2_writel(vop2, RK3568_VP_BG_MIX_CTRL(vp->id), +- FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly)); +- +- pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len; +- vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly); +- + ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL); + ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD; + if (vcstate->yuv_overlay) diff --git a/patches-6.6/033-26-v6.8-drm-rockchip-vop2-rename-grf-to-sys_grf.patch b/patches-6.6/033-26-v6.8-drm-rockchip-vop2-rename-grf-to-sys_grf.patch new file mode 100644 index 0000000..156b470 --- /dev/null +++ b/patches-6.6/033-26-v6.8-drm-rockchip-vop2-rename-grf-to-sys_grf.patch @@ -0,0 +1,50 @@ +From c408af1afc4b74ea6df69e0313be97f1f83e981a Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:58:26 +0800 +Subject: [PATCH] drm/rockchip: vop2: rename grf to sys_grf + +The vop2 need to reference more grf(system grf, vop grf, vo0/1 grf,etc) +in the upcoming rk3588. + +So we rename the current system grf to sys_grf. 
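For reference, each GRF becomes its own syscon regmap looked up by phandle; a condensed, hypothetical sketch of the lookups that the rk3588 support patch later in this series performs (error handling trimmed to a single check):

    #include <linux/err.h>
    #include <linux/errno.h>
    #include <linux/mfd/syscon.h>
    #include <linux/of.h>
    #include <linux/regmap.h>

    static int vop2_lookup_grfs(struct device_node *np, struct regmap **sys_grf,
                                struct regmap **vop_grf, struct regmap **vo1_grf)
    {
        *sys_grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
        *vop_grf = syscon_regmap_lookup_by_phandle(np, "rockchip,vop-grf");
        *vo1_grf = syscon_regmap_lookup_by_phandle(np, "rockchip,vo1-grf");

        if (IS_ERR(*sys_grf) || IS_ERR(*vop_grf) || IS_ERR(*vo1_grf))
            return -ENODEV;

        return 0;
    }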
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115826.1785190-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -190,7 +190,7 @@ struct vop2 { + void __iomem *regs; + struct regmap *map; + +- struct regmap *grf; ++ struct regmap *sys_grf; + + /* physical map length of vop2 register */ + u32 len; +@@ -1526,9 +1526,9 @@ static void rk3568_set_intf_mux(struct v + dip &= ~RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL; + dip |= FIELD_PREP(RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL, polflags); + if (polflags & POLFLAG_DCLK_INV) +- regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3)); ++ regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3)); + else +- regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16)); ++ regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16)); + break; + case ROCKCHIP_VOP2_EP_HDMI0: + die &= ~RK3568_SYS_DSP_INFACE_EN_HDMI_MUX; +@@ -2769,7 +2769,7 @@ static int vop2_bind(struct device *dev, + return PTR_ERR(vop2->lut_regs); + } + +- vop2->grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); ++ vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); + + vop2->hclk = devm_clk_get(vop2->dev, "hclk"); + if (IS_ERR(vop2->hclk)) { diff --git a/patches-6.6/033-27-v6.8-dt-bindings-rockchip-vop2-Add-more-endpoint-definition.patch b/patches-6.6/033-27-v6.8-dt-bindings-rockchip-vop2-Add-more-endpoint-definition.patch new file mode 100644 index 0000000..9e92daf --- /dev/null +++ b/patches-6.6/033-27-v6.8-dt-bindings-rockchip-vop2-Add-more-endpoint-definition.patch @@ -0,0 +1,28 @@ +From dc7226acacc6502291446f9e33cf96246ec49a30 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:59:07 +0800 +Subject: [PATCH] dt-bindings: rockchip,vop2: Add more endpoint definition + +There are 2 HDMI, 2 DP, 2 eDP on rk3588, so add +corresponding endpoint definition for it. 
+ +Signed-off-by: Andy Yan +Acked-by: Krzysztof Kozlowski +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115907.1785377-1-andyshrk@163.com +--- + include/dt-bindings/soc/rockchip,vop2.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/include/dt-bindings/soc/rockchip,vop2.h ++++ b/include/dt-bindings/soc/rockchip,vop2.h +@@ -10,5 +10,9 @@ + #define ROCKCHIP_VOP2_EP_LVDS0 5 + #define ROCKCHIP_VOP2_EP_MIPI1 6 + #define ROCKCHIP_VOP2_EP_LVDS1 7 ++#define ROCKCHIP_VOP2_EP_HDMI1 8 ++#define ROCKCHIP_VOP2_EP_EDP1 9 ++#define ROCKCHIP_VOP2_EP_DP0 10 ++#define ROCKCHIP_VOP2_EP_DP1 11 + + #endif /* __DT_BINDINGS_ROCKCHIP_VOP2_H */ diff --git a/patches-6.6/033-28-v6.8-drm-rockchip-vop2-Add-support-for-rk3588.patch b/patches-6.6/033-28-v6.8-drm-rockchip-vop2-Add-support-for-rk3588.patch new file mode 100644 index 0000000..a2d8efc --- /dev/null +++ b/patches-6.6/033-28-v6.8-drm-rockchip-vop2-Add-support-for-rk3588.patch @@ -0,0 +1,997 @@ +From 5a028e8f062fc862f051f8e62a0d5a1abac91955 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:59:19 +0800 +Subject: [PATCH] drm/rockchip: vop2: Add support for rk3588 + +VOP2 on rk3588: + +Four video ports: +VP0 Max 4096x2160 +VP1 Max 4096x2160 +VP2 Max 4096x2160 +VP3 Max 2048x1080 + +4 4K Cluster windows with AFBC/line RGB and AFBC-only YUV support +4 4K Esmart windows with line RGB/YUV support + +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115919.1785435-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 400 ++++++++++++++++++- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 81 ++++ + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 221 ++++++++++ + 3 files changed, 696 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -191,6 +191,9 @@ struct vop2 { + struct regmap *map; + + struct regmap *sys_grf; ++ struct regmap *vop_grf; ++ struct regmap *vo1_grf; ++ struct regmap *sys_pmu; + + /* physical map length of vop2 register */ + u32 len; +@@ -209,6 +212,7 @@ struct vop2 { + unsigned int enable_count; + struct clk *hclk; + struct clk *aclk; ++ struct clk *pclk; + + /* optional internal rgb encoder */ + struct rockchip_rgb *rgb; +@@ -217,6 +221,23 @@ struct vop2 { + struct vop2_win win[]; + }; + ++#define vop2_output_if_is_hdmi(x) ((x) == ROCKCHIP_VOP2_EP_HDMI0 || \ ++ (x) == ROCKCHIP_VOP2_EP_HDMI1) ++ ++#define vop2_output_if_is_dp(x) ((x) == ROCKCHIP_VOP2_EP_DP0 || \ ++ (x) == ROCKCHIP_VOP2_EP_DP1) ++ ++#define vop2_output_if_is_edp(x) ((x) == ROCKCHIP_VOP2_EP_EDP0 || \ ++ (x) == ROCKCHIP_VOP2_EP_EDP1) ++ ++#define vop2_output_if_is_mipi(x) ((x) == ROCKCHIP_VOP2_EP_MIPI0 || \ ++ (x) == ROCKCHIP_VOP2_EP_MIPI1) ++ ++#define vop2_output_if_is_lvds(x) ((x) == ROCKCHIP_VOP2_EP_LVDS0 || \ ++ (x) == ROCKCHIP_VOP2_EP_LVDS1) ++ ++#define vop2_output_if_is_dpi(x) ((x) == ROCKCHIP_VOP2_EP_RGB0) ++ + static const struct regmap_config vop2_regmap_config; + + static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc) +@@ -475,6 +496,17 @@ static bool vop2_output_uv_swap(u32 bus_ + return false; + } + ++static bool vop2_output_rg_swap(struct vop2 *vop2, u32 bus_format) ++{ ++ if (vop2->data->soc_id == 3588) { ++ if (bus_format == MEDIA_BUS_FMT_YUV8_1X24 || ++ bus_format == MEDIA_BUS_FMT_YUV10_1X30) ++ return true; ++ } ++ ++ return false; ++} ++ + static bool is_yuv_output(u32 bus_format) + { + 
switch (bus_format) { +@@ -881,13 +913,32 @@ static int vop2_core_clks_prepare_enable + goto err; + } + ++ ret = clk_prepare_enable(vop2->pclk); ++ if (ret < 0) { ++ drm_err(vop2->drm, "failed to enable pclk - %d\n", ret); ++ goto err1; ++ } ++ + return 0; ++err1: ++ clk_disable_unprepare(vop2->aclk); + err: + clk_disable_unprepare(vop2->hclk); + + return ret; + } + ++static void rk3588_vop2_power_domain_enable_all(struct vop2 *vop2) ++{ ++ u32 pd; ++ ++ pd = vop2_readl(vop2, RK3588_SYS_PD_CTRL); ++ pd &= ~(VOP2_PD_CLUSTER0 | VOP2_PD_CLUSTER1 | VOP2_PD_CLUSTER2 | ++ VOP2_PD_CLUSTER3 | VOP2_PD_ESMART); ++ ++ vop2_writel(vop2, RK3588_SYS_PD_CTRL, pd); ++} ++ + static void vop2_enable(struct vop2 *vop2) + { + int ret; +@@ -919,6 +970,9 @@ static void vop2_enable(struct vop2 *vop + if (vop2->data->soc_id == 3566) + vop2_writel(vop2, RK3568_OTP_WIN_EN, 1); + ++ if (vop2->data->soc_id == 3588) ++ rk3588_vop2_power_domain_enable_all(vop2); ++ + vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN); + + /* +@@ -944,6 +998,7 @@ static void vop2_disable(struct vop2 *vo + + pm_runtime_put_sync(vop2->dev); + ++ clk_disable_unprepare(vop2->pclk); + clk_disable_unprepare(vop2->aclk); + clk_disable_unprepare(vop2->hclk); + } +@@ -1311,7 +1366,19 @@ static void vop2_plane_atomic_update(str + vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1); + vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format); + vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap); +- vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); ++ /* ++ * On rk3566/8, this bit is auto gating enable, ++ * but this function is not work well so we need ++ * to disable it for these two platform. ++ * On rk3588, and the following new soc(rk3528/rk3576), ++ * this bit is gating disable, we should write 1 to ++ * disable gating when enable afbc. 
++ */ ++ if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568) ++ vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); ++ else ++ vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 1); ++ + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); + transform_offset = vop2_afbc_transform_offset(pstate, half_block_en); + vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst); +@@ -1509,10 +1576,10 @@ static void vop2_post_config(struct drm_ + vop2_vp_write(vp, RK3568_VP_DSP_BG, 0); + } + +-static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id, +- u32 polflags) ++static unsigned long rk3568_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags) + { + struct vop2 *vop2 = vp->vop2; ++ struct drm_crtc *crtc = &vp->crtc; + u32 die, dip; + + die = vop2_readl(vop2, RK3568_DSP_IF_EN); +@@ -1574,13 +1641,281 @@ static void rk3568_set_intf_mux(struct v + break; + default: + drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id); +- return; ++ return 0; ++ } ++ ++ dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD; ++ ++ vop2_writel(vop2, RK3568_DSP_IF_EN, die); ++ vop2_writel(vop2, RK3568_DSP_IF_POL, dip); ++ ++ return crtc->state->adjusted_mode.crtc_clock * 1000LL; ++} ++ ++/* ++ * calc the dclk on rk3588 ++ * the available div of dclk is 1, 2, 4 ++ */ ++static unsigned long rk3588_calc_dclk(unsigned long child_clk, unsigned long max_dclk) ++{ ++ if (child_clk * 4 <= max_dclk) ++ return child_clk * 4; ++ else if (child_clk * 2 <= max_dclk) ++ return child_clk * 2; ++ else if (child_clk <= max_dclk) ++ return child_clk; ++ else ++ return 0; ++} ++ ++/* ++ * 4 pixclk/cycle on rk3588 ++ * RGB/eDP/HDMI: if_pixclk >= dclk_core ++ * DP: dp_pixclk = dclk_out <= dclk_core ++ * DSI: mipi_pixclk <= dclk_out <= dclk_core ++ */ ++static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, ++ int *dclk_core_div, int *dclk_out_div, ++ int *if_pixclk_div, int *if_dclk_div) ++{ ++ struct vop2 *vop2 = vp->vop2; ++ struct drm_crtc *crtc = &vp->crtc; ++ struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode; ++ struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state); ++ int output_mode = vcstate->output_mode; ++ unsigned long v_pixclk = adjusted_mode->crtc_clock * 1000LL; /* video timing pixclk */ ++ unsigned long dclk_core_rate = v_pixclk >> 2; ++ unsigned long dclk_rate = v_pixclk; ++ unsigned long dclk_out_rate; ++ unsigned long if_dclk_rate; ++ unsigned long if_pixclk_rate; ++ int K = 1; ++ ++ if (vop2_output_if_is_hdmi(id)) { ++ /* ++ * K = 2: dclk_core = if_pixclk_rate > if_dclk_rate ++ * K = 1: dclk_core = hdmie_edp_dclk > if_pixclk_rate ++ */ ++ if (output_mode == ROCKCHIP_OUT_MODE_YUV420) { ++ dclk_rate = dclk_rate >> 1; ++ K = 2; ++ } ++ ++ if_pixclk_rate = (dclk_core_rate << 1) / K; ++ if_dclk_rate = dclk_core_rate / K; ++ /* ++ * *if_pixclk_div = dclk_rate / if_pixclk_rate; ++ * *if_dclk_div = dclk_rate / if_dclk_rate; ++ */ ++ *if_pixclk_div = 2; ++ *if_dclk_div = 4; ++ } else if (vop2_output_if_is_edp(id)) { ++ /* ++ * edp_pixclk = edp_dclk > dclk_core ++ */ ++ if_pixclk_rate = v_pixclk / K; ++ dclk_rate = if_pixclk_rate * K; ++ /* ++ * *if_pixclk_div = dclk_rate / if_pixclk_rate; ++ * *if_dclk_div = *if_pixclk_div; ++ */ ++ *if_pixclk_div = K; ++ *if_dclk_div = K; ++ } else if (vop2_output_if_is_dp(id)) { ++ if (output_mode == ROCKCHIP_OUT_MODE_YUV420) ++ dclk_out_rate = v_pixclk >> 3; ++ else ++ dclk_out_rate = v_pixclk >> 2; ++ ++ dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); ++ if (!dclk_rate) { ++ drm_err(vop2->drm, "DP 
dclk_out_rate out of range, dclk_out_rate: %ld KHZ\n", ++ dclk_out_rate); ++ return 0; ++ } ++ *dclk_out_div = dclk_rate / dclk_out_rate; ++ } else if (vop2_output_if_is_mipi(id)) { ++ if_pixclk_rate = dclk_core_rate / K; ++ /* ++ * dclk_core = dclk_out * K = if_pixclk * K = v_pixclk / 4 ++ */ ++ dclk_out_rate = if_pixclk_rate; ++ /* ++ * dclk_rate = N * dclk_core_rate N = (1,2,4 ), ++ * we get a little factor here ++ */ ++ dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); ++ if (!dclk_rate) { ++ drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld KHZ\n", ++ dclk_out_rate); ++ return 0; ++ } ++ *dclk_out_div = dclk_rate / dclk_out_rate; ++ /* ++ * mipi pixclk == dclk_out ++ */ ++ *if_pixclk_div = 1; ++ } else if (vop2_output_if_is_dpi(id)) { ++ dclk_rate = v_pixclk; ++ } ++ ++ *dclk_core_div = dclk_rate / dclk_core_rate; ++ *if_pixclk_div = ilog2(*if_pixclk_div); ++ *if_dclk_div = ilog2(*if_dclk_div); ++ *dclk_core_div = ilog2(*dclk_core_div); ++ *dclk_out_div = ilog2(*dclk_out_div); ++ ++ drm_dbg(vop2->drm, "dclk: %ld, pixclk_div: %d, dclk_div: %d\n", ++ dclk_rate, *if_pixclk_div, *if_dclk_div); ++ ++ return dclk_rate; ++} ++ ++/* ++ * MIPI port mux on rk3588: ++ * 0: Video Port2 ++ * 1: Video Port3 ++ * 3: Video Port 1(MIPI1 only) ++ */ ++static u32 rk3588_get_mipi_port_mux(int vp_id) ++{ ++ if (vp_id == 1) ++ return 3; ++ else if (vp_id == 3) ++ return 1; ++ else ++ return 0; ++} ++ ++static u32 rk3588_get_hdmi_pol(u32 flags) ++{ ++ u32 val; ++ ++ val = (flags & DRM_MODE_FLAG_NHSYNC) ? BIT(HSYNC_POSITIVE) : 0; ++ val |= (flags & DRM_MODE_FLAG_NVSYNC) ? BIT(VSYNC_POSITIVE) : 0; ++ ++ return val; ++} ++ ++static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags) ++{ ++ struct vop2 *vop2 = vp->vop2; ++ int dclk_core_div, dclk_out_div, if_pixclk_div, if_dclk_div; ++ unsigned long clock; ++ u32 die, dip, div, vp_clk_div, val; ++ ++ clock = rk3588_calc_cru_cfg(vp, id, &dclk_core_div, &dclk_out_div, ++ &if_pixclk_div, &if_dclk_div); ++ if (!clock) ++ return 0; ++ ++ vp_clk_div = FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_CORE_DIV, dclk_core_div); ++ vp_clk_div |= FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_OUT_DIV, dclk_out_div); ++ ++ die = vop2_readl(vop2, RK3568_DSP_IF_EN); ++ dip = vop2_readl(vop2, RK3568_DSP_IF_POL); ++ div = vop2_readl(vop2, RK3568_DSP_IF_CTRL); ++ ++ switch (id) { ++ case ROCKCHIP_VOP2_EP_HDMI0: ++ div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV; ++ div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); ++ val = rk3588_get_hdmi_pol(polflags); ++ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1)); ++ regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5)); ++ break; ++ case ROCKCHIP_VOP2_EP_HDMI1: ++ div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; ++ div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV, if_dclk_div); ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); ++ val = rk3588_get_hdmi_pol(polflags); ++ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4)); ++ regmap_write(vop2->vo1_grf, 
RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7)); ++ break; ++ case ROCKCHIP_VOP2_EP_EDP0: ++ div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV; ++ div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_EDP0 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); ++ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0)); ++ break; ++ case ROCKCHIP_VOP2_EP_EDP1: ++ div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; ++ div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_EDP1 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); ++ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3)); ++ break; ++ case ROCKCHIP_VOP2_EP_MIPI0: ++ div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_MIPI0_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX; ++ val = rk3588_get_mipi_port_mux(vp->id); ++ die |= RK3588_SYS_DSP_INFACE_EN_MIPI0 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX, !!val); ++ break; ++ case ROCKCHIP_VOP2_EP_MIPI1: ++ div &= ~RK3588_DSP_IF_MIPI1_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_MIPI1_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX; ++ val = rk3588_get_mipi_port_mux(vp->id); ++ die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, val); ++ break; ++ case ROCKCHIP_VOP2_EP_DP0: ++ die &= ~RK3588_SYS_DSP_INFACE_EN_DP0_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_DP0 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP0_MUX, vp->id); ++ dip &= ~RK3588_DSP_IF_POL__DP0_PIN_POL; ++ dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP0_PIN_POL, polflags); ++ break; ++ case ROCKCHIP_VOP2_EP_DP1: ++ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, vp->id); ++ dip &= ~RK3588_DSP_IF_POL__DP1_PIN_POL; ++ dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP1_PIN_POL, polflags); ++ break; ++ default: ++ drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id); ++ return 0; + } + + dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD; + ++ vop2_vp_write(vp, RK3588_VP_CLK_CTRL, vp_clk_div); + vop2_writel(vop2, RK3568_DSP_IF_EN, die); ++ vop2_writel(vop2, RK3568_DSP_IF_CTRL, div); + vop2_writel(vop2, RK3568_DSP_IF_POL, dip); ++ ++ return clock; ++} ++ ++static unsigned long vop2_set_intf_mux(struct vop2_video_port *vp, int ep_id, u32 polflags) ++{ ++ struct vop2 *vop2 = vp->vop2; ++ ++ if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568) ++ return rk3568_set_intf_mux(vp, ep_id, polflags); ++ else if (vop2->data->soc_id == 3588) ++ return rk3588_set_intf_mux(vp, ep_id, polflags); ++ else ++ return 0; + } + + static int us_to_vertical_line(struct drm_display_mode *mode, int us) +@@ -1650,9 +1985,17 @@ static void vop2_crtc_atomic_enable(stru + drm_for_each_encoder_mask(encoder, crtc->dev, crtc_state->encoder_mask) { + struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder); + +- rk3568_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); ++ /* ++ * for drive a high resolution(4KP120, 8K), vop on rk3588/rk3576 need ++ * process multi(1/2/4/8) pixels per cycle, so the dclk 
feed by the ++ * system cru may be the 1/2 or 1/4 of mode->clock. ++ */ ++ clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); + } + ++ if (!clock) ++ return; ++ + if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && + !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT)) + out_mode = ROCKCHIP_OUT_MODE_P888; +@@ -1663,6 +2006,8 @@ static void vop2_crtc_atomic_enable(stru + + if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode)) + dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP; ++ if (vop2_output_rg_swap(vop2, vcstate->bus_format)) ++ dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RG_SWAP; + + if (vcstate->yuv_overlay) + dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y; +@@ -2024,6 +2369,14 @@ static void vop2_setup_layer_mixer(struc + port_sel &= ~RK3568_OVL_PORT_SEL__CLUSTER1; + port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__CLUSTER1, vp->id); + break; ++ case ROCKCHIP_VOP2_CLUSTER2: ++ port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER2; ++ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER2, vp->id); ++ break; ++ case ROCKCHIP_VOP2_CLUSTER3: ++ port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER3; ++ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER3, vp->id); ++ break; + case ROCKCHIP_VOP2_ESMART0: + port_sel &= ~RK3568_OVL_PORT_SEL__ESMART0; + port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART0, vp->id); +@@ -2032,6 +2385,14 @@ static void vop2_setup_layer_mixer(struc + port_sel &= ~RK3568_OVL_PORT_SEL__ESMART1; + port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART1, vp->id); + break; ++ case ROCKCHIP_VOP2_ESMART2: ++ port_sel &= ~RK3588_OVL_PORT_SEL__ESMART2; ++ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART2, vp->id); ++ break; ++ case ROCKCHIP_VOP2_ESMART3: ++ port_sel &= ~RK3588_OVL_PORT_SEL__ESMART3; ++ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART3, vp->id); ++ break; + case ROCKCHIP_VOP2_SMART0: + port_sel &= ~RK3568_OVL_PORT_SEL__SMART0; + port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__SMART0, vp->id); +@@ -2768,8 +3129,29 @@ static int vop2_bind(struct device *dev, + if (IS_ERR(vop2->lut_regs)) + return PTR_ERR(vop2->lut_regs); + } ++ if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_GRF) { ++ vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); ++ if (IS_ERR(vop2->sys_grf)) ++ return dev_err_probe(dev, PTR_ERR(vop2->sys_grf), "cannot get sys_grf"); ++ } ++ ++ if (vop2_data->feature & VOP2_FEATURE_HAS_VOP_GRF) { ++ vop2->vop_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vop-grf"); ++ if (IS_ERR(vop2->vop_grf)) ++ return dev_err_probe(dev, PTR_ERR(vop2->vop_grf), "cannot get vop_grf"); ++ } ++ ++ if (vop2_data->feature & VOP2_FEATURE_HAS_VO1_GRF) { ++ vop2->vo1_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vo1-grf"); ++ if (IS_ERR(vop2->vo1_grf)) ++ return dev_err_probe(dev, PTR_ERR(vop2->vo1_grf), "cannot get vo1_grf"); ++ } + +- vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); ++ if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_PMU) { ++ vop2->sys_pmu = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,pmu"); ++ if (IS_ERR(vop2->sys_pmu)) ++ return dev_err_probe(dev, PTR_ERR(vop2->sys_pmu), "cannot get sys_pmu"); ++ } + + vop2->hclk = devm_clk_get(vop2->dev, "hclk"); + if (IS_ERR(vop2->hclk)) { +@@ -2783,6 +3165,12 @@ static int vop2_bind(struct device *dev, + return PTR_ERR(vop2->aclk); + } + ++ vop2->pclk = devm_clk_get_optional(vop2->dev, "pclk_vop"); ++ if (IS_ERR(vop2->pclk)) { ++ drm_err(vop2->drm, "failed to get pclk source\n"); ++ return PTR_ERR(vop2->pclk); ++ } ++ + vop2->irq = 
platform_get_irq(pdev, 0); + if (vop2->irq < 0) { + drm_err(vop2->drm, "cannot find irq for vop2\n"); +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -13,9 +13,16 @@ + + #define VOP_FEATURE_OUTPUT_10BIT BIT(0) + ++#define VOP2_FEATURE_HAS_SYS_GRF BIT(0) ++#define VOP2_FEATURE_HAS_VO0_GRF BIT(1) ++#define VOP2_FEATURE_HAS_VO1_GRF BIT(2) ++#define VOP2_FEATURE_HAS_VOP_GRF BIT(3) ++#define VOP2_FEATURE_HAS_SYS_PMU BIT(4) ++ + #define WIN_FEATURE_AFBDC BIT(0) + #define WIN_FEATURE_CLUSTER BIT(1) + ++#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l))) + /* + * the delay number of a window in different mode. + */ +@@ -38,6 +45,18 @@ enum vop2_scale_down_mode { + VOP2_SCALE_DOWN_AVG, + }; + ++/* ++ * vop2 internal power domain id, ++ * should be all none zero, 0 will be treat as invalid; ++ */ ++#define VOP2_PD_CLUSTER0 BIT(0) ++#define VOP2_PD_CLUSTER1 BIT(1) ++#define VOP2_PD_CLUSTER2 BIT(2) ++#define VOP2_PD_CLUSTER3 BIT(3) ++#define VOP2_PD_DSC_8K BIT(5) ++#define VOP2_PD_DSC_4K BIT(6) ++#define VOP2_PD_ESMART BIT(7) ++ + enum vop2_win_regs { + VOP2_WIN_ENABLE, + VOP2_WIN_FORMAT, +@@ -138,6 +157,7 @@ struct vop2_video_port_data { + + struct vop2_data { + u8 nr_vps; ++ u64 feature; + const struct vop2_win_data *win; + const struct vop2_video_port_data *vp; + struct vop_rect max_input; +@@ -192,6 +212,11 @@ enum dst_factor_mode { + }; + + #define RK3568_GRF_VO_CON1 0x0364 ++ ++#define RK3588_GRF_SOC_CON1 0x0304 ++#define RK3588_GRF_VOP_CON2 0x08 ++#define RK3588_GRF_VO1_CON0 0x00 ++ + /* System registers definition */ + #define RK3568_REG_CFG_DONE 0x000 + #define RK3568_VERSION_INFO 0x004 +@@ -200,6 +225,7 @@ enum dst_factor_mode { + #define RK3568_DSP_IF_EN 0x028 + #define RK3568_DSP_IF_CTRL 0x02c + #define RK3568_DSP_IF_POL 0x030 ++#define RK3588_SYS_PD_CTRL 0x034 + #define RK3568_WB_CTRL 0x40 + #define RK3568_WB_XSCAL_FACTOR 0x44 + #define RK3568_WB_YRGB_MST 0x48 +@@ -220,9 +246,14 @@ enum dst_factor_mode { + #define RK3568_VP_INT_RAW_STATUS(vp) (0xAC + (vp) * 0x10) + + /* Video Port registers definition */ ++#define RK3568_VP0_CTRL_BASE 0x0C00 ++#define RK3568_VP1_CTRL_BASE 0x0D00 ++#define RK3568_VP2_CTRL_BASE 0x0E00 ++#define RK3588_VP3_CTRL_BASE 0x0F00 + #define RK3568_VP_DSP_CTRL 0x00 + #define RK3568_VP_MIPI_CTRL 0x04 + #define RK3568_VP_COLOR_BAR_CTRL 0x08 ++#define RK3588_VP_CLK_CTRL 0x0C + #define RK3568_VP_3D_LUT_CTRL 0x10 + #define RK3568_VP_3D_LUT_MST 0x20 + #define RK3568_VP_DSP_BG 0x2C +@@ -264,6 +295,17 @@ enum dst_factor_mode { + #define RK3568_SMART_DLY_NUM 0x6F8 + + /* Cluster register definition, offset relative to window base */ ++#define RK3568_CLUSTER0_CTRL_BASE 0x1000 ++#define RK3568_CLUSTER1_CTRL_BASE 0x1200 ++#define RK3588_CLUSTER2_CTRL_BASE 0x1400 ++#define RK3588_CLUSTER3_CTRL_BASE 0x1600 ++#define RK3568_ESMART0_CTRL_BASE 0x1800 ++#define RK3568_ESMART1_CTRL_BASE 0x1A00 ++#define RK3568_SMART0_CTRL_BASE 0x1C00 ++#define RK3568_SMART1_CTRL_BASE 0x1E00 ++#define RK3588_ESMART2_CTRL_BASE 0x1C00 ++#define RK3588_ESMART3_CTRL_BASE 0x1E00 ++ + #define RK3568_CLUSTER_WIN_CTRL0 0x00 + #define RK3568_CLUSTER_WIN_CTRL1 0x04 + #define RK3568_CLUSTER_WIN_YRGB_MST 0x10 +@@ -357,13 +399,18 @@ enum dst_factor_mode { + #define RK3568_VP_DSP_CTRL__DITHER_DOWN_EN BIT(17) + #define RK3568_VP_DSP_CTRL__PRE_DITHER_DOWN_EN BIT(16) + #define RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y BIT(15) ++#define RK3568_VP_DSP_CTRL__DSP_RG_SWAP BIT(10) + #define RK3568_VP_DSP_CTRL__DSP_RB_SWAP BIT(9) ++#define 
RK3568_VP_DSP_CTRL__DSP_BG_SWAP BIT(8) + #define RK3568_VP_DSP_CTRL__DSP_INTERLACE BIT(7) + #define RK3568_VP_DSP_CTRL__DSP_FILED_POL BIT(6) + #define RK3568_VP_DSP_CTRL__P2I_EN BIT(5) + #define RK3568_VP_DSP_CTRL__CORE_DCLK_DIV BIT(4) + #define RK3568_VP_DSP_CTRL__OUT_MODE GENMASK(3, 0) + ++#define RK3588_VP_CLK_CTRL__DCLK_OUT_DIV GENMASK(3, 2) ++#define RK3588_VP_CLK_CTRL__DCLK_CORE_DIV GENMASK(1, 0) ++ + #define RK3568_VP_POST_SCL_CTRL__VSCALEDOWN BIT(1) + #define RK3568_VP_POST_SCL_CTRL__HSCALEDOWN BIT(0) + +@@ -382,11 +429,37 @@ enum dst_factor_mode { + #define RK3568_SYS_DSP_INFACE_EN_HDMI BIT(1) + #define RK3568_SYS_DSP_INFACE_EN_RGB BIT(0) + ++#define RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX GENMASK(22, 21) ++#define RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX GENMASK(20, 20) ++#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX GENMASK(19, 18) ++#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX GENMASK(17, 16) ++#define RK3588_SYS_DSP_INFACE_EN_DP1_MUX GENMASK(15, 14) ++#define RK3588_SYS_DSP_INFACE_EN_DP0_MUX GENMASK(13, 12) ++#define RK3588_SYS_DSP_INFACE_EN_DPI GENMASK(9, 8) ++#define RK3588_SYS_DSP_INFACE_EN_MIPI1 BIT(7) ++#define RK3588_SYS_DSP_INFACE_EN_MIPI0 BIT(6) ++#define RK3588_SYS_DSP_INFACE_EN_HDMI1 BIT(5) ++#define RK3588_SYS_DSP_INFACE_EN_EDP1 BIT(4) ++#define RK3588_SYS_DSP_INFACE_EN_HDMI0 BIT(3) ++#define RK3588_SYS_DSP_INFACE_EN_EDP0 BIT(2) ++#define RK3588_SYS_DSP_INFACE_EN_DP1 BIT(1) ++#define RK3588_SYS_DSP_INFACE_EN_DP0 BIT(0) ++ ++#define RK3588_DSP_IF_MIPI1_PCLK_DIV GENMASK(27, 26) ++#define RK3588_DSP_IF_MIPI0_PCLK_DIV GENMASK(25, 24) ++#define RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV GENMASK(22, 22) ++#define RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV GENMASK(21, 20) ++#define RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV GENMASK(18, 18) ++#define RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV GENMASK(17, 16) ++ + #define RK3568_DSP_IF_POL__MIPI_PIN_POL GENMASK(19, 16) + #define RK3568_DSP_IF_POL__EDP_PIN_POL GENMASK(15, 12) + #define RK3568_DSP_IF_POL__HDMI_PIN_POL GENMASK(7, 4) + #define RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL GENMASK(3, 0) + ++#define RK3588_DSP_IF_POL__DP1_PIN_POL GENMASK(14, 12) ++#define RK3588_DSP_IF_POL__DP0_PIN_POL GENMASK(10, 8) ++ + #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2_PHASE_LOCK BIT(5) + #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2 BIT(4) + +@@ -408,8 +481,12 @@ enum dst_factor_mode { + #define RK3568_OVL_PORT_SEL__SEL_PORT GENMASK(31, 16) + #define RK3568_OVL_PORT_SEL__SMART1 GENMASK(31, 30) + #define RK3568_OVL_PORT_SEL__SMART0 GENMASK(29, 28) ++#define RK3588_OVL_PORT_SEL__ESMART3 GENMASK(31, 30) ++#define RK3588_OVL_PORT_SEL__ESMART2 GENMASK(29, 28) + #define RK3568_OVL_PORT_SEL__ESMART1 GENMASK(27, 26) + #define RK3568_OVL_PORT_SEL__ESMART0 GENMASK(25, 24) ++#define RK3588_OVL_PORT_SEL__CLUSTER3 GENMASK(23, 22) ++#define RK3588_OVL_PORT_SEL__CLUSTER2 GENMASK(21, 20) + #define RK3568_OVL_PORT_SEL__CLUSTER1 GENMASK(19, 18) + #define RK3568_OVL_PORT_SEL__CLUSTER0 GENMASK(17, 16) + #define RK3568_OVL_PORT_SET__PORT2_MUX GENMASK(11, 8) +@@ -422,6 +499,10 @@ enum dst_factor_mode { + #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_1 GENMASK(15, 8) + #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_0 GENMASK(7, 0) + ++#define RK3568_CLUSTER_WIN_CTRL0__WIN0_EN BIT(0) ++ ++#define RK3568_SMART_REGION0_CTRL__WIN0_EN BIT(0) ++ + #define RK3568_SMART_DLY_NUM__SMART1 GENMASK(31, 24) + #define RK3568_SMART_DLY_NUM__SMART0 GENMASK(23, 16) + #define RK3568_SMART_DLY_NUM__ESMART1 GENMASK(15, 8) +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -34,6 +34,30 @@ static 
const uint32_t formats_cluster[] + DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */ + }; + ++static const uint32_t formats_esmart[] = { ++ DRM_FORMAT_XRGB8888, ++ DRM_FORMAT_ARGB8888, ++ DRM_FORMAT_XBGR8888, ++ DRM_FORMAT_ABGR8888, ++ DRM_FORMAT_RGB888, ++ DRM_FORMAT_BGR888, ++ DRM_FORMAT_RGB565, ++ DRM_FORMAT_BGR565, ++ DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV21, /* yvu420_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV61, /* yvu422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */ ++ DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV42, /* yvu444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */ ++ DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */ ++ DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */ ++ DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ ++ DRM_FORMAT_YUYV, /* yuv422_8bit[YUYV] linear mode */ ++ DRM_FORMAT_UYVY, /* yuv422_8bit[UYVY] linear mode */ ++}; ++ + static const uint32_t formats_rk356x_esmart[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, +@@ -236,7 +260,188 @@ static const struct vop2_win_data rk3568 + }, + }; + ++static const struct vop2_video_port_data rk3588_vop_video_ports[] = { ++ { ++ .id = 0, ++ .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .gamma_lut_len = 1024, ++ .cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */ ++ .max_output = { 4096, 2304 }, ++ /* hdr2sdr sdr2hdr hdr2hdr sdr2sdr */ ++ .pre_scan_max_dly = { 76, 65, 65, 54 }, ++ .offset = 0xc00, ++ }, { ++ .id = 1, ++ .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .gamma_lut_len = 1024, ++ .cubic_lut_len = 729, /* 9x9x9 */ ++ .max_output = { 4096, 2304 }, ++ .pre_scan_max_dly = { 76, 65, 65, 54 }, ++ .offset = 0xd00, ++ }, { ++ .id = 2, ++ .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .gamma_lut_len = 1024, ++ .cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */ ++ .max_output = { 4096, 2304 }, ++ .pre_scan_max_dly = { 52, 52, 52, 52 }, ++ .offset = 0xe00, ++ }, { ++ .id = 3, ++ .gamma_lut_len = 1024, ++ .max_output = { 2048, 1536 }, ++ .pre_scan_max_dly = { 52, 52, 52, 52 }, ++ .offset = 0xf00, ++ }, ++}; ++ ++/* ++ * rk3588 vop with 4 cluster, 4 esmart win. ++ * Every cluster can work as 4K win or split into two win. ++ * All win in cluster support AFBCD. ++ * ++ * Every esmart win and smart win support 4 Multi-region. ++ * ++ * Scale filter mode: ++ * ++ * * Cluster: bicubic for horizontal scale up, others use bilinear ++ * * ESmart: ++ * * nearest-neighbor/bilinear/bicubic for scale up ++ * * nearest-neighbor/bilinear/average for scale down ++ * ++ * AXI Read ID assignment: ++ * Two AXI bus: ++ * AXI0 is a read/write bus with a higher performance. ++ * AXI1 is a read only bus. ++ * ++ * Every window on a AXI bus must assigned two unique ++ * read id(yrgb_id/uv_id, valid id are 0x1~0xe). 
++ * ++ * AXI0: ++ * Cluster0/1, Esmart0/1, WriteBack ++ * ++ * AXI 1: ++ * Cluster2/3, Esmart2/3 ++ * ++ */ ++static const struct vop2_win_data rk3588_vop_win_data[] = { ++ { ++ .name = "Cluster0-win0", ++ .phys_id = ROCKCHIP_VOP2_CLUSTER0, ++ .base = 0x1000, ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), ++ .format_modifiers = format_modifiers_afbc, ++ .layer_sel_id = 0, ++ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | ++ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, ++ .max_upscale_factor = 4, ++ .max_downscale_factor = 4, ++ .dly = { 4, 26, 29 }, ++ .type = DRM_PLANE_TYPE_PRIMARY, ++ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, ++ }, { ++ .name = "Cluster1-win0", ++ .phys_id = ROCKCHIP_VOP2_CLUSTER1, ++ .base = 0x1200, ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), ++ .format_modifiers = format_modifiers_afbc, ++ .layer_sel_id = 1, ++ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | ++ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_PRIMARY, ++ .max_upscale_factor = 4, ++ .max_downscale_factor = 4, ++ .dly = { 4, 26, 29 }, ++ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, ++ }, { ++ .name = "Cluster2-win0", ++ .phys_id = ROCKCHIP_VOP2_CLUSTER2, ++ .base = 0x1400, ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), ++ .format_modifiers = format_modifiers_afbc, ++ .layer_sel_id = 4, ++ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | ++ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_PRIMARY, ++ .max_upscale_factor = 4, ++ .max_downscale_factor = 4, ++ .dly = { 4, 26, 29 }, ++ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, ++ }, { ++ .name = "Cluster3-win0", ++ .phys_id = ROCKCHIP_VOP2_CLUSTER3, ++ .base = 0x1600, ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), ++ .format_modifiers = format_modifiers_afbc, ++ .layer_sel_id = 5, ++ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | ++ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_PRIMARY, ++ .max_upscale_factor = 4, ++ .max_downscale_factor = 4, ++ .dly = { 4, 26, 29 }, ++ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, ++ }, { ++ .name = "Esmart0-win0", ++ .phys_id = ROCKCHIP_VOP2_ESMART0, ++ .formats = formats_esmart, ++ .nformats = ARRAY_SIZE(formats_esmart), ++ .format_modifiers = format_modifiers, ++ .base = 0x1800, ++ .layer_sel_id = 2, ++ .supported_rotations = DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_OVERLAY, ++ .max_upscale_factor = 8, ++ .max_downscale_factor = 8, ++ .dly = { 23, 45, 48 }, ++ }, { ++ .name = "Esmart1-win0", ++ .phys_id = ROCKCHIP_VOP2_ESMART1, ++ .formats = formats_esmart, ++ .nformats = ARRAY_SIZE(formats_esmart), ++ .format_modifiers = format_modifiers, ++ .base = 0x1a00, ++ .layer_sel_id = 3, ++ .supported_rotations = DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_OVERLAY, ++ .max_upscale_factor = 8, ++ .max_downscale_factor = 8, ++ .dly = { 23, 45, 48 }, ++ }, { ++ .name = "Esmart2-win0", ++ .phys_id = ROCKCHIP_VOP2_ESMART2, ++ .base = 0x1c00, ++ .formats = formats_esmart, ++ .nformats = ARRAY_SIZE(formats_esmart), ++ .format_modifiers = format_modifiers, ++ .layer_sel_id = 6, ++ .supported_rotations = DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_OVERLAY, ++ .max_upscale_factor = 8, ++ .max_downscale_factor = 8, ++ .dly = { 23, 45, 48 }, ++ }, { ++ .name = "Esmart3-win0", ++ .phys_id = ROCKCHIP_VOP2_ESMART3, ++ .formats = formats_esmart, ++ .nformats = 
ARRAY_SIZE(formats_esmart), ++ .format_modifiers = format_modifiers, ++ .base = 0x1e00, ++ .layer_sel_id = 7, ++ .supported_rotations = DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_OVERLAY, ++ .max_upscale_factor = 8, ++ .max_downscale_factor = 8, ++ .dly = { 23, 45, 48 }, ++ }, ++}; ++ + static const struct vop2_data rk3566_vop = { ++ .feature = VOP2_FEATURE_HAS_SYS_GRF, + .nr_vps = 3, + .max_input = { 4096, 2304 }, + .max_output = { 4096, 2304 }, +@@ -247,6 +452,7 @@ static const struct vop2_data rk3566_vop + }; + + static const struct vop2_data rk3568_vop = { ++ .feature = VOP2_FEATURE_HAS_SYS_GRF, + .nr_vps = 3, + .max_input = { 4096, 2304 }, + .max_output = { 4096, 2304 }, +@@ -256,6 +462,18 @@ static const struct vop2_data rk3568_vop + .soc_id = 3568, + }; + ++static const struct vop2_data rk3588_vop = { ++ .feature = VOP2_FEATURE_HAS_SYS_GRF | VOP2_FEATURE_HAS_VO1_GRF | ++ VOP2_FEATURE_HAS_VOP_GRF | VOP2_FEATURE_HAS_SYS_PMU, ++ .nr_vps = 4, ++ .max_input = { 4096, 4320 }, ++ .max_output = { 4096, 4320 }, ++ .vp = rk3588_vop_video_ports, ++ .win = rk3588_vop_win_data, ++ .win_size = ARRAY_SIZE(rk3588_vop_win_data), ++ .soc_id = 3588, ++}; ++ + static const struct of_device_id vop2_dt_match[] = { + { + .compatible = "rockchip,rk3566-vop", +@@ -264,6 +482,9 @@ static const struct of_device_id vop2_dt + .compatible = "rockchip,rk3568-vop", + .data = &rk3568_vop, + }, { ++ .compatible = "rockchip,rk3588-vop", ++ .data = &rk3588_vop ++ }, { + }, + }; + MODULE_DEVICE_TABLE(of, vop2_dt_match); diff --git a/patches-6.6/033-29-v6.8-drm-rockchip-vop2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch b/patches-6.6/033-29-v6.8-drm-rockchip-vop2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch new file mode 100644 index 0000000..1db6bda --- /dev/null +++ b/patches-6.6/033-29-v6.8-drm-rockchip-vop2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch @@ -0,0 +1,80 @@ +From 9d7fe7704d534c2d043aff2987f10671a8b4373d Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:59:31 +0800 +Subject: [PATCH] drm/rockchip: vop2: rename VOP_FEATURE_OUTPUT_10BIT to + VOP2_VP_FEATURE_OUTPUT_10BIT + +VOP2 has multiple independent video ports with different +feature, so rename VOP_FEATURE_OUTPUT_10BIT to +VOP2_VP_FEATURE_OUTPUT_10BIT for more clearly meaning. 
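+
+A condensed sketch of the resulting split, drawn only from the hunks in this
+series (an illustration, not an exhaustive list of users): the SoC-wide bits
+live in struct vop2_data and keep the plain VOP2_FEATURE_ prefix, while the
+per-video-port bits live in struct vop2_video_port_data and gain the
+VOP2_VP_ prefix:
+
+	/* SoC-wide capability, tested against vop2_data->feature */
+	if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_GRF)
+		vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node,
+								"rockchip,grf");
+
+	/* per-video-port capability, tested against vp_data->feature */
+	if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
+	    !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
+		out_mode = ROCKCHIP_OUT_MODE_P888;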
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115931.1785495-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 +- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 2 +- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 8 ++++---- + 3 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1997,7 +1997,7 @@ static void vop2_crtc_atomic_enable(stru + return; + + if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && +- !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT)) ++ !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT)) + out_mode = ROCKCHIP_OUT_MODE_P888; + else + out_mode = vcstate->output_mode; +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -11,7 +11,7 @@ + #include + #include "rockchip_drm_vop.h" + +-#define VOP_FEATURE_OUTPUT_10BIT BIT(0) ++#define VOP2_VP_FEATURE_OUTPUT_10BIT BIT(0) + + #define VOP2_FEATURE_HAS_SYS_GRF BIT(0) + #define VOP2_FEATURE_HAS_VO0_GRF BIT(1) +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -136,7 +136,7 @@ static const uint64_t format_modifiers_a + static const struct vop2_video_port_data rk3568_vop_video_ports[] = { + { + .id = 0, +- .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 9 * 9 * 9, + .max_output = { 4096, 2304 }, +@@ -263,7 +263,7 @@ static const struct vop2_win_data rk3568 + static const struct vop2_video_port_data rk3588_vop_video_ports[] = { + { + .id = 0, +- .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */ + .max_output = { 4096, 2304 }, +@@ -272,7 +272,7 @@ static const struct vop2_video_port_data + .offset = 0xc00, + }, { + .id = 1, +- .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 729, /* 9x9x9 */ + .max_output = { 4096, 2304 }, +@@ -280,7 +280,7 @@ static const struct vop2_video_port_data + .offset = 0xd00, + }, { + .id = 2, +- .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */ + .max_output = { 4096, 2304 }, diff --git a/patches-6.6/033-30-v6.8-drm-rockchip-vop2-Avoid-use-regmap_reinit_cache-at-runtim.patch b/patches-6.6/033-30-v6.8-drm-rockchip-vop2-Avoid-use-regmap_reinit_cache-at-runtim.patch new file mode 100644 index 0000000..9e691b0 --- /dev/null +++ b/patches-6.6/033-30-v6.8-drm-rockchip-vop2-Avoid-use-regmap_reinit_cache-at-runtim.patch @@ -0,0 +1,59 @@ +From 3ee348eb36f14e9303a7e9757efb91b0bbf3f7a9 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Sun, 17 Dec 2023 16:44:15 +0800 +Subject: [PATCH] drm/rockchip: vop2: Avoid use regmap_reinit_cache at runtime + +Marek Report a possible irq lock inversion dependency warning when +commit 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync() +to fix suspend/resume"") lands linux-next. + +I can reproduce this warning with: +CONFIG_PROVE_LOCKING=y +CONFIG_DEBUG_LOCKDEP=y + +It seems than when use regmap_reinit_cache at runtime whith Mark's +commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree +register cache"), it will trigger a possible irq lock inversion dependency +warning. 
+ +One solution is switch back to REGCACHE_RBTREE, but it seems that +REGCACHE_MAPLE is the future, so I avoid using regmap_reinit_cache, +and drop all the regcache when vop is disabled, then we get a fresh +start at next enbable time. + +Fixes: 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync() to fix suspend/resume"") +Reported-by: Marek Szyprowski +Closes: https://lore.kernel.org/all/98a9f15d-30ac-47bf-9b93-3aa2c9900f7b@samsung.com/ +Signed-off-by: Andy Yan +Tested-by: Marek Szyprowski +[dropped the large kernel log of the lockdep report from the message] +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231217084415.2373043-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -961,12 +961,6 @@ static void vop2_enable(struct vop2 *vop + return; + } + +- ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config); +- if (ret) { +- drm_err(vop2->drm, "failed to reinit cache: %d\n", ret); +- return; +- } +- + if (vop2->data->soc_id == 3566) + vop2_writel(vop2, RK3568_OTP_WIN_EN, 1); + +@@ -998,6 +992,8 @@ static void vop2_disable(struct vop2 *vo + + pm_runtime_put_sync(vop2->dev); + ++ regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register); ++ + clk_disable_unprepare(vop2->pclk); + clk_disable_unprepare(vop2->aclk); + clk_disable_unprepare(vop2->hclk); diff --git a/patches-6.6/033-31-v6.8-drm-rockchip-vop2-clean-up-some-inconsistent-indenting.patch b/patches-6.6/033-31-v6.8-drm-rockchip-vop2-clean-up-some-inconsistent-indenting.patch new file mode 100644 index 0000000..6272c4e --- /dev/null +++ b/patches-6.6/033-31-v6.8-drm-rockchip-vop2-clean-up-some-inconsistent-indenting.patch @@ -0,0 +1,31 @@ +From f40e61eb538d35661d6dda1de92867954d776c4a Mon Sep 17 00:00:00 2001 +From: Jiapeng Chong +Date: Tue, 19 Dec 2023 14:26:35 +0800 +Subject: [PATCH] drm/rockchip: vop2: clean up some inconsistent indenting + +No functional modification involved. + +drivers/gpu/drm/rockchip/rockchip_drm_vop2.c:1708 rk3588_calc_cru_cfg() warn: inconsistent indenting. 
+ +Reported-by: Abaci Robot +Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7778 +Signed-off-by: Jiapeng Chong +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231219062635.100718-1-jiapeng.chong@linux.alibaba.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1703,8 +1703,8 @@ static unsigned long rk3588_calc_cru_cfg + * *if_pixclk_div = dclk_rate / if_pixclk_rate; + * *if_dclk_div = dclk_rate / if_dclk_rate; + */ +- *if_pixclk_div = 2; +- *if_dclk_div = 4; ++ *if_pixclk_div = 2; ++ *if_dclk_div = 4; + } else if (vop2_output_if_is_edp(id)) { + /* + * edp_pixclk = edp_dclk > dclk_core diff --git a/patches-6.6/033-32-v6.8-drm-rockchip-vop2-Drop-superfluous-include.patch b/patches-6.6/033-32-v6.8-drm-rockchip-vop2-Drop-superfluous-include.patch new file mode 100644 index 0000000..de34da6 --- /dev/null +++ b/patches-6.6/033-32-v6.8-drm-rockchip-vop2-Drop-superfluous-include.patch @@ -0,0 +1,25 @@ +From 38709af26c33e398c3292e96837ccfde41fd9e6b Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Thu, 4 Jan 2024 16:39:49 +0200 +Subject: [PATCH] drm/rockchip: vop2: Drop superfluous include + +The rockchip_drm_fb.h header contains just a single function which is +not directly used by the VOP2 driver. Drop the unnecessary include. + +Signed-off-by: Cristian Ciocaltea +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240104143951.85219-1-cristian.ciocaltea@collabora.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -35,7 +35,6 @@ + + #include "rockchip_drm_drv.h" + #include "rockchip_drm_gem.h" +-#include "rockchip_drm_fb.h" + #include "rockchip_drm_vop2.h" + #include "rockchip_rgb.h" + diff --git a/patches-6.6/033-33-v6.8-drm-rockchip-vop2-Drop-unused-if_dclk_rate-variable.patch b/patches-6.6/033-33-v6.8-drm-rockchip-vop2-Drop-unused-if_dclk_rate-variable.patch new file mode 100644 index 0000000..16c4ada --- /dev/null +++ b/patches-6.6/033-33-v6.8-drm-rockchip-vop2-Drop-unused-if_dclk_rate-variable.patch @@ -0,0 +1,47 @@ +From 196da3f3f76a46905f7daab29c56974f1aba9a7a Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Fri, 5 Jan 2024 19:40:06 +0200 +Subject: [PATCH] drm/rockchip: vop2: Drop unused if_dclk_rate variable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Commit 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588") +introduced a variable which ended up being unused: + +rockchip_drm_vop2.c:1688:23: warning: variable ‘if_dclk_rate’ set but not used [-Wunused-but-set-variable] + +This has been initially used as part of a formula to compute the clock +dividers, but eventually it has been replaced by static values. + +Drop the variable declaration and move its assignment to the comment +block, to serve as documentation of how the constants have been +generated. 
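+
+Worked through with the values visible in the hunk below (dclk_rate =
+v_pixclk, dclk_core_rate = v_pixclk >> 2, and K = 1 as initialised there),
+the retired assignment reduces to the hard-coded dividers; this is only a
+sketch of the arithmetic, not additional code in the patch:
+
+	if_pixclk_rate = (dclk_core_rate << 1) / K;	/* v_pixclk / 2 */
+	if_dclk_rate   = dclk_core_rate / K;		/* v_pixclk / 4 */
+	*if_pixclk_div = dclk_rate / if_pixclk_rate;	/* = 2 */
+	*if_dclk_div   = dclk_rate / if_dclk_rate;	/* = 4 */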
+ +Signed-off-by: Cristian Ciocaltea +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240105174007.98054-1-cristian.ciocaltea@collabora.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1682,7 +1682,6 @@ static unsigned long rk3588_calc_cru_cfg + unsigned long dclk_core_rate = v_pixclk >> 2; + unsigned long dclk_rate = v_pixclk; + unsigned long dclk_out_rate; +- unsigned long if_dclk_rate; + unsigned long if_pixclk_rate; + int K = 1; + +@@ -1697,8 +1696,8 @@ static unsigned long rk3588_calc_cru_cfg + } + + if_pixclk_rate = (dclk_core_rate << 1) / K; +- if_dclk_rate = dclk_core_rate / K; + /* ++ * if_dclk_rate = dclk_core_rate / K; + * *if_pixclk_div = dclk_rate / if_pixclk_rate; + * *if_dclk_div = dclk_rate / if_dclk_rate; + */ diff --git a/patches-6.6/033-34-v6.8-drm-rockchip-vop2-add-a-missing-unlock-in.patch b/patches-6.6/033-34-v6.8-drm-rockchip-vop2-add-a-missing-unlock-in.patch new file mode 100644 index 0000000..5316851 --- /dev/null +++ b/patches-6.6/033-34-v6.8-drm-rockchip-vop2-add-a-missing-unlock-in.patch @@ -0,0 +1,31 @@ +From b6ddaa63f728d26c12048aed76be99c24f435c41 Mon Sep 17 00:00:00 2001 +From: Harshit Mogalapalli +Date: Fri, 19 Jan 2024 11:08:40 -0800 +Subject: [PATCH] drm/rockchip: vop2: add a missing unlock in + vop2_crtc_atomic_enable() + +Unlock before returning on the error path. + +Fixes: 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588") +Signed-off-by: Harshit Mogalapalli +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240119190841.1619443-1-harshit.m.mogalapalli@oracle.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1987,8 +1987,10 @@ static void vop2_crtc_atomic_enable(stru + clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); + } + +- if (!clock) ++ if (!clock) { ++ vop2_unlock(vop2); + return; ++ } + + if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && + !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT)) diff --git a/patches-6.6/034-01-v6.7-drm-gpuvm-rename-struct-drm_gpuva_manager-to-struct.patch b/patches-6.6/034-01-v6.7-drm-gpuvm-rename-struct-drm_gpuva_manager-to-struct.patch new file mode 100644 index 0000000..72a2eb9 --- /dev/null +++ b/patches-6.6/034-01-v6.7-drm-gpuvm-rename-struct-drm_gpuva_manager-to-struct.patch @@ -0,0 +1,5137 @@ +From f72c2db47080523d5e0f3c20846c96ed31c35648 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 20 Sep 2023 16:42:34 +0200 +Subject: [PATCH] drm/gpuvm: rename struct drm_gpuva_manager to struct + drm_gpuvm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Rename struct drm_gpuva_manager to struct drm_gpuvm including +corresponding functions. This way the GPUVA manager's structures align +very well with the documentation of VM_BIND [1] and VM_BIND locking [2]. + +It also provides a better foundation for the naming of data structures +and functions introduced for implementing a common dma-resv per GPU-VM +including tracking of external and evicted objects in subsequent +patches. 
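+
+A minimal sketch of what the rename looks like at a call site, condensed from
+the drm_debugfs.c hunk further down (the same pattern is applied to the other
+drm_gpuva_manager symbols; helpers that are not part of the manager, such as
+drm_debugfs_gpuva_info(), only see their parameter change):
+
+	struct drm_gpuva_manager *mgr    ->  struct drm_gpuvm *gpuvm
+	drm_gpuva_for_each_va(va, mgr)   ->  drm_gpuvm_for_each_va(va, gpuvm)
+	drm_debugfs_gpuva_info(m, mgr)   ->  drm_debugfs_gpuva_info(m, gpuvm)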
+ +[1] Documentation/gpu/drm-vm-bind-async.rst +[2] Documentation/gpu/drm-vm-bind-locking.rst + +Cc: Thomas Hellström +Cc: Matthew Brost +Acked-by: Dave Airlie +Acked-by: Christian König +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20230920144343.64830-2-dakr@redhat.com +--- + drivers/gpu/drm/Makefile | 2 +- + drivers/gpu/drm/drm_debugfs.c | 16 +- + .../gpu/drm/{drm_gpuva_mgr.c => drm_gpuvm.c} | 404 +++++++++--------- + drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 28 +- + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 6 +- + include/drm/drm_debugfs.h | 6 +- + include/drm/{drm_gpuva_mgr.h => drm_gpuvm.h} | 155 ++++--- + 8 files changed, 309 insertions(+), 310 deletions(-) + rename drivers/gpu/drm/{drm_gpuva_mgr.c => drm_gpuvm.c} (78%) + rename include/drm/{drm_gpuva_mgr.h => drm_gpuvm.h} (78%) + +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -45,7 +45,7 @@ drm-y := \ + drm_vblank.o \ + drm_vblank_work.o \ + drm_vma_manager.o \ +- drm_gpuva_mgr.o \ ++ drm_gpuvm.o \ + drm_writeback.o + drm-$(CONFIG_DRM_LEGACY) += \ + drm_agpsupport.o \ +--- a/drivers/gpu/drm/drm_debugfs.c ++++ b/drivers/gpu/drm/drm_debugfs.c +@@ -40,7 +40,7 @@ + #include + #include + #include +-#include ++#include + + #include "drm_crtc_internal.h" + #include "drm_internal.h" +@@ -182,31 +182,31 @@ static const struct file_operations drm_ + /** + * drm_debugfs_gpuva_info - dump the given DRM GPU VA space + * @m: pointer to the &seq_file to write +- * @mgr: the &drm_gpuva_manager representing the GPU VA space ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space + * + * Dumps the GPU VA mappings of a given DRM GPU VA manager. + * + * For each DRM GPU VA space drivers should call this function from their + * &drm_info_list's show callback. + * +- * Returns: 0 on success, -ENODEV if the &mgr is not initialized ++ * Returns: 0 on success, -ENODEV if the &gpuvm is not initialized + */ + int drm_debugfs_gpuva_info(struct seq_file *m, +- struct drm_gpuva_manager *mgr) ++ struct drm_gpuvm *gpuvm) + { +- struct drm_gpuva *va, *kva = &mgr->kernel_alloc_node; ++ struct drm_gpuva *va, *kva = &gpuvm->kernel_alloc_node; + +- if (!mgr->name) ++ if (!gpuvm->name) + return -ENODEV; + + seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n", +- mgr->name, mgr->mm_start, mgr->mm_start + mgr->mm_range); ++ gpuvm->name, gpuvm->mm_start, gpuvm->mm_start + gpuvm->mm_range); + seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n", + kva->va.addr, kva->va.addr + kva->va.range); + seq_puts(m, "\n"); + seq_puts(m, " VAs | start | range | end | object | object offset\n"); + seq_puts(m, "-------------------------------------------------------------------------------------------------------------\n"); +- drm_gpuva_for_each_va(va, mgr) { ++ drm_gpuvm_for_each_va(va, gpuvm) { + if (unlikely(va == kva)) + continue; + +--- a/drivers/gpu/drm/drm_gpuva_mgr.c ++++ /dev/null +@@ -1,1723 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-only +-/* +- * Copyright (c) 2022 Red Hat. 
+- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +- * OTHER DEALINGS IN THE SOFTWARE. +- * +- * Authors: +- * Danilo Krummrich +- * +- */ +- +-#include +- +-#include +-#include +- +-/** +- * DOC: Overview +- * +- * The DRM GPU VA Manager, represented by struct drm_gpuva_manager keeps track +- * of a GPU's virtual address (VA) space and manages the corresponding virtual +- * mappings represented by &drm_gpuva objects. It also keeps track of the +- * mapping's backing &drm_gem_object buffers. +- * +- * &drm_gem_object buffers maintain a list of &drm_gpuva objects representing +- * all existent GPU VA mappings using this &drm_gem_object as backing buffer. +- * +- * GPU VAs can be flagged as sparse, such that drivers may use GPU VAs to also +- * keep track of sparse PTEs in order to support Vulkan 'Sparse Resources'. +- * +- * The GPU VA manager internally uses a rb-tree to manage the +- * &drm_gpuva mappings within a GPU's virtual address space. +- * +- * The &drm_gpuva_manager contains a special &drm_gpuva representing the +- * portion of VA space reserved by the kernel. This node is initialized together +- * with the GPU VA manager instance and removed when the GPU VA manager is +- * destroyed. +- * +- * In a typical application drivers would embed struct drm_gpuva_manager and +- * struct drm_gpuva within their own driver specific structures, there won't be +- * any memory allocations of its own nor memory allocations of &drm_gpuva +- * entries. +- * +- * The data structures needed to store &drm_gpuvas within the &drm_gpuva_manager +- * are contained within struct drm_gpuva already. Hence, for inserting +- * &drm_gpuva entries from within dma-fence signalling critical sections it is +- * enough to pre-allocate the &drm_gpuva structures. +- */ +- +-/** +- * DOC: Split and Merge +- * +- * Besides its capability to manage and represent a GPU VA space, the +- * &drm_gpuva_manager also provides functions to let the &drm_gpuva_manager +- * calculate a sequence of operations to satisfy a given map or unmap request. +- * +- * Therefore the DRM GPU VA manager provides an algorithm implementing splitting +- * and merging of existent GPU VA mappings with the ones that are requested to +- * be mapped or unmapped. This feature is required by the Vulkan API to +- * implement Vulkan 'Sparse Memory Bindings' - drivers UAPIs often refer to this +- * as VM BIND. 
+- * +- * Drivers can call drm_gpuva_sm_map() to receive a sequence of callbacks +- * containing map, unmap and remap operations for a given newly requested +- * mapping. The sequence of callbacks represents the set of operations to +- * execute in order to integrate the new mapping cleanly into the current state +- * of the GPU VA space. +- * +- * Depending on how the new GPU VA mapping intersects with the existent mappings +- * of the GPU VA space the &drm_gpuva_fn_ops callbacks contain an arbitrary +- * amount of unmap operations, a maximum of two remap operations and a single +- * map operation. The caller might receive no callback at all if no operation is +- * required, e.g. if the requested mapping already exists in the exact same way. +- * +- * The single map operation represents the original map operation requested by +- * the caller. +- * +- * &drm_gpuva_op_unmap contains a 'keep' field, which indicates whether the +- * &drm_gpuva to unmap is physically contiguous with the original mapping +- * request. Optionally, if 'keep' is set, drivers may keep the actual page table +- * entries for this &drm_gpuva, adding the missing page table entries only and +- * update the &drm_gpuva_manager's view of things accordingly. +- * +- * Drivers may do the same optimization, namely delta page table updates, also +- * for remap operations. This is possible since &drm_gpuva_op_remap consists of +- * one unmap operation and one or two map operations, such that drivers can +- * derive the page table update delta accordingly. +- * +- * Note that there can't be more than two existent mappings to split up, one at +- * the beginning and one at the end of the new mapping, hence there is a +- * maximum of two remap operations. +- * +- * Analogous to drm_gpuva_sm_map() drm_gpuva_sm_unmap() uses &drm_gpuva_fn_ops +- * to call back into the driver in order to unmap a range of GPU VA space. The +- * logic behind this function is way simpler though: For all existent mappings +- * enclosed by the given range unmap operations are created. For mappings which +- * are only partically located within the given range, remap operations are +- * created such that those mappings are split up and re-mapped partically. +- * +- * As an alternative to drm_gpuva_sm_map() and drm_gpuva_sm_unmap(), +- * drm_gpuva_sm_map_ops_create() and drm_gpuva_sm_unmap_ops_create() can be used +- * to directly obtain an instance of struct drm_gpuva_ops containing a list of +- * &drm_gpuva_op, which can be iterated with drm_gpuva_for_each_op(). This list +- * contains the &drm_gpuva_ops analogous to the callbacks one would receive when +- * calling drm_gpuva_sm_map() or drm_gpuva_sm_unmap(). While this way requires +- * more memory (to allocate the &drm_gpuva_ops), it provides drivers a way to +- * iterate the &drm_gpuva_op multiple times, e.g. once in a context where memory +- * allocations are possible (e.g. to allocate GPU page tables) and once in the +- * dma-fence signalling critical path. +- * +- * To update the &drm_gpuva_manager's view of the GPU VA space +- * drm_gpuva_insert() and drm_gpuva_remove() may be used. These functions can +- * safely be used from &drm_gpuva_fn_ops callbacks originating from +- * drm_gpuva_sm_map() or drm_gpuva_sm_unmap(). However, it might be more +- * convenient to use the provided helper functions drm_gpuva_map(), +- * drm_gpuva_remap() and drm_gpuva_unmap() instead. 
+- * +- * The following diagram depicts the basic relationships of existent GPU VA +- * mappings, a newly requested mapping and the resulting mappings as implemented +- * by drm_gpuva_sm_map() - it doesn't cover any arbitrary combinations of these. +- * +- * 1) Requested mapping is identical. Replace it, but indicate the backing PTEs +- * could be kept. +- * +- * :: +- * +- * 0 a 1 +- * old: |-----------| (bo_offset=n) +- * +- * 0 a 1 +- * req: |-----------| (bo_offset=n) +- * +- * 0 a 1 +- * new: |-----------| (bo_offset=n) +- * +- * +- * 2) Requested mapping is identical, except for the BO offset, hence replace +- * the mapping. +- * +- * :: +- * +- * 0 a 1 +- * old: |-----------| (bo_offset=n) +- * +- * 0 a 1 +- * req: |-----------| (bo_offset=m) +- * +- * 0 a 1 +- * new: |-----------| (bo_offset=m) +- * +- * +- * 3) Requested mapping is identical, except for the backing BO, hence replace +- * the mapping. +- * +- * :: +- * +- * 0 a 1 +- * old: |-----------| (bo_offset=n) +- * +- * 0 b 1 +- * req: |-----------| (bo_offset=n) +- * +- * 0 b 1 +- * new: |-----------| (bo_offset=n) +- * +- * +- * 4) Existent mapping is a left aligned subset of the requested one, hence +- * replace the existent one. +- * +- * :: +- * +- * 0 a 1 +- * old: |-----| (bo_offset=n) +- * +- * 0 a 2 +- * req: |-----------| (bo_offset=n) +- * +- * 0 a 2 +- * new: |-----------| (bo_offset=n) +- * +- * .. note:: +- * We expect to see the same result for a request with a different BO +- * and/or non-contiguous BO offset. +- * +- * +- * 5) Requested mapping's range is a left aligned subset of the existent one, +- * but backed by a different BO. Hence, map the requested mapping and split +- * the existent one adjusting its BO offset. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 0 b 1 +- * req: |-----| (bo_offset=n) +- * +- * 0 b 1 a' 2 +- * new: |-----|-----| (b.bo_offset=n, a.bo_offset=n+1) +- * +- * .. note:: +- * We expect to see the same result for a request with a different BO +- * and/or non-contiguous BO offset. +- * +- * +- * 6) Existent mapping is a superset of the requested mapping. Split it up, but +- * indicate that the backing PTEs could be kept. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 0 a 1 +- * req: |-----| (bo_offset=n) +- * +- * 0 a 1 a' 2 +- * new: |-----|-----| (a.bo_offset=n, a'.bo_offset=n+1) +- * +- * +- * 7) Requested mapping's range is a right aligned subset of the existent one, +- * but backed by a different BO. Hence, map the requested mapping and split +- * the existent one, without adjusting the BO offset. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 1 b 2 +- * req: |-----| (bo_offset=m) +- * +- * 0 a 1 b 2 +- * new: |-----|-----| (a.bo_offset=n,b.bo_offset=m) +- * +- * +- * 8) Existent mapping is a superset of the requested mapping. Split it up, but +- * indicate that the backing PTEs could be kept. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 1 a 2 +- * req: |-----| (bo_offset=n+1) +- * +- * 0 a' 1 a 2 +- * new: |-----|-----| (a'.bo_offset=n, a.bo_offset=n+1) +- * +- * +- * 9) Existent mapping is overlapped at the end by the requested mapping backed +- * by a different BO. Hence, map the requested mapping and split up the +- * existent one, without adjusting the BO offset. 
+- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 1 b 3 +- * req: |-----------| (bo_offset=m) +- * +- * 0 a 1 b 3 +- * new: |-----|-----------| (a.bo_offset=n,b.bo_offset=m) +- * +- * +- * 10) Existent mapping is overlapped by the requested mapping, both having the +- * same backing BO with a contiguous offset. Indicate the backing PTEs of +- * the old mapping could be kept. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 1 a 3 +- * req: |-----------| (bo_offset=n+1) +- * +- * 0 a' 1 a 3 +- * new: |-----|-----------| (a'.bo_offset=n, a.bo_offset=n+1) +- * +- * +- * 11) Requested mapping's range is a centered subset of the existent one +- * having a different backing BO. Hence, map the requested mapping and split +- * up the existent one in two mappings, adjusting the BO offset of the right +- * one accordingly. +- * +- * :: +- * +- * 0 a 3 +- * old: |-----------------| (bo_offset=n) +- * +- * 1 b 2 +- * req: |-----| (bo_offset=m) +- * +- * 0 a 1 b 2 a' 3 +- * new: |-----|-----|-----| (a.bo_offset=n,b.bo_offset=m,a'.bo_offset=n+2) +- * +- * +- * 12) Requested mapping is a contiguous subset of the existent one. Split it +- * up, but indicate that the backing PTEs could be kept. +- * +- * :: +- * +- * 0 a 3 +- * old: |-----------------| (bo_offset=n) +- * +- * 1 a 2 +- * req: |-----| (bo_offset=n+1) +- * +- * 0 a' 1 a 2 a'' 3 +- * old: |-----|-----|-----| (a'.bo_offset=n, a.bo_offset=n+1, a''.bo_offset=n+2) +- * +- * +- * 13) Existent mapping is a right aligned subset of the requested one, hence +- * replace the existent one. +- * +- * :: +- * +- * 1 a 2 +- * old: |-----| (bo_offset=n+1) +- * +- * 0 a 2 +- * req: |-----------| (bo_offset=n) +- * +- * 0 a 2 +- * new: |-----------| (bo_offset=n) +- * +- * .. note:: +- * We expect to see the same result for a request with a different bo +- * and/or non-contiguous bo_offset. +- * +- * +- * 14) Existent mapping is a centered subset of the requested one, hence +- * replace the existent one. +- * +- * :: +- * +- * 1 a 2 +- * old: |-----| (bo_offset=n+1) +- * +- * 0 a 3 +- * req: |----------------| (bo_offset=n) +- * +- * 0 a 3 +- * new: |----------------| (bo_offset=n) +- * +- * .. note:: +- * We expect to see the same result for a request with a different bo +- * and/or non-contiguous bo_offset. +- * +- * +- * 15) Existent mappings is overlapped at the beginning by the requested mapping +- * backed by a different BO. Hence, map the requested mapping and split up +- * the existent one, adjusting its BO offset accordingly. +- * +- * :: +- * +- * 1 a 3 +- * old: |-----------| (bo_offset=n) +- * +- * 0 b 2 +- * req: |-----------| (bo_offset=m) +- * +- * 0 b 2 a' 3 +- * new: |-----------|-----| (b.bo_offset=m,a.bo_offset=n+2) +- */ +- +-/** +- * DOC: Locking +- * +- * Generally, the GPU VA manager does not take care of locking itself, it is +- * the drivers responsibility to take care about locking. Drivers might want to +- * protect the following operations: inserting, removing and iterating +- * &drm_gpuva objects as well as generating all kinds of operations, such as +- * split / merge or prefetch. +- * +- * The GPU VA manager also does not take care of the locking of the backing +- * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to +- * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively +- * a driver specific external lock. For the latter see also +- * drm_gem_gpuva_set_lock(). 
+- * +- * However, the GPU VA manager contains lockdep checks to ensure callers of its +- * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is +- * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink(). +- */ +- +-/** +- * DOC: Examples +- * +- * This section gives two examples on how to let the DRM GPUVA Manager generate +- * &drm_gpuva_op in order to satisfy a given map or unmap request and how to +- * make use of them. +- * +- * The below code is strictly limited to illustrate the generic usage pattern. +- * To maintain simplicitly, it doesn't make use of any abstractions for common +- * code, different (asyncronous) stages with fence signalling critical paths, +- * any other helpers or error handling in terms of freeing memory and dropping +- * previously taken locks. +- * +- * 1) Obtain a list of &drm_gpuva_op to create a new mapping:: +- * +- * // Allocates a new &drm_gpuva. +- * struct drm_gpuva * driver_gpuva_alloc(void); +- * +- * // Typically drivers would embedd the &drm_gpuva_manager and &drm_gpuva +- * // structure in individual driver structures and lock the dma-resv with +- * // drm_exec or similar helpers. +- * int driver_mapping_create(struct drm_gpuva_manager *mgr, +- * u64 addr, u64 range, +- * struct drm_gem_object *obj, u64 offset) +- * { +- * struct drm_gpuva_ops *ops; +- * struct drm_gpuva_op *op +- * +- * driver_lock_va_space(); +- * ops = drm_gpuva_sm_map_ops_create(mgr, addr, range, +- * obj, offset); +- * if (IS_ERR(ops)) +- * return PTR_ERR(ops); +- * +- * drm_gpuva_for_each_op(op, ops) { +- * struct drm_gpuva *va; +- * +- * switch (op->op) { +- * case DRM_GPUVA_OP_MAP: +- * va = driver_gpuva_alloc(); +- * if (!va) +- * ; // unwind previous VA space updates, +- * // free memory and unlock +- * +- * driver_vm_map(); +- * drm_gpuva_map(mgr, va, &op->map); +- * drm_gpuva_link(va); +- * +- * break; +- * case DRM_GPUVA_OP_REMAP: { +- * struct drm_gpuva *prev = NULL, *next = NULL; +- * +- * va = op->remap.unmap->va; +- * +- * if (op->remap.prev) { +- * prev = driver_gpuva_alloc(); +- * if (!prev) +- * ; // unwind previous VA space +- * // updates, free memory and +- * // unlock +- * } +- * +- * if (op->remap.next) { +- * next = driver_gpuva_alloc(); +- * if (!next) +- * ; // unwind previous VA space +- * // updates, free memory and +- * // unlock +- * } +- * +- * driver_vm_remap(); +- * drm_gpuva_remap(prev, next, &op->remap); +- * +- * drm_gpuva_unlink(va); +- * if (prev) +- * drm_gpuva_link(prev); +- * if (next) +- * drm_gpuva_link(next); +- * +- * break; +- * } +- * case DRM_GPUVA_OP_UNMAP: +- * va = op->unmap->va; +- * +- * driver_vm_unmap(); +- * drm_gpuva_unlink(va); +- * drm_gpuva_unmap(&op->unmap); +- * +- * break; +- * default: +- * break; +- * } +- * } +- * driver_unlock_va_space(); +- * +- * return 0; +- * } +- * +- * 2) Receive a callback for each &drm_gpuva_op to create a new mapping:: +- * +- * struct driver_context { +- * struct drm_gpuva_manager *mgr; +- * struct drm_gpuva *new_va; +- * struct drm_gpuva *prev_va; +- * struct drm_gpuva *next_va; +- * }; +- * +- * // ops to pass to drm_gpuva_manager_init() +- * static const struct drm_gpuva_fn_ops driver_gpuva_ops = { +- * .sm_step_map = driver_gpuva_map, +- * .sm_step_remap = driver_gpuva_remap, +- * .sm_step_unmap = driver_gpuva_unmap, +- * }; +- * +- * // Typically drivers would embedd the &drm_gpuva_manager and &drm_gpuva +- * // structure in individual driver structures and lock the dma-resv with +- * // drm_exec or similar helpers. 
+- * int driver_mapping_create(struct drm_gpuva_manager *mgr, +- * u64 addr, u64 range, +- * struct drm_gem_object *obj, u64 offset) +- * { +- * struct driver_context ctx; +- * struct drm_gpuva_ops *ops; +- * struct drm_gpuva_op *op; +- * int ret = 0; +- * +- * ctx.mgr = mgr; +- * +- * ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL); +- * ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL); +- * ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL); +- * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) { +- * ret = -ENOMEM; +- * goto out; +- * } +- * +- * driver_lock_va_space(); +- * ret = drm_gpuva_sm_map(mgr, &ctx, addr, range, obj, offset); +- * driver_unlock_va_space(); +- * +- * out: +- * kfree(ctx.new_va); +- * kfree(ctx.prev_va); +- * kfree(ctx.next_va); +- * return ret; +- * } +- * +- * int driver_gpuva_map(struct drm_gpuva_op *op, void *__ctx) +- * { +- * struct driver_context *ctx = __ctx; +- * +- * drm_gpuva_map(ctx->mgr, ctx->new_va, &op->map); +- * +- * drm_gpuva_link(ctx->new_va); +- * +- * // prevent the new GPUVA from being freed in +- * // driver_mapping_create() +- * ctx->new_va = NULL; +- * +- * return 0; +- * } +- * +- * int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx) +- * { +- * struct driver_context *ctx = __ctx; +- * +- * drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap); +- * +- * drm_gpuva_unlink(op->remap.unmap->va); +- * kfree(op->remap.unmap->va); +- * +- * if (op->remap.prev) { +- * drm_gpuva_link(ctx->prev_va); +- * ctx->prev_va = NULL; +- * } +- * +- * if (op->remap.next) { +- * drm_gpuva_link(ctx->next_va); +- * ctx->next_va = NULL; +- * } +- * +- * return 0; +- * } +- * +- * int driver_gpuva_unmap(struct drm_gpuva_op *op, void *__ctx) +- * { +- * drm_gpuva_unlink(op->unmap.va); +- * drm_gpuva_unmap(&op->unmap); +- * kfree(op->unmap.va); +- * +- * return 0; +- * } +- */ +- +-#define to_drm_gpuva(__node) container_of((__node), struct drm_gpuva, rb.node) +- +-#define GPUVA_START(node) ((node)->va.addr) +-#define GPUVA_LAST(node) ((node)->va.addr + (node)->va.range - 1) +- +-/* We do not actually use drm_gpuva_it_next(), tell the compiler to not complain +- * about this. 
+- */ +-INTERVAL_TREE_DEFINE(struct drm_gpuva, rb.node, u64, rb.__subtree_last, +- GPUVA_START, GPUVA_LAST, static __maybe_unused, +- drm_gpuva_it) +- +-static int __drm_gpuva_insert(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va); +-static void __drm_gpuva_remove(struct drm_gpuva *va); +- +-static bool +-drm_gpuva_check_overflow(u64 addr, u64 range) +-{ +- u64 end; +- +- return WARN(check_add_overflow(addr, range, &end), +- "GPUVA address limited to %zu bytes.\n", sizeof(end)); +-} +- +-static bool +-drm_gpuva_in_mm_range(struct drm_gpuva_manager *mgr, u64 addr, u64 range) +-{ +- u64 end = addr + range; +- u64 mm_start = mgr->mm_start; +- u64 mm_end = mm_start + mgr->mm_range; +- +- return addr >= mm_start && end <= mm_end; +-} +- +-static bool +-drm_gpuva_in_kernel_node(struct drm_gpuva_manager *mgr, u64 addr, u64 range) +-{ +- u64 end = addr + range; +- u64 kstart = mgr->kernel_alloc_node.va.addr; +- u64 krange = mgr->kernel_alloc_node.va.range; +- u64 kend = kstart + krange; +- +- return krange && addr < kend && kstart < end; +-} +- +-static bool +-drm_gpuva_range_valid(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range) +-{ +- return !drm_gpuva_check_overflow(addr, range) && +- drm_gpuva_in_mm_range(mgr, addr, range) && +- !drm_gpuva_in_kernel_node(mgr, addr, range); +-} +- +-/** +- * drm_gpuva_manager_init() - initialize a &drm_gpuva_manager +- * @mgr: pointer to the &drm_gpuva_manager to initialize +- * @name: the name of the GPU VA space +- * @start_offset: the start offset of the GPU VA space +- * @range: the size of the GPU VA space +- * @reserve_offset: the start of the kernel reserved GPU VA area +- * @reserve_range: the size of the kernel reserved GPU VA area +- * @ops: &drm_gpuva_fn_ops called on &drm_gpuva_sm_map / &drm_gpuva_sm_unmap +- * +- * The &drm_gpuva_manager must be initialized with this function before use. +- * +- * Note that @mgr must be cleared to 0 before calling this function. The given +- * &name is expected to be managed by the surrounding driver structures. +- */ +-void +-drm_gpuva_manager_init(struct drm_gpuva_manager *mgr, +- const char *name, +- u64 start_offset, u64 range, +- u64 reserve_offset, u64 reserve_range, +- const struct drm_gpuva_fn_ops *ops) +-{ +- mgr->rb.tree = RB_ROOT_CACHED; +- INIT_LIST_HEAD(&mgr->rb.list); +- +- drm_gpuva_check_overflow(start_offset, range); +- mgr->mm_start = start_offset; +- mgr->mm_range = range; +- +- mgr->name = name ? name : "unknown"; +- mgr->ops = ops; +- +- memset(&mgr->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); +- +- if (reserve_range) { +- mgr->kernel_alloc_node.va.addr = reserve_offset; +- mgr->kernel_alloc_node.va.range = reserve_range; +- +- if (likely(!drm_gpuva_check_overflow(reserve_offset, +- reserve_range))) +- __drm_gpuva_insert(mgr, &mgr->kernel_alloc_node); +- } +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_manager_init); +- +-/** +- * drm_gpuva_manager_destroy() - cleanup a &drm_gpuva_manager +- * @mgr: pointer to the &drm_gpuva_manager to clean up +- * +- * Note that it is a bug to call this function on a manager that still +- * holds GPU VA mappings. 
+- */ +-void +-drm_gpuva_manager_destroy(struct drm_gpuva_manager *mgr) +-{ +- mgr->name = NULL; +- +- if (mgr->kernel_alloc_node.va.range) +- __drm_gpuva_remove(&mgr->kernel_alloc_node); +- +- WARN(!RB_EMPTY_ROOT(&mgr->rb.tree.rb_root), +- "GPUVA tree is not empty, potentially leaking memory."); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_manager_destroy); +- +-static int +-__drm_gpuva_insert(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va) +-{ +- struct rb_node *node; +- struct list_head *head; +- +- if (drm_gpuva_it_iter_first(&mgr->rb.tree, +- GPUVA_START(va), +- GPUVA_LAST(va))) +- return -EEXIST; +- +- va->mgr = mgr; +- +- drm_gpuva_it_insert(va, &mgr->rb.tree); +- +- node = rb_prev(&va->rb.node); +- if (node) +- head = &(to_drm_gpuva(node))->rb.entry; +- else +- head = &mgr->rb.list; +- +- list_add(&va->rb.entry, head); +- +- return 0; +-} +- +-/** +- * drm_gpuva_insert() - insert a &drm_gpuva +- * @mgr: the &drm_gpuva_manager to insert the &drm_gpuva in +- * @va: the &drm_gpuva to insert +- * +- * Insert a &drm_gpuva with a given address and range into a +- * &drm_gpuva_manager. +- * +- * It is safe to use this function using the safe versions of iterating the GPU +- * VA space, such as drm_gpuva_for_each_va_safe() and +- * drm_gpuva_for_each_va_range_safe(). +- * +- * Returns: 0 on success, negative error code on failure. +- */ +-int +-drm_gpuva_insert(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va) +-{ +- u64 addr = va->va.addr; +- u64 range = va->va.range; +- +- if (unlikely(!drm_gpuva_range_valid(mgr, addr, range))) +- return -EINVAL; +- +- return __drm_gpuva_insert(mgr, va); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_insert); +- +-static void +-__drm_gpuva_remove(struct drm_gpuva *va) +-{ +- drm_gpuva_it_remove(va, &va->mgr->rb.tree); +- list_del_init(&va->rb.entry); +-} +- +-/** +- * drm_gpuva_remove() - remove a &drm_gpuva +- * @va: the &drm_gpuva to remove +- * +- * This removes the given &va from the underlaying tree. +- * +- * It is safe to use this function using the safe versions of iterating the GPU +- * VA space, such as drm_gpuva_for_each_va_safe() and +- * drm_gpuva_for_each_va_range_safe(). +- */ +-void +-drm_gpuva_remove(struct drm_gpuva *va) +-{ +- struct drm_gpuva_manager *mgr = va->mgr; +- +- if (unlikely(va == &mgr->kernel_alloc_node)) { +- WARN(1, "Can't destroy kernel reserved node.\n"); +- return; +- } +- +- __drm_gpuva_remove(va); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_remove); +- +-/** +- * drm_gpuva_link() - link a &drm_gpuva +- * @va: the &drm_gpuva to link +- * +- * This adds the given &va to the GPU VA list of the &drm_gem_object it is +- * associated with. +- * +- * This function expects the caller to protect the GEM's GPUVA list against +- * concurrent access using the GEMs dma_resv lock. +- */ +-void +-drm_gpuva_link(struct drm_gpuva *va) +-{ +- struct drm_gem_object *obj = va->gem.obj; +- +- if (unlikely(!obj)) +- return; +- +- drm_gem_gpuva_assert_lock_held(obj); +- +- list_add_tail(&va->gem.entry, &obj->gpuva.list); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_link); +- +-/** +- * drm_gpuva_unlink() - unlink a &drm_gpuva +- * @va: the &drm_gpuva to unlink +- * +- * This removes the given &va from the GPU VA list of the &drm_gem_object it is +- * associated with. +- * +- * This function expects the caller to protect the GEM's GPUVA list against +- * concurrent access using the GEMs dma_resv lock. 
+- */ +-void +-drm_gpuva_unlink(struct drm_gpuva *va) +-{ +- struct drm_gem_object *obj = va->gem.obj; +- +- if (unlikely(!obj)) +- return; +- +- drm_gem_gpuva_assert_lock_held(obj); +- +- list_del_init(&va->gem.entry); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_unlink); +- +-/** +- * drm_gpuva_find_first() - find the first &drm_gpuva in the given range +- * @mgr: the &drm_gpuva_manager to search in +- * @addr: the &drm_gpuvas address +- * @range: the &drm_gpuvas range +- * +- * Returns: the first &drm_gpuva within the given range +- */ +-struct drm_gpuva * +-drm_gpuva_find_first(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range) +-{ +- u64 last = addr + range - 1; +- +- return drm_gpuva_it_iter_first(&mgr->rb.tree, addr, last); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_find_first); +- +-/** +- * drm_gpuva_find() - find a &drm_gpuva +- * @mgr: the &drm_gpuva_manager to search in +- * @addr: the &drm_gpuvas address +- * @range: the &drm_gpuvas range +- * +- * Returns: the &drm_gpuva at a given &addr and with a given &range +- */ +-struct drm_gpuva * +-drm_gpuva_find(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range) +-{ +- struct drm_gpuva *va; +- +- va = drm_gpuva_find_first(mgr, addr, range); +- if (!va) +- goto out; +- +- if (va->va.addr != addr || +- va->va.range != range) +- goto out; +- +- return va; +- +-out: +- return NULL; +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_find); +- +-/** +- * drm_gpuva_find_prev() - find the &drm_gpuva before the given address +- * @mgr: the &drm_gpuva_manager to search in +- * @start: the given GPU VA's start address +- * +- * Find the adjacent &drm_gpuva before the GPU VA with given &start address. +- * +- * Note that if there is any free space between the GPU VA mappings no mapping +- * is returned. +- * +- * Returns: a pointer to the found &drm_gpuva or NULL if none was found +- */ +-struct drm_gpuva * +-drm_gpuva_find_prev(struct drm_gpuva_manager *mgr, u64 start) +-{ +- if (!drm_gpuva_range_valid(mgr, start - 1, 1)) +- return NULL; +- +- return drm_gpuva_it_iter_first(&mgr->rb.tree, start - 1, start); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_find_prev); +- +-/** +- * drm_gpuva_find_next() - find the &drm_gpuva after the given address +- * @mgr: the &drm_gpuva_manager to search in +- * @end: the given GPU VA's end address +- * +- * Find the adjacent &drm_gpuva after the GPU VA with given &end address. +- * +- * Note that if there is any free space between the GPU VA mappings no mapping +- * is returned. 
+- * +- * Returns: a pointer to the found &drm_gpuva or NULL if none was found +- */ +-struct drm_gpuva * +-drm_gpuva_find_next(struct drm_gpuva_manager *mgr, u64 end) +-{ +- if (!drm_gpuva_range_valid(mgr, end, 1)) +- return NULL; +- +- return drm_gpuva_it_iter_first(&mgr->rb.tree, end, end + 1); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_find_next); +- +-/** +- * drm_gpuva_interval_empty() - indicate whether a given interval of the VA space +- * is empty +- * @mgr: the &drm_gpuva_manager to check the range for +- * @addr: the start address of the range +- * @range: the range of the interval +- * +- * Returns: true if the interval is empty, false otherwise +- */ +-bool +-drm_gpuva_interval_empty(struct drm_gpuva_manager *mgr, u64 addr, u64 range) +-{ +- return !drm_gpuva_find_first(mgr, addr, range); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_interval_empty); +- +-/** +- * drm_gpuva_map() - helper to insert a &drm_gpuva according to a +- * &drm_gpuva_op_map +- * @mgr: the &drm_gpuva_manager +- * @va: the &drm_gpuva to insert +- * @op: the &drm_gpuva_op_map to initialize @va with +- * +- * Initializes the @va from the @op and inserts it into the given @mgr. +- */ +-void +-drm_gpuva_map(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va, +- struct drm_gpuva_op_map *op) +-{ +- drm_gpuva_init_from_op(va, op); +- drm_gpuva_insert(mgr, va); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_map); +- +-/** +- * drm_gpuva_remap() - helper to remap a &drm_gpuva according to a +- * &drm_gpuva_op_remap +- * @prev: the &drm_gpuva to remap when keeping the start of a mapping +- * @next: the &drm_gpuva to remap when keeping the end of a mapping +- * @op: the &drm_gpuva_op_remap to initialize @prev and @next with +- * +- * Removes the currently mapped &drm_gpuva and remaps it using @prev and/or +- * @next. +- */ +-void +-drm_gpuva_remap(struct drm_gpuva *prev, +- struct drm_gpuva *next, +- struct drm_gpuva_op_remap *op) +-{ +- struct drm_gpuva *curr = op->unmap->va; +- struct drm_gpuva_manager *mgr = curr->mgr; +- +- drm_gpuva_remove(curr); +- +- if (op->prev) { +- drm_gpuva_init_from_op(prev, op->prev); +- drm_gpuva_insert(mgr, prev); +- } +- +- if (op->next) { +- drm_gpuva_init_from_op(next, op->next); +- drm_gpuva_insert(mgr, next); +- } +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_remap); +- +-/** +- * drm_gpuva_unmap() - helper to remove a &drm_gpuva according to a +- * &drm_gpuva_op_unmap +- * @op: the &drm_gpuva_op_unmap specifying the &drm_gpuva to remove +- * +- * Removes the &drm_gpuva associated with the &drm_gpuva_op_unmap. 
+- */ +-void +-drm_gpuva_unmap(struct drm_gpuva_op_unmap *op) +-{ +- drm_gpuva_remove(op->va); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_unmap); +- +-static int +-op_map_cb(const struct drm_gpuva_fn_ops *fn, void *priv, +- u64 addr, u64 range, +- struct drm_gem_object *obj, u64 offset) +-{ +- struct drm_gpuva_op op = {}; +- +- op.op = DRM_GPUVA_OP_MAP; +- op.map.va.addr = addr; +- op.map.va.range = range; +- op.map.gem.obj = obj; +- op.map.gem.offset = offset; +- +- return fn->sm_step_map(&op, priv); +-} +- +-static int +-op_remap_cb(const struct drm_gpuva_fn_ops *fn, void *priv, +- struct drm_gpuva_op_map *prev, +- struct drm_gpuva_op_map *next, +- struct drm_gpuva_op_unmap *unmap) +-{ +- struct drm_gpuva_op op = {}; +- struct drm_gpuva_op_remap *r; +- +- op.op = DRM_GPUVA_OP_REMAP; +- r = &op.remap; +- r->prev = prev; +- r->next = next; +- r->unmap = unmap; +- +- return fn->sm_step_remap(&op, priv); +-} +- +-static int +-op_unmap_cb(const struct drm_gpuva_fn_ops *fn, void *priv, +- struct drm_gpuva *va, bool merge) +-{ +- struct drm_gpuva_op op = {}; +- +- op.op = DRM_GPUVA_OP_UNMAP; +- op.unmap.va = va; +- op.unmap.keep = merge; +- +- return fn->sm_step_unmap(&op, priv); +-} +- +-static int +-__drm_gpuva_sm_map(struct drm_gpuva_manager *mgr, +- const struct drm_gpuva_fn_ops *ops, void *priv, +- u64 req_addr, u64 req_range, +- struct drm_gem_object *req_obj, u64 req_offset) +-{ +- struct drm_gpuva *va, *next; +- u64 req_end = req_addr + req_range; +- int ret; +- +- if (unlikely(!drm_gpuva_range_valid(mgr, req_addr, req_range))) +- return -EINVAL; +- +- drm_gpuva_for_each_va_range_safe(va, next, mgr, req_addr, req_end) { +- struct drm_gem_object *obj = va->gem.obj; +- u64 offset = va->gem.offset; +- u64 addr = va->va.addr; +- u64 range = va->va.range; +- u64 end = addr + range; +- bool merge = !!va->gem.obj; +- +- if (addr == req_addr) { +- merge &= obj == req_obj && +- offset == req_offset; +- +- if (end == req_end) { +- ret = op_unmap_cb(ops, priv, va, merge); +- if (ret) +- return ret; +- break; +- } +- +- if (end < req_end) { +- ret = op_unmap_cb(ops, priv, va, merge); +- if (ret) +- return ret; +- continue; +- } +- +- if (end > req_end) { +- struct drm_gpuva_op_map n = { +- .va.addr = req_end, +- .va.range = range - req_range, +- .gem.obj = obj, +- .gem.offset = offset + req_range, +- }; +- struct drm_gpuva_op_unmap u = { +- .va = va, +- .keep = merge, +- }; +- +- ret = op_remap_cb(ops, priv, NULL, &n, &u); +- if (ret) +- return ret; +- break; +- } +- } else if (addr < req_addr) { +- u64 ls_range = req_addr - addr; +- struct drm_gpuva_op_map p = { +- .va.addr = addr, +- .va.range = ls_range, +- .gem.obj = obj, +- .gem.offset = offset, +- }; +- struct drm_gpuva_op_unmap u = { .va = va }; +- +- merge &= obj == req_obj && +- offset + ls_range == req_offset; +- u.keep = merge; +- +- if (end == req_end) { +- ret = op_remap_cb(ops, priv, &p, NULL, &u); +- if (ret) +- return ret; +- break; +- } +- +- if (end < req_end) { +- ret = op_remap_cb(ops, priv, &p, NULL, &u); +- if (ret) +- return ret; +- continue; +- } +- +- if (end > req_end) { +- struct drm_gpuva_op_map n = { +- .va.addr = req_end, +- .va.range = end - req_end, +- .gem.obj = obj, +- .gem.offset = offset + ls_range + +- req_range, +- }; +- +- ret = op_remap_cb(ops, priv, &p, &n, &u); +- if (ret) +- return ret; +- break; +- } +- } else if (addr > req_addr) { +- merge &= obj == req_obj && +- offset == req_offset + +- (addr - req_addr); +- +- if (end == req_end) { +- ret = op_unmap_cb(ops, priv, va, merge); +- if (ret) +- return ret; +- 
break; +- } +- +- if (end < req_end) { +- ret = op_unmap_cb(ops, priv, va, merge); +- if (ret) +- return ret; +- continue; +- } +- +- if (end > req_end) { +- struct drm_gpuva_op_map n = { +- .va.addr = req_end, +- .va.range = end - req_end, +- .gem.obj = obj, +- .gem.offset = offset + req_end - addr, +- }; +- struct drm_gpuva_op_unmap u = { +- .va = va, +- .keep = merge, +- }; +- +- ret = op_remap_cb(ops, priv, NULL, &n, &u); +- if (ret) +- return ret; +- break; +- } +- } +- } +- +- return op_map_cb(ops, priv, +- req_addr, req_range, +- req_obj, req_offset); +-} +- +-static int +-__drm_gpuva_sm_unmap(struct drm_gpuva_manager *mgr, +- const struct drm_gpuva_fn_ops *ops, void *priv, +- u64 req_addr, u64 req_range) +-{ +- struct drm_gpuva *va, *next; +- u64 req_end = req_addr + req_range; +- int ret; +- +- if (unlikely(!drm_gpuva_range_valid(mgr, req_addr, req_range))) +- return -EINVAL; +- +- drm_gpuva_for_each_va_range_safe(va, next, mgr, req_addr, req_end) { +- struct drm_gpuva_op_map prev = {}, next = {}; +- bool prev_split = false, next_split = false; +- struct drm_gem_object *obj = va->gem.obj; +- u64 offset = va->gem.offset; +- u64 addr = va->va.addr; +- u64 range = va->va.range; +- u64 end = addr + range; +- +- if (addr < req_addr) { +- prev.va.addr = addr; +- prev.va.range = req_addr - addr; +- prev.gem.obj = obj; +- prev.gem.offset = offset; +- +- prev_split = true; +- } +- +- if (end > req_end) { +- next.va.addr = req_end; +- next.va.range = end - req_end; +- next.gem.obj = obj; +- next.gem.offset = offset + (req_end - addr); +- +- next_split = true; +- } +- +- if (prev_split || next_split) { +- struct drm_gpuva_op_unmap unmap = { .va = va }; +- +- ret = op_remap_cb(ops, priv, +- prev_split ? &prev : NULL, +- next_split ? &next : NULL, +- &unmap); +- if (ret) +- return ret; +- } else { +- ret = op_unmap_cb(ops, priv, va, false); +- if (ret) +- return ret; +- } +- } +- +- return 0; +-} +- +-/** +- * drm_gpuva_sm_map() - creates the &drm_gpuva_op split/merge steps +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @req_addr: the start address of the new mapping +- * @req_range: the range of the new mapping +- * @req_obj: the &drm_gem_object to map +- * @req_offset: the offset within the &drm_gem_object +- * @priv: pointer to a driver private data structure +- * +- * This function iterates the given range of the GPU VA space. It utilizes the +- * &drm_gpuva_fn_ops to call back into the driver providing the split and merge +- * steps. +- * +- * Drivers may use these callbacks to update the GPU VA space right away within +- * the callback. In case the driver decides to copy and store the operations for +- * later processing neither this function nor &drm_gpuva_sm_unmap is allowed to +- * be called before the &drm_gpuva_manager's view of the GPU VA space was +- * updated with the previous set of operations. To update the +- * &drm_gpuva_manager's view of the GPU VA space drm_gpuva_insert(), +- * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be +- * used. +- * +- * A sequence of callbacks can contain map, unmap and remap operations, but +- * the sequence of callbacks might also be empty if no operation is required, +- * e.g. if the requested mapping already exists in the exact same way. +- * +- * There can be an arbitrary amount of unmap operations, a maximum of two remap +- * operations and a single map operation. The latter one represents the original +- * map operation requested by the caller. 
+- * +- * Returns: 0 on success or a negative error code +- */ +-int +-drm_gpuva_sm_map(struct drm_gpuva_manager *mgr, void *priv, +- u64 req_addr, u64 req_range, +- struct drm_gem_object *req_obj, u64 req_offset) +-{ +- const struct drm_gpuva_fn_ops *ops = mgr->ops; +- +- if (unlikely(!(ops && ops->sm_step_map && +- ops->sm_step_remap && +- ops->sm_step_unmap))) +- return -EINVAL; +- +- return __drm_gpuva_sm_map(mgr, ops, priv, +- req_addr, req_range, +- req_obj, req_offset); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_sm_map); +- +-/** +- * drm_gpuva_sm_unmap() - creates the &drm_gpuva_ops to split on unmap +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @priv: pointer to a driver private data structure +- * @req_addr: the start address of the range to unmap +- * @req_range: the range of the mappings to unmap +- * +- * This function iterates the given range of the GPU VA space. It utilizes the +- * &drm_gpuva_fn_ops to call back into the driver providing the operations to +- * unmap and, if required, split existent mappings. +- * +- * Drivers may use these callbacks to update the GPU VA space right away within +- * the callback. In case the driver decides to copy and store the operations for +- * later processing neither this function nor &drm_gpuva_sm_map is allowed to be +- * called before the &drm_gpuva_manager's view of the GPU VA space was updated +- * with the previous set of operations. To update the &drm_gpuva_manager's view +- * of the GPU VA space drm_gpuva_insert(), drm_gpuva_destroy_locked() and/or +- * drm_gpuva_destroy_unlocked() should be used. +- * +- * A sequence of callbacks can contain unmap and remap operations, depending on +- * whether there are actual overlapping mappings to split. +- * +- * There can be an arbitrary amount of unmap operations and a maximum of two +- * remap operations. 
+- * +- * Returns: 0 on success or a negative error code +- */ +-int +-drm_gpuva_sm_unmap(struct drm_gpuva_manager *mgr, void *priv, +- u64 req_addr, u64 req_range) +-{ +- const struct drm_gpuva_fn_ops *ops = mgr->ops; +- +- if (unlikely(!(ops && ops->sm_step_remap && +- ops->sm_step_unmap))) +- return -EINVAL; +- +- return __drm_gpuva_sm_unmap(mgr, ops, priv, +- req_addr, req_range); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_sm_unmap); +- +-static struct drm_gpuva_op * +-gpuva_op_alloc(struct drm_gpuva_manager *mgr) +-{ +- const struct drm_gpuva_fn_ops *fn = mgr->ops; +- struct drm_gpuva_op *op; +- +- if (fn && fn->op_alloc) +- op = fn->op_alloc(); +- else +- op = kzalloc(sizeof(*op), GFP_KERNEL); +- +- if (unlikely(!op)) +- return NULL; +- +- return op; +-} +- +-static void +-gpuva_op_free(struct drm_gpuva_manager *mgr, +- struct drm_gpuva_op *op) +-{ +- const struct drm_gpuva_fn_ops *fn = mgr->ops; +- +- if (fn && fn->op_free) +- fn->op_free(op); +- else +- kfree(op); +-} +- +-static int +-drm_gpuva_sm_step(struct drm_gpuva_op *__op, +- void *priv) +-{ +- struct { +- struct drm_gpuva_manager *mgr; +- struct drm_gpuva_ops *ops; +- } *args = priv; +- struct drm_gpuva_manager *mgr = args->mgr; +- struct drm_gpuva_ops *ops = args->ops; +- struct drm_gpuva_op *op; +- +- op = gpuva_op_alloc(mgr); +- if (unlikely(!op)) +- goto err; +- +- memcpy(op, __op, sizeof(*op)); +- +- if (op->op == DRM_GPUVA_OP_REMAP) { +- struct drm_gpuva_op_remap *__r = &__op->remap; +- struct drm_gpuva_op_remap *r = &op->remap; +- +- r->unmap = kmemdup(__r->unmap, sizeof(*r->unmap), +- GFP_KERNEL); +- if (unlikely(!r->unmap)) +- goto err_free_op; +- +- if (__r->prev) { +- r->prev = kmemdup(__r->prev, sizeof(*r->prev), +- GFP_KERNEL); +- if (unlikely(!r->prev)) +- goto err_free_unmap; +- } +- +- if (__r->next) { +- r->next = kmemdup(__r->next, sizeof(*r->next), +- GFP_KERNEL); +- if (unlikely(!r->next)) +- goto err_free_prev; +- } +- } +- +- list_add_tail(&op->entry, &ops->list); +- +- return 0; +- +-err_free_unmap: +- kfree(op->remap.unmap); +-err_free_prev: +- kfree(op->remap.prev); +-err_free_op: +- gpuva_op_free(mgr, op); +-err: +- return -ENOMEM; +-} +- +-static const struct drm_gpuva_fn_ops gpuva_list_ops = { +- .sm_step_map = drm_gpuva_sm_step, +- .sm_step_remap = drm_gpuva_sm_step, +- .sm_step_unmap = drm_gpuva_sm_step, +-}; +- +-/** +- * drm_gpuva_sm_map_ops_create() - creates the &drm_gpuva_ops to split and merge +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @req_addr: the start address of the new mapping +- * @req_range: the range of the new mapping +- * @req_obj: the &drm_gem_object to map +- * @req_offset: the offset within the &drm_gem_object +- * +- * This function creates a list of operations to perform splitting and merging +- * of existent mapping(s) with the newly requested one. +- * +- * The list can be iterated with &drm_gpuva_for_each_op and must be processed +- * in the given order. It can contain map, unmap and remap operations, but it +- * also can be empty if no operation is required, e.g. if the requested mapping +- * already exists is the exact same way. +- * +- * There can be an arbitrary amount of unmap operations, a maximum of two remap +- * operations and a single map operation. The latter one represents the original +- * map operation requested by the caller. +- * +- * Note that before calling this function again with another mapping request it +- * is necessary to update the &drm_gpuva_manager's view of the GPU VA space. 
The +- * previously obtained operations must be either processed or abandoned. To +- * update the &drm_gpuva_manager's view of the GPU VA space drm_gpuva_insert(), +- * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be +- * used. +- * +- * After the caller finished processing the returned &drm_gpuva_ops, they must +- * be freed with &drm_gpuva_ops_free. +- * +- * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure +- */ +-struct drm_gpuva_ops * +-drm_gpuva_sm_map_ops_create(struct drm_gpuva_manager *mgr, +- u64 req_addr, u64 req_range, +- struct drm_gem_object *req_obj, u64 req_offset) +-{ +- struct drm_gpuva_ops *ops; +- struct { +- struct drm_gpuva_manager *mgr; +- struct drm_gpuva_ops *ops; +- } args; +- int ret; +- +- ops = kzalloc(sizeof(*ops), GFP_KERNEL); +- if (unlikely(!ops)) +- return ERR_PTR(-ENOMEM); +- +- INIT_LIST_HEAD(&ops->list); +- +- args.mgr = mgr; +- args.ops = ops; +- +- ret = __drm_gpuva_sm_map(mgr, &gpuva_list_ops, &args, +- req_addr, req_range, +- req_obj, req_offset); +- if (ret) +- goto err_free_ops; +- +- return ops; +- +-err_free_ops: +- drm_gpuva_ops_free(mgr, ops); +- return ERR_PTR(ret); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_sm_map_ops_create); +- +-/** +- * drm_gpuva_sm_unmap_ops_create() - creates the &drm_gpuva_ops to split on +- * unmap +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @req_addr: the start address of the range to unmap +- * @req_range: the range of the mappings to unmap +- * +- * This function creates a list of operations to perform unmapping and, if +- * required, splitting of the mappings overlapping the unmap range. +- * +- * The list can be iterated with &drm_gpuva_for_each_op and must be processed +- * in the given order. It can contain unmap and remap operations, depending on +- * whether there are actual overlapping mappings to split. +- * +- * There can be an arbitrary amount of unmap operations and a maximum of two +- * remap operations. +- * +- * Note that before calling this function again with another range to unmap it +- * is necessary to update the &drm_gpuva_manager's view of the GPU VA space. The +- * previously obtained operations must be processed or abandoned. To update the +- * &drm_gpuva_manager's view of the GPU VA space drm_gpuva_insert(), +- * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be +- * used. +- * +- * After the caller finished processing the returned &drm_gpuva_ops, they must +- * be freed with &drm_gpuva_ops_free. 
+- * +- * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure +- */ +-struct drm_gpuva_ops * +-drm_gpuva_sm_unmap_ops_create(struct drm_gpuva_manager *mgr, +- u64 req_addr, u64 req_range) +-{ +- struct drm_gpuva_ops *ops; +- struct { +- struct drm_gpuva_manager *mgr; +- struct drm_gpuva_ops *ops; +- } args; +- int ret; +- +- ops = kzalloc(sizeof(*ops), GFP_KERNEL); +- if (unlikely(!ops)) +- return ERR_PTR(-ENOMEM); +- +- INIT_LIST_HEAD(&ops->list); +- +- args.mgr = mgr; +- args.ops = ops; +- +- ret = __drm_gpuva_sm_unmap(mgr, &gpuva_list_ops, &args, +- req_addr, req_range); +- if (ret) +- goto err_free_ops; +- +- return ops; +- +-err_free_ops: +- drm_gpuva_ops_free(mgr, ops); +- return ERR_PTR(ret); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_sm_unmap_ops_create); +- +-/** +- * drm_gpuva_prefetch_ops_create() - creates the &drm_gpuva_ops to prefetch +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @addr: the start address of the range to prefetch +- * @range: the range of the mappings to prefetch +- * +- * This function creates a list of operations to perform prefetching. +- * +- * The list can be iterated with &drm_gpuva_for_each_op and must be processed +- * in the given order. It can contain prefetch operations. +- * +- * There can be an arbitrary amount of prefetch operations. +- * +- * After the caller finished processing the returned &drm_gpuva_ops, they must +- * be freed with &drm_gpuva_ops_free. +- * +- * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure +- */ +-struct drm_gpuva_ops * +-drm_gpuva_prefetch_ops_create(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range) +-{ +- struct drm_gpuva_ops *ops; +- struct drm_gpuva_op *op; +- struct drm_gpuva *va; +- u64 end = addr + range; +- int ret; +- +- ops = kzalloc(sizeof(*ops), GFP_KERNEL); +- if (!ops) +- return ERR_PTR(-ENOMEM); +- +- INIT_LIST_HEAD(&ops->list); +- +- drm_gpuva_for_each_va_range(va, mgr, addr, end) { +- op = gpuva_op_alloc(mgr); +- if (!op) { +- ret = -ENOMEM; +- goto err_free_ops; +- } +- +- op->op = DRM_GPUVA_OP_PREFETCH; +- op->prefetch.va = va; +- list_add_tail(&op->entry, &ops->list); +- } +- +- return ops; +- +-err_free_ops: +- drm_gpuva_ops_free(mgr, ops); +- return ERR_PTR(ret); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_prefetch_ops_create); +- +-/** +- * drm_gpuva_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @obj: the &drm_gem_object to unmap +- * +- * This function creates a list of operations to perform unmapping for every +- * GPUVA attached to a GEM. +- * +- * The list can be iterated with &drm_gpuva_for_each_op and consists out of an +- * arbitrary amount of unmap operations. +- * +- * After the caller finished processing the returned &drm_gpuva_ops, they must +- * be freed with &drm_gpuva_ops_free. +- * +- * It is the callers responsibility to protect the GEMs GPUVA list against +- * concurrent access using the GEMs dma_resv lock. 
+- * +- * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure +- */ +-struct drm_gpuva_ops * +-drm_gpuva_gem_unmap_ops_create(struct drm_gpuva_manager *mgr, +- struct drm_gem_object *obj) +-{ +- struct drm_gpuva_ops *ops; +- struct drm_gpuva_op *op; +- struct drm_gpuva *va; +- int ret; +- +- drm_gem_gpuva_assert_lock_held(obj); +- +- ops = kzalloc(sizeof(*ops), GFP_KERNEL); +- if (!ops) +- return ERR_PTR(-ENOMEM); +- +- INIT_LIST_HEAD(&ops->list); +- +- drm_gem_for_each_gpuva(va, obj) { +- op = gpuva_op_alloc(mgr); +- if (!op) { +- ret = -ENOMEM; +- goto err_free_ops; +- } +- +- op->op = DRM_GPUVA_OP_UNMAP; +- op->unmap.va = va; +- list_add_tail(&op->entry, &ops->list); +- } +- +- return ops; +- +-err_free_ops: +- drm_gpuva_ops_free(mgr, ops); +- return ERR_PTR(ret); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_gem_unmap_ops_create); +- +-/** +- * drm_gpuva_ops_free() - free the given &drm_gpuva_ops +- * @mgr: the &drm_gpuva_manager the ops were created for +- * @ops: the &drm_gpuva_ops to free +- * +- * Frees the given &drm_gpuva_ops structure including all the ops associated +- * with it. +- */ +-void +-drm_gpuva_ops_free(struct drm_gpuva_manager *mgr, +- struct drm_gpuva_ops *ops) +-{ +- struct drm_gpuva_op *op, *next; +- +- drm_gpuva_for_each_op_safe(op, next, ops) { +- list_del(&op->entry); +- +- if (op->op == DRM_GPUVA_OP_REMAP) { +- kfree(op->remap.prev); +- kfree(op->remap.next); +- kfree(op->remap.unmap); +- } +- +- gpuva_op_free(mgr, op); +- } +- +- kfree(ops); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_ops_free); +--- /dev/null ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -0,0 +1,1723 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (c) 2022 Red Hat. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: ++ * Danilo Krummrich ++ * ++ */ ++ ++#include ++ ++#include ++#include ++ ++/** ++ * DOC: Overview ++ * ++ * The DRM GPU VA Manager, represented by struct drm_gpuvm keeps track of a ++ * GPU's virtual address (VA) space and manages the corresponding virtual ++ * mappings represented by &drm_gpuva objects. It also keeps track of the ++ * mapping's backing &drm_gem_object buffers. ++ * ++ * &drm_gem_object buffers maintain a list of &drm_gpuva objects representing ++ * all existent GPU VA mappings using this &drm_gem_object as backing buffer. ++ * ++ * GPU VAs can be flagged as sparse, such that drivers may use GPU VAs to also ++ * keep track of sparse PTEs in order to support Vulkan 'Sparse Resources'. 
++ * ++ * The GPU VA manager internally uses a rb-tree to manage the ++ * &drm_gpuva mappings within a GPU's virtual address space. ++ * ++ * The &drm_gpuvm structure contains a special &drm_gpuva representing the ++ * portion of VA space reserved by the kernel. This node is initialized together ++ * with the GPU VA manager instance and removed when the GPU VA manager is ++ * destroyed. ++ * ++ * In a typical application drivers would embed struct drm_gpuvm and ++ * struct drm_gpuva within their own driver specific structures, there won't be ++ * any memory allocations of its own nor memory allocations of &drm_gpuva ++ * entries. ++ * ++ * The data structures needed to store &drm_gpuvas within the &drm_gpuvm are ++ * contained within struct drm_gpuva already. Hence, for inserting &drm_gpuva ++ * entries from within dma-fence signalling critical sections it is enough to ++ * pre-allocate the &drm_gpuva structures. ++ */ ++ ++/** ++ * DOC: Split and Merge ++ * ++ * Besides its capability to manage and represent a GPU VA space, the ++ * GPU VA manager also provides functions to let the &drm_gpuvm calculate a ++ * sequence of operations to satisfy a given map or unmap request. ++ * ++ * Therefore the DRM GPU VA manager provides an algorithm implementing splitting ++ * and merging of existent GPU VA mappings with the ones that are requested to ++ * be mapped or unmapped. This feature is required by the Vulkan API to ++ * implement Vulkan 'Sparse Memory Bindings' - drivers UAPIs often refer to this ++ * as VM BIND. ++ * ++ * Drivers can call drm_gpuvm_sm_map() to receive a sequence of callbacks ++ * containing map, unmap and remap operations for a given newly requested ++ * mapping. The sequence of callbacks represents the set of operations to ++ * execute in order to integrate the new mapping cleanly into the current state ++ * of the GPU VA space. ++ * ++ * Depending on how the new GPU VA mapping intersects with the existent mappings ++ * of the GPU VA space the &drm_gpuvm_ops callbacks contain an arbitrary amount ++ * of unmap operations, a maximum of two remap operations and a single map ++ * operation. The caller might receive no callback at all if no operation is ++ * required, e.g. if the requested mapping already exists in the exact same way. ++ * ++ * The single map operation represents the original map operation requested by ++ * the caller. ++ * ++ * &drm_gpuva_op_unmap contains a 'keep' field, which indicates whether the ++ * &drm_gpuva to unmap is physically contiguous with the original mapping ++ * request. Optionally, if 'keep' is set, drivers may keep the actual page table ++ * entries for this &drm_gpuva, adding the missing page table entries only and ++ * update the &drm_gpuvm's view of things accordingly. ++ * ++ * Drivers may do the same optimization, namely delta page table updates, also ++ * for remap operations. This is possible since &drm_gpuva_op_remap consists of ++ * one unmap operation and one or two map operations, such that drivers can ++ * derive the page table update delta accordingly. ++ * ++ * Note that there can't be more than two existent mappings to split up, one at ++ * the beginning and one at the end of the new mapping, hence there is a ++ * maximum of two remap operations. ++ * ++ * Analogous to drm_gpuvm_sm_map() drm_gpuvm_sm_unmap() uses &drm_gpuvm_ops to ++ * call back into the driver in order to unmap a range of GPU VA space. 
The ++ * logic behind this function is way simpler though: For all existent mappings ++ * enclosed by the given range unmap operations are created. For mappings which ++ * are only partically located within the given range, remap operations are ++ * created such that those mappings are split up and re-mapped partically. ++ * ++ * As an alternative to drm_gpuvm_sm_map() and drm_gpuvm_sm_unmap(), ++ * drm_gpuvm_sm_map_ops_create() and drm_gpuvm_sm_unmap_ops_create() can be used ++ * to directly obtain an instance of struct drm_gpuva_ops containing a list of ++ * &drm_gpuva_op, which can be iterated with drm_gpuva_for_each_op(). This list ++ * contains the &drm_gpuva_ops analogous to the callbacks one would receive when ++ * calling drm_gpuvm_sm_map() or drm_gpuvm_sm_unmap(). While this way requires ++ * more memory (to allocate the &drm_gpuva_ops), it provides drivers a way to ++ * iterate the &drm_gpuva_op multiple times, e.g. once in a context where memory ++ * allocations are possible (e.g. to allocate GPU page tables) and once in the ++ * dma-fence signalling critical path. ++ * ++ * To update the &drm_gpuvm's view of the GPU VA space drm_gpuva_insert() and ++ * drm_gpuva_remove() may be used. These functions can safely be used from ++ * &drm_gpuvm_ops callbacks originating from drm_gpuvm_sm_map() or ++ * drm_gpuvm_sm_unmap(). However, it might be more convenient to use the ++ * provided helper functions drm_gpuva_map(), drm_gpuva_remap() and ++ * drm_gpuva_unmap() instead. ++ * ++ * The following diagram depicts the basic relationships of existent GPU VA ++ * mappings, a newly requested mapping and the resulting mappings as implemented ++ * by drm_gpuvm_sm_map() - it doesn't cover any arbitrary combinations of these. ++ * ++ * 1) Requested mapping is identical. Replace it, but indicate the backing PTEs ++ * could be kept. ++ * ++ * :: ++ * ++ * 0 a 1 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 a 1 ++ * req: |-----------| (bo_offset=n) ++ * ++ * 0 a 1 ++ * new: |-----------| (bo_offset=n) ++ * ++ * ++ * 2) Requested mapping is identical, except for the BO offset, hence replace ++ * the mapping. ++ * ++ * :: ++ * ++ * 0 a 1 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 a 1 ++ * req: |-----------| (bo_offset=m) ++ * ++ * 0 a 1 ++ * new: |-----------| (bo_offset=m) ++ * ++ * ++ * 3) Requested mapping is identical, except for the backing BO, hence replace ++ * the mapping. ++ * ++ * :: ++ * ++ * 0 a 1 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 b 1 ++ * req: |-----------| (bo_offset=n) ++ * ++ * 0 b 1 ++ * new: |-----------| (bo_offset=n) ++ * ++ * ++ * 4) Existent mapping is a left aligned subset of the requested one, hence ++ * replace the existent one. ++ * ++ * :: ++ * ++ * 0 a 1 ++ * old: |-----| (bo_offset=n) ++ * ++ * 0 a 2 ++ * req: |-----------| (bo_offset=n) ++ * ++ * 0 a 2 ++ * new: |-----------| (bo_offset=n) ++ * ++ * .. note:: ++ * We expect to see the same result for a request with a different BO ++ * and/or non-contiguous BO offset. ++ * ++ * ++ * 5) Requested mapping's range is a left aligned subset of the existent one, ++ * but backed by a different BO. Hence, map the requested mapping and split ++ * the existent one adjusting its BO offset. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 b 1 ++ * req: |-----| (bo_offset=n) ++ * ++ * 0 b 1 a' 2 ++ * new: |-----|-----| (b.bo_offset=n, a.bo_offset=n+1) ++ * ++ * .. note:: ++ * We expect to see the same result for a request with a different BO ++ * and/or non-contiguous BO offset. 
++ * ++ * ++ * 6) Existent mapping is a superset of the requested mapping. Split it up, but ++ * indicate that the backing PTEs could be kept. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 a 1 ++ * req: |-----| (bo_offset=n) ++ * ++ * 0 a 1 a' 2 ++ * new: |-----|-----| (a.bo_offset=n, a'.bo_offset=n+1) ++ * ++ * ++ * 7) Requested mapping's range is a right aligned subset of the existent one, ++ * but backed by a different BO. Hence, map the requested mapping and split ++ * the existent one, without adjusting the BO offset. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 1 b 2 ++ * req: |-----| (bo_offset=m) ++ * ++ * 0 a 1 b 2 ++ * new: |-----|-----| (a.bo_offset=n,b.bo_offset=m) ++ * ++ * ++ * 8) Existent mapping is a superset of the requested mapping. Split it up, but ++ * indicate that the backing PTEs could be kept. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 1 a 2 ++ * req: |-----| (bo_offset=n+1) ++ * ++ * 0 a' 1 a 2 ++ * new: |-----|-----| (a'.bo_offset=n, a.bo_offset=n+1) ++ * ++ * ++ * 9) Existent mapping is overlapped at the end by the requested mapping backed ++ * by a different BO. Hence, map the requested mapping and split up the ++ * existent one, without adjusting the BO offset. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 1 b 3 ++ * req: |-----------| (bo_offset=m) ++ * ++ * 0 a 1 b 3 ++ * new: |-----|-----------| (a.bo_offset=n,b.bo_offset=m) ++ * ++ * ++ * 10) Existent mapping is overlapped by the requested mapping, both having the ++ * same backing BO with a contiguous offset. Indicate the backing PTEs of ++ * the old mapping could be kept. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 1 a 3 ++ * req: |-----------| (bo_offset=n+1) ++ * ++ * 0 a' 1 a 3 ++ * new: |-----|-----------| (a'.bo_offset=n, a.bo_offset=n+1) ++ * ++ * ++ * 11) Requested mapping's range is a centered subset of the existent one ++ * having a different backing BO. Hence, map the requested mapping and split ++ * up the existent one in two mappings, adjusting the BO offset of the right ++ * one accordingly. ++ * ++ * :: ++ * ++ * 0 a 3 ++ * old: |-----------------| (bo_offset=n) ++ * ++ * 1 b 2 ++ * req: |-----| (bo_offset=m) ++ * ++ * 0 a 1 b 2 a' 3 ++ * new: |-----|-----|-----| (a.bo_offset=n,b.bo_offset=m,a'.bo_offset=n+2) ++ * ++ * ++ * 12) Requested mapping is a contiguous subset of the existent one. Split it ++ * up, but indicate that the backing PTEs could be kept. ++ * ++ * :: ++ * ++ * 0 a 3 ++ * old: |-----------------| (bo_offset=n) ++ * ++ * 1 a 2 ++ * req: |-----| (bo_offset=n+1) ++ * ++ * 0 a' 1 a 2 a'' 3 ++ * old: |-----|-----|-----| (a'.bo_offset=n, a.bo_offset=n+1, a''.bo_offset=n+2) ++ * ++ * ++ * 13) Existent mapping is a right aligned subset of the requested one, hence ++ * replace the existent one. ++ * ++ * :: ++ * ++ * 1 a 2 ++ * old: |-----| (bo_offset=n+1) ++ * ++ * 0 a 2 ++ * req: |-----------| (bo_offset=n) ++ * ++ * 0 a 2 ++ * new: |-----------| (bo_offset=n) ++ * ++ * .. note:: ++ * We expect to see the same result for a request with a different bo ++ * and/or non-contiguous bo_offset. ++ * ++ * ++ * 14) Existent mapping is a centered subset of the requested one, hence ++ * replace the existent one. ++ * ++ * :: ++ * ++ * 1 a 2 ++ * old: |-----| (bo_offset=n+1) ++ * ++ * 0 a 3 ++ * req: |----------------| (bo_offset=n) ++ * ++ * 0 a 3 ++ * new: |----------------| (bo_offset=n) ++ * ++ * .. 
note:: ++ * We expect to see the same result for a request with a different bo ++ * and/or non-contiguous bo_offset. ++ * ++ * ++ * 15) Existent mappings is overlapped at the beginning by the requested mapping ++ * backed by a different BO. Hence, map the requested mapping and split up ++ * the existent one, adjusting its BO offset accordingly. ++ * ++ * :: ++ * ++ * 1 a 3 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 b 2 ++ * req: |-----------| (bo_offset=m) ++ * ++ * 0 b 2 a' 3 ++ * new: |-----------|-----| (b.bo_offset=m,a.bo_offset=n+2) ++ */ ++ ++/** ++ * DOC: Locking ++ * ++ * Generally, the GPU VA manager does not take care of locking itself, it is ++ * the drivers responsibility to take care about locking. Drivers might want to ++ * protect the following operations: inserting, removing and iterating ++ * &drm_gpuva objects as well as generating all kinds of operations, such as ++ * split / merge or prefetch. ++ * ++ * The GPU VA manager also does not take care of the locking of the backing ++ * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to ++ * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively ++ * a driver specific external lock. For the latter see also ++ * drm_gem_gpuva_set_lock(). ++ * ++ * However, the GPU VA manager contains lockdep checks to ensure callers of its ++ * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is ++ * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink(). ++ */ ++ ++/** ++ * DOC: Examples ++ * ++ * This section gives two examples on how to let the DRM GPUVA Manager generate ++ * &drm_gpuva_op in order to satisfy a given map or unmap request and how to ++ * make use of them. ++ * ++ * The below code is strictly limited to illustrate the generic usage pattern. ++ * To maintain simplicitly, it doesn't make use of any abstractions for common ++ * code, different (asyncronous) stages with fence signalling critical paths, ++ * any other helpers or error handling in terms of freeing memory and dropping ++ * previously taken locks. ++ * ++ * 1) Obtain a list of &drm_gpuva_op to create a new mapping:: ++ * ++ * // Allocates a new &drm_gpuva. ++ * struct drm_gpuva * driver_gpuva_alloc(void); ++ * ++ * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva ++ * // structure in individual driver structures and lock the dma-resv with ++ * // drm_exec or similar helpers. 
++ * int driver_mapping_create(struct drm_gpuvm *gpuvm, ++ * u64 addr, u64 range, ++ * struct drm_gem_object *obj, u64 offset) ++ * { ++ * struct drm_gpuva_ops *ops; ++ * struct drm_gpuva_op *op ++ * ++ * driver_lock_va_space(); ++ * ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range, ++ * obj, offset); ++ * if (IS_ERR(ops)) ++ * return PTR_ERR(ops); ++ * ++ * drm_gpuva_for_each_op(op, ops) { ++ * struct drm_gpuva *va; ++ * ++ * switch (op->op) { ++ * case DRM_GPUVA_OP_MAP: ++ * va = driver_gpuva_alloc(); ++ * if (!va) ++ * ; // unwind previous VA space updates, ++ * // free memory and unlock ++ * ++ * driver_vm_map(); ++ * drm_gpuva_map(gpuvm, va, &op->map); ++ * drm_gpuva_link(va); ++ * ++ * break; ++ * case DRM_GPUVA_OP_REMAP: { ++ * struct drm_gpuva *prev = NULL, *next = NULL; ++ * ++ * va = op->remap.unmap->va; ++ * ++ * if (op->remap.prev) { ++ * prev = driver_gpuva_alloc(); ++ * if (!prev) ++ * ; // unwind previous VA space ++ * // updates, free memory and ++ * // unlock ++ * } ++ * ++ * if (op->remap.next) { ++ * next = driver_gpuva_alloc(); ++ * if (!next) ++ * ; // unwind previous VA space ++ * // updates, free memory and ++ * // unlock ++ * } ++ * ++ * driver_vm_remap(); ++ * drm_gpuva_remap(prev, next, &op->remap); ++ * ++ * drm_gpuva_unlink(va); ++ * if (prev) ++ * drm_gpuva_link(prev); ++ * if (next) ++ * drm_gpuva_link(next); ++ * ++ * break; ++ * } ++ * case DRM_GPUVA_OP_UNMAP: ++ * va = op->unmap->va; ++ * ++ * driver_vm_unmap(); ++ * drm_gpuva_unlink(va); ++ * drm_gpuva_unmap(&op->unmap); ++ * ++ * break; ++ * default: ++ * break; ++ * } ++ * } ++ * driver_unlock_va_space(); ++ * ++ * return 0; ++ * } ++ * ++ * 2) Receive a callback for each &drm_gpuva_op to create a new mapping:: ++ * ++ * struct driver_context { ++ * struct drm_gpuvm *gpuvm; ++ * struct drm_gpuva *new_va; ++ * struct drm_gpuva *prev_va; ++ * struct drm_gpuva *next_va; ++ * }; ++ * ++ * // ops to pass to drm_gpuvm_init() ++ * static const struct drm_gpuvm_ops driver_gpuvm_ops = { ++ * .sm_step_map = driver_gpuva_map, ++ * .sm_step_remap = driver_gpuva_remap, ++ * .sm_step_unmap = driver_gpuva_unmap, ++ * }; ++ * ++ * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva ++ * // structure in individual driver structures and lock the dma-resv with ++ * // drm_exec or similar helpers. 
++ * int driver_mapping_create(struct drm_gpuvm *gpuvm, ++ * u64 addr, u64 range, ++ * struct drm_gem_object *obj, u64 offset) ++ * { ++ * struct driver_context ctx; ++ * struct drm_gpuva_ops *ops; ++ * struct drm_gpuva_op *op; ++ * int ret = 0; ++ * ++ * ctx.gpuvm = gpuvm; ++ * ++ * ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL); ++ * ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL); ++ * ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL); ++ * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) { ++ * ret = -ENOMEM; ++ * goto out; ++ * } ++ * ++ * driver_lock_va_space(); ++ * ret = drm_gpuvm_sm_map(gpuvm, &ctx, addr, range, obj, offset); ++ * driver_unlock_va_space(); ++ * ++ * out: ++ * kfree(ctx.new_va); ++ * kfree(ctx.prev_va); ++ * kfree(ctx.next_va); ++ * return ret; ++ * } ++ * ++ * int driver_gpuva_map(struct drm_gpuva_op *op, void *__ctx) ++ * { ++ * struct driver_context *ctx = __ctx; ++ * ++ * drm_gpuva_map(ctx->vm, ctx->new_va, &op->map); ++ * ++ * drm_gpuva_link(ctx->new_va); ++ * ++ * // prevent the new GPUVA from being freed in ++ * // driver_mapping_create() ++ * ctx->new_va = NULL; ++ * ++ * return 0; ++ * } ++ * ++ * int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx) ++ * { ++ * struct driver_context *ctx = __ctx; ++ * ++ * drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap); ++ * ++ * drm_gpuva_unlink(op->remap.unmap->va); ++ * kfree(op->remap.unmap->va); ++ * ++ * if (op->remap.prev) { ++ * drm_gpuva_link(ctx->prev_va); ++ * ctx->prev_va = NULL; ++ * } ++ * ++ * if (op->remap.next) { ++ * drm_gpuva_link(ctx->next_va); ++ * ctx->next_va = NULL; ++ * } ++ * ++ * return 0; ++ * } ++ * ++ * int driver_gpuva_unmap(struct drm_gpuva_op *op, void *__ctx) ++ * { ++ * drm_gpuva_unlink(op->unmap.va); ++ * drm_gpuva_unmap(&op->unmap); ++ * kfree(op->unmap.va); ++ * ++ * return 0; ++ * } ++ */ ++ ++#define to_drm_gpuva(__node) container_of((__node), struct drm_gpuva, rb.node) ++ ++#define GPUVA_START(node) ((node)->va.addr) ++#define GPUVA_LAST(node) ((node)->va.addr + (node)->va.range - 1) ++ ++/* We do not actually use drm_gpuva_it_next(), tell the compiler to not complain ++ * about this. 
++ */ ++INTERVAL_TREE_DEFINE(struct drm_gpuva, rb.node, u64, rb.__subtree_last, ++ GPUVA_START, GPUVA_LAST, static __maybe_unused, ++ drm_gpuva_it) ++ ++static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va); ++static void __drm_gpuva_remove(struct drm_gpuva *va); ++ ++static bool ++drm_gpuvm_check_overflow(u64 addr, u64 range) ++{ ++ u64 end; ++ ++ return WARN(check_add_overflow(addr, range, &end), ++ "GPUVA address limited to %zu bytes.\n", sizeof(end)); ++} ++ ++static bool ++drm_gpuvm_in_mm_range(struct drm_gpuvm *gpuvm, u64 addr, u64 range) ++{ ++ u64 end = addr + range; ++ u64 mm_start = gpuvm->mm_start; ++ u64 mm_end = mm_start + gpuvm->mm_range; ++ ++ return addr >= mm_start && end <= mm_end; ++} ++ ++static bool ++drm_gpuvm_in_kernel_node(struct drm_gpuvm *gpuvm, u64 addr, u64 range) ++{ ++ u64 end = addr + range; ++ u64 kstart = gpuvm->kernel_alloc_node.va.addr; ++ u64 krange = gpuvm->kernel_alloc_node.va.range; ++ u64 kend = kstart + krange; ++ ++ return krange && addr < kend && kstart < end; ++} ++ ++static bool ++drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range) ++{ ++ return !drm_gpuvm_check_overflow(addr, range) && ++ drm_gpuvm_in_mm_range(gpuvm, addr, range) && ++ !drm_gpuvm_in_kernel_node(gpuvm, addr, range); ++} ++ ++/** ++ * drm_gpuvm_init() - initialize a &drm_gpuvm ++ * @gpuvm: pointer to the &drm_gpuvm to initialize ++ * @name: the name of the GPU VA space ++ * @start_offset: the start offset of the GPU VA space ++ * @range: the size of the GPU VA space ++ * @reserve_offset: the start of the kernel reserved GPU VA area ++ * @reserve_range: the size of the kernel reserved GPU VA area ++ * @ops: &drm_gpuvm_ops called on &drm_gpuvm_sm_map / &drm_gpuvm_sm_unmap ++ * ++ * The &drm_gpuvm must be initialized with this function before use. ++ * ++ * Note that @gpuvm must be cleared to 0 before calling this function. The given ++ * &name is expected to be managed by the surrounding driver structures. ++ */ ++void ++drm_gpuvm_init(struct drm_gpuvm *gpuvm, ++ const char *name, ++ u64 start_offset, u64 range, ++ u64 reserve_offset, u64 reserve_range, ++ const struct drm_gpuvm_ops *ops) ++{ ++ gpuvm->rb.tree = RB_ROOT_CACHED; ++ INIT_LIST_HEAD(&gpuvm->rb.list); ++ ++ drm_gpuvm_check_overflow(start_offset, range); ++ gpuvm->mm_start = start_offset; ++ gpuvm->mm_range = range; ++ ++ gpuvm->name = name ? name : "unknown"; ++ gpuvm->ops = ops; ++ ++ memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); ++ ++ if (reserve_range) { ++ gpuvm->kernel_alloc_node.va.addr = reserve_offset; ++ gpuvm->kernel_alloc_node.va.range = reserve_range; ++ ++ if (likely(!drm_gpuvm_check_overflow(reserve_offset, ++ reserve_range))) ++ __drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node); ++ } ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_init); ++ ++/** ++ * drm_gpuvm_destroy() - cleanup a &drm_gpuvm ++ * @gpuvm: pointer to the &drm_gpuvm to clean up ++ * ++ * Note that it is a bug to call this function on a manager that still ++ * holds GPU VA mappings. 
++ */ ++void ++drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) ++{ ++ gpuvm->name = NULL; ++ ++ if (gpuvm->kernel_alloc_node.va.range) ++ __drm_gpuva_remove(&gpuvm->kernel_alloc_node); ++ ++ WARN(!RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), ++ "GPUVA tree is not empty, potentially leaking memory."); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); ++ ++static int ++__drm_gpuva_insert(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va) ++{ ++ struct rb_node *node; ++ struct list_head *head; ++ ++ if (drm_gpuva_it_iter_first(&gpuvm->rb.tree, ++ GPUVA_START(va), ++ GPUVA_LAST(va))) ++ return -EEXIST; ++ ++ va->vm = gpuvm; ++ ++ drm_gpuva_it_insert(va, &gpuvm->rb.tree); ++ ++ node = rb_prev(&va->rb.node); ++ if (node) ++ head = &(to_drm_gpuva(node))->rb.entry; ++ else ++ head = &gpuvm->rb.list; ++ ++ list_add(&va->rb.entry, head); ++ ++ return 0; ++} ++ ++/** ++ * drm_gpuva_insert() - insert a &drm_gpuva ++ * @gpuvm: the &drm_gpuvm to insert the &drm_gpuva in ++ * @va: the &drm_gpuva to insert ++ * ++ * Insert a &drm_gpuva with a given address and range into a ++ * &drm_gpuvm. ++ * ++ * It is safe to use this function using the safe versions of iterating the GPU ++ * VA space, such as drm_gpuvm_for_each_va_safe() and ++ * drm_gpuvm_for_each_va_range_safe(). ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuva_insert(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va) ++{ ++ u64 addr = va->va.addr; ++ u64 range = va->va.range; ++ ++ if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range))) ++ return -EINVAL; ++ ++ return __drm_gpuva_insert(gpuvm, va); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_insert); ++ ++static void ++__drm_gpuva_remove(struct drm_gpuva *va) ++{ ++ drm_gpuva_it_remove(va, &va->vm->rb.tree); ++ list_del_init(&va->rb.entry); ++} ++ ++/** ++ * drm_gpuva_remove() - remove a &drm_gpuva ++ * @va: the &drm_gpuva to remove ++ * ++ * This removes the given &va from the underlaying tree. ++ * ++ * It is safe to use this function using the safe versions of iterating the GPU ++ * VA space, such as drm_gpuvm_for_each_va_safe() and ++ * drm_gpuvm_for_each_va_range_safe(). ++ */ ++void ++drm_gpuva_remove(struct drm_gpuva *va) ++{ ++ struct drm_gpuvm *gpuvm = va->vm; ++ ++ if (unlikely(va == &gpuvm->kernel_alloc_node)) { ++ WARN(1, "Can't destroy kernel reserved node.\n"); ++ return; ++ } ++ ++ __drm_gpuva_remove(va); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_remove); ++ ++/** ++ * drm_gpuva_link() - link a &drm_gpuva ++ * @va: the &drm_gpuva to link ++ * ++ * This adds the given &va to the GPU VA list of the &drm_gem_object it is ++ * associated with. ++ * ++ * This function expects the caller to protect the GEM's GPUVA list against ++ * concurrent access using the GEMs dma_resv lock. ++ */ ++void ++drm_gpuva_link(struct drm_gpuva *va) ++{ ++ struct drm_gem_object *obj = va->gem.obj; ++ ++ if (unlikely(!obj)) ++ return; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ ++ list_add_tail(&va->gem.entry, &obj->gpuva.list); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_link); ++ ++/** ++ * drm_gpuva_unlink() - unlink a &drm_gpuva ++ * @va: the &drm_gpuva to unlink ++ * ++ * This removes the given &va from the GPU VA list of the &drm_gem_object it is ++ * associated with. ++ * ++ * This function expects the caller to protect the GEM's GPUVA list against ++ * concurrent access using the GEMs dma_resv lock. 
++ */ ++void ++drm_gpuva_unlink(struct drm_gpuva *va) ++{ ++ struct drm_gem_object *obj = va->gem.obj; ++ ++ if (unlikely(!obj)) ++ return; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ ++ list_del_init(&va->gem.entry); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_unlink); ++ ++/** ++ * drm_gpuva_find_first() - find the first &drm_gpuva in the given range ++ * @gpuvm: the &drm_gpuvm to search in ++ * @addr: the &drm_gpuvas address ++ * @range: the &drm_gpuvas range ++ * ++ * Returns: the first &drm_gpuva within the given range ++ */ ++struct drm_gpuva * ++drm_gpuva_find_first(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range) ++{ ++ u64 last = addr + range - 1; ++ ++ return drm_gpuva_it_iter_first(&gpuvm->rb.tree, addr, last); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_find_first); ++ ++/** ++ * drm_gpuva_find() - find a &drm_gpuva ++ * @gpuvm: the &drm_gpuvm to search in ++ * @addr: the &drm_gpuvas address ++ * @range: the &drm_gpuvas range ++ * ++ * Returns: the &drm_gpuva at a given &addr and with a given &range ++ */ ++struct drm_gpuva * ++drm_gpuva_find(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range) ++{ ++ struct drm_gpuva *va; ++ ++ va = drm_gpuva_find_first(gpuvm, addr, range); ++ if (!va) ++ goto out; ++ ++ if (va->va.addr != addr || ++ va->va.range != range) ++ goto out; ++ ++ return va; ++ ++out: ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_find); ++ ++/** ++ * drm_gpuva_find_prev() - find the &drm_gpuva before the given address ++ * @gpuvm: the &drm_gpuvm to search in ++ * @start: the given GPU VA's start address ++ * ++ * Find the adjacent &drm_gpuva before the GPU VA with given &start address. ++ * ++ * Note that if there is any free space between the GPU VA mappings no mapping ++ * is returned. ++ * ++ * Returns: a pointer to the found &drm_gpuva or NULL if none was found ++ */ ++struct drm_gpuva * ++drm_gpuva_find_prev(struct drm_gpuvm *gpuvm, u64 start) ++{ ++ if (!drm_gpuvm_range_valid(gpuvm, start - 1, 1)) ++ return NULL; ++ ++ return drm_gpuva_it_iter_first(&gpuvm->rb.tree, start - 1, start); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_find_prev); ++ ++/** ++ * drm_gpuva_find_next() - find the &drm_gpuva after the given address ++ * @gpuvm: the &drm_gpuvm to search in ++ * @end: the given GPU VA's end address ++ * ++ * Find the adjacent &drm_gpuva after the GPU VA with given &end address. ++ * ++ * Note that if there is any free space between the GPU VA mappings no mapping ++ * is returned. 
++ * ++ * Returns: a pointer to the found &drm_gpuva or NULL if none was found ++ */ ++struct drm_gpuva * ++drm_gpuva_find_next(struct drm_gpuvm *gpuvm, u64 end) ++{ ++ if (!drm_gpuvm_range_valid(gpuvm, end, 1)) ++ return NULL; ++ ++ return drm_gpuva_it_iter_first(&gpuvm->rb.tree, end, end + 1); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_find_next); ++ ++/** ++ * drm_gpuvm_interval_empty() - indicate whether a given interval of the VA space ++ * is empty ++ * @gpuvm: the &drm_gpuvm to check the range for ++ * @addr: the start address of the range ++ * @range: the range of the interval ++ * ++ * Returns: true if the interval is empty, false otherwise ++ */ ++bool ++drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range) ++{ ++ return !drm_gpuva_find_first(gpuvm, addr, range); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_interval_empty); ++ ++/** ++ * drm_gpuva_map() - helper to insert a &drm_gpuva according to a ++ * &drm_gpuva_op_map ++ * @gpuvm: the &drm_gpuvm ++ * @va: the &drm_gpuva to insert ++ * @op: the &drm_gpuva_op_map to initialize @va with ++ * ++ * Initializes the @va from the @op and inserts it into the given @gpuvm. ++ */ ++void ++drm_gpuva_map(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va, ++ struct drm_gpuva_op_map *op) ++{ ++ drm_gpuva_init_from_op(va, op); ++ drm_gpuva_insert(gpuvm, va); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_map); ++ ++/** ++ * drm_gpuva_remap() - helper to remap a &drm_gpuva according to a ++ * &drm_gpuva_op_remap ++ * @prev: the &drm_gpuva to remap when keeping the start of a mapping ++ * @next: the &drm_gpuva to remap when keeping the end of a mapping ++ * @op: the &drm_gpuva_op_remap to initialize @prev and @next with ++ * ++ * Removes the currently mapped &drm_gpuva and remaps it using @prev and/or ++ * @next. ++ */ ++void ++drm_gpuva_remap(struct drm_gpuva *prev, ++ struct drm_gpuva *next, ++ struct drm_gpuva_op_remap *op) ++{ ++ struct drm_gpuva *curr = op->unmap->va; ++ struct drm_gpuvm *gpuvm = curr->vm; ++ ++ drm_gpuva_remove(curr); ++ ++ if (op->prev) { ++ drm_gpuva_init_from_op(prev, op->prev); ++ drm_gpuva_insert(gpuvm, prev); ++ } ++ ++ if (op->next) { ++ drm_gpuva_init_from_op(next, op->next); ++ drm_gpuva_insert(gpuvm, next); ++ } ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_remap); ++ ++/** ++ * drm_gpuva_unmap() - helper to remove a &drm_gpuva according to a ++ * &drm_gpuva_op_unmap ++ * @op: the &drm_gpuva_op_unmap specifying the &drm_gpuva to remove ++ * ++ * Removes the &drm_gpuva associated with the &drm_gpuva_op_unmap. 
++ */ ++void ++drm_gpuva_unmap(struct drm_gpuva_op_unmap *op) ++{ ++ drm_gpuva_remove(op->va); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_unmap); ++ ++static int ++op_map_cb(const struct drm_gpuvm_ops *fn, void *priv, ++ u64 addr, u64 range, ++ struct drm_gem_object *obj, u64 offset) ++{ ++ struct drm_gpuva_op op = {}; ++ ++ op.op = DRM_GPUVA_OP_MAP; ++ op.map.va.addr = addr; ++ op.map.va.range = range; ++ op.map.gem.obj = obj; ++ op.map.gem.offset = offset; ++ ++ return fn->sm_step_map(&op, priv); ++} ++ ++static int ++op_remap_cb(const struct drm_gpuvm_ops *fn, void *priv, ++ struct drm_gpuva_op_map *prev, ++ struct drm_gpuva_op_map *next, ++ struct drm_gpuva_op_unmap *unmap) ++{ ++ struct drm_gpuva_op op = {}; ++ struct drm_gpuva_op_remap *r; ++ ++ op.op = DRM_GPUVA_OP_REMAP; ++ r = &op.remap; ++ r->prev = prev; ++ r->next = next; ++ r->unmap = unmap; ++ ++ return fn->sm_step_remap(&op, priv); ++} ++ ++static int ++op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, ++ struct drm_gpuva *va, bool merge) ++{ ++ struct drm_gpuva_op op = {}; ++ ++ op.op = DRM_GPUVA_OP_UNMAP; ++ op.unmap.va = va; ++ op.unmap.keep = merge; ++ ++ return fn->sm_step_unmap(&op, priv); ++} ++ ++static int ++__drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, ++ const struct drm_gpuvm_ops *ops, void *priv, ++ u64 req_addr, u64 req_range, ++ struct drm_gem_object *req_obj, u64 req_offset) ++{ ++ struct drm_gpuva *va, *next; ++ u64 req_end = req_addr + req_range; ++ int ret; ++ ++ if (unlikely(!drm_gpuvm_range_valid(gpuvm, req_addr, req_range))) ++ return -EINVAL; ++ ++ drm_gpuvm_for_each_va_range_safe(va, next, gpuvm, req_addr, req_end) { ++ struct drm_gem_object *obj = va->gem.obj; ++ u64 offset = va->gem.offset; ++ u64 addr = va->va.addr; ++ u64 range = va->va.range; ++ u64 end = addr + range; ++ bool merge = !!va->gem.obj; ++ ++ if (addr == req_addr) { ++ merge &= obj == req_obj && ++ offset == req_offset; ++ ++ if (end == req_end) { ++ ret = op_unmap_cb(ops, priv, va, merge); ++ if (ret) ++ return ret; ++ break; ++ } ++ ++ if (end < req_end) { ++ ret = op_unmap_cb(ops, priv, va, merge); ++ if (ret) ++ return ret; ++ continue; ++ } ++ ++ if (end > req_end) { ++ struct drm_gpuva_op_map n = { ++ .va.addr = req_end, ++ .va.range = range - req_range, ++ .gem.obj = obj, ++ .gem.offset = offset + req_range, ++ }; ++ struct drm_gpuva_op_unmap u = { ++ .va = va, ++ .keep = merge, ++ }; ++ ++ ret = op_remap_cb(ops, priv, NULL, &n, &u); ++ if (ret) ++ return ret; ++ break; ++ } ++ } else if (addr < req_addr) { ++ u64 ls_range = req_addr - addr; ++ struct drm_gpuva_op_map p = { ++ .va.addr = addr, ++ .va.range = ls_range, ++ .gem.obj = obj, ++ .gem.offset = offset, ++ }; ++ struct drm_gpuva_op_unmap u = { .va = va }; ++ ++ merge &= obj == req_obj && ++ offset + ls_range == req_offset; ++ u.keep = merge; ++ ++ if (end == req_end) { ++ ret = op_remap_cb(ops, priv, &p, NULL, &u); ++ if (ret) ++ return ret; ++ break; ++ } ++ ++ if (end < req_end) { ++ ret = op_remap_cb(ops, priv, &p, NULL, &u); ++ if (ret) ++ return ret; ++ continue; ++ } ++ ++ if (end > req_end) { ++ struct drm_gpuva_op_map n = { ++ .va.addr = req_end, ++ .va.range = end - req_end, ++ .gem.obj = obj, ++ .gem.offset = offset + ls_range + ++ req_range, ++ }; ++ ++ ret = op_remap_cb(ops, priv, &p, &n, &u); ++ if (ret) ++ return ret; ++ break; ++ } ++ } else if (addr > req_addr) { ++ merge &= obj == req_obj && ++ offset == req_offset + ++ (addr - req_addr); ++ ++ if (end == req_end) { ++ ret = op_unmap_cb(ops, priv, va, merge); ++ if (ret) ++ return ret; ++ break; ++ } ++ ++ 
if (end < req_end) { ++ ret = op_unmap_cb(ops, priv, va, merge); ++ if (ret) ++ return ret; ++ continue; ++ } ++ ++ if (end > req_end) { ++ struct drm_gpuva_op_map n = { ++ .va.addr = req_end, ++ .va.range = end - req_end, ++ .gem.obj = obj, ++ .gem.offset = offset + req_end - addr, ++ }; ++ struct drm_gpuva_op_unmap u = { ++ .va = va, ++ .keep = merge, ++ }; ++ ++ ret = op_remap_cb(ops, priv, NULL, &n, &u); ++ if (ret) ++ return ret; ++ break; ++ } ++ } ++ } ++ ++ return op_map_cb(ops, priv, ++ req_addr, req_range, ++ req_obj, req_offset); ++} ++ ++static int ++__drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, ++ const struct drm_gpuvm_ops *ops, void *priv, ++ u64 req_addr, u64 req_range) ++{ ++ struct drm_gpuva *va, *next; ++ u64 req_end = req_addr + req_range; ++ int ret; ++ ++ if (unlikely(!drm_gpuvm_range_valid(gpuvm, req_addr, req_range))) ++ return -EINVAL; ++ ++ drm_gpuvm_for_each_va_range_safe(va, next, gpuvm, req_addr, req_end) { ++ struct drm_gpuva_op_map prev = {}, next = {}; ++ bool prev_split = false, next_split = false; ++ struct drm_gem_object *obj = va->gem.obj; ++ u64 offset = va->gem.offset; ++ u64 addr = va->va.addr; ++ u64 range = va->va.range; ++ u64 end = addr + range; ++ ++ if (addr < req_addr) { ++ prev.va.addr = addr; ++ prev.va.range = req_addr - addr; ++ prev.gem.obj = obj; ++ prev.gem.offset = offset; ++ ++ prev_split = true; ++ } ++ ++ if (end > req_end) { ++ next.va.addr = req_end; ++ next.va.range = end - req_end; ++ next.gem.obj = obj; ++ next.gem.offset = offset + (req_end - addr); ++ ++ next_split = true; ++ } ++ ++ if (prev_split || next_split) { ++ struct drm_gpuva_op_unmap unmap = { .va = va }; ++ ++ ret = op_remap_cb(ops, priv, ++ prev_split ? &prev : NULL, ++ next_split ? &next : NULL, ++ &unmap); ++ if (ret) ++ return ret; ++ } else { ++ ret = op_unmap_cb(ops, priv, va, false); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * drm_gpuvm_sm_map() - creates the &drm_gpuva_op split/merge steps ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @req_addr: the start address of the new mapping ++ * @req_range: the range of the new mapping ++ * @req_obj: the &drm_gem_object to map ++ * @req_offset: the offset within the &drm_gem_object ++ * @priv: pointer to a driver private data structure ++ * ++ * This function iterates the given range of the GPU VA space. It utilizes the ++ * &drm_gpuvm_ops to call back into the driver providing the split and merge ++ * steps. ++ * ++ * Drivers may use these callbacks to update the GPU VA space right away within ++ * the callback. In case the driver decides to copy and store the operations for ++ * later processing neither this function nor &drm_gpuvm_sm_unmap is allowed to ++ * be called before the &drm_gpuvm's view of the GPU VA space was ++ * updated with the previous set of operations. To update the ++ * &drm_gpuvm's view of the GPU VA space drm_gpuva_insert(), ++ * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be ++ * used. ++ * ++ * A sequence of callbacks can contain map, unmap and remap operations, but ++ * the sequence of callbacks might also be empty if no operation is required, ++ * e.g. if the requested mapping already exists in the exact same way. ++ * ++ * There can be an arbitrary amount of unmap operations, a maximum of two remap ++ * operations and a single map operation. The latter one represents the original ++ * map operation requested by the caller. 
++ * ++ * Returns: 0 on success or a negative error code ++ */ ++int ++drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv, ++ u64 req_addr, u64 req_range, ++ struct drm_gem_object *req_obj, u64 req_offset) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ ++ if (unlikely(!(ops && ops->sm_step_map && ++ ops->sm_step_remap && ++ ops->sm_step_unmap))) ++ return -EINVAL; ++ ++ return __drm_gpuvm_sm_map(gpuvm, ops, priv, ++ req_addr, req_range, ++ req_obj, req_offset); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map); ++ ++/** ++ * drm_gpuvm_sm_unmap() - creates the &drm_gpuva_ops to split on unmap ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @priv: pointer to a driver private data structure ++ * @req_addr: the start address of the range to unmap ++ * @req_range: the range of the mappings to unmap ++ * ++ * This function iterates the given range of the GPU VA space. It utilizes the ++ * &drm_gpuvm_ops to call back into the driver providing the operations to ++ * unmap and, if required, split existent mappings. ++ * ++ * Drivers may use these callbacks to update the GPU VA space right away within ++ * the callback. In case the driver decides to copy and store the operations for ++ * later processing neither this function nor &drm_gpuvm_sm_map is allowed to be ++ * called before the &drm_gpuvm's view of the GPU VA space was updated ++ * with the previous set of operations. To update the &drm_gpuvm's view ++ * of the GPU VA space drm_gpuva_insert(), drm_gpuva_destroy_locked() and/or ++ * drm_gpuva_destroy_unlocked() should be used. ++ * ++ * A sequence of callbacks can contain unmap and remap operations, depending on ++ * whether there are actual overlapping mappings to split. ++ * ++ * There can be an arbitrary amount of unmap operations and a maximum of two ++ * remap operations. 
++ * ++ * Returns: 0 on success or a negative error code ++ */ ++int ++drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, void *priv, ++ u64 req_addr, u64 req_range) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ ++ if (unlikely(!(ops && ops->sm_step_remap && ++ ops->sm_step_unmap))) ++ return -EINVAL; ++ ++ return __drm_gpuvm_sm_unmap(gpuvm, ops, priv, ++ req_addr, req_range); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_sm_unmap); ++ ++static struct drm_gpuva_op * ++gpuva_op_alloc(struct drm_gpuvm *gpuvm) ++{ ++ const struct drm_gpuvm_ops *fn = gpuvm->ops; ++ struct drm_gpuva_op *op; ++ ++ if (fn && fn->op_alloc) ++ op = fn->op_alloc(); ++ else ++ op = kzalloc(sizeof(*op), GFP_KERNEL); ++ ++ if (unlikely(!op)) ++ return NULL; ++ ++ return op; ++} ++ ++static void ++gpuva_op_free(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva_op *op) ++{ ++ const struct drm_gpuvm_ops *fn = gpuvm->ops; ++ ++ if (fn && fn->op_free) ++ fn->op_free(op); ++ else ++ kfree(op); ++} ++ ++static int ++drm_gpuva_sm_step(struct drm_gpuva_op *__op, ++ void *priv) ++{ ++ struct { ++ struct drm_gpuvm *vm; ++ struct drm_gpuva_ops *ops; ++ } *args = priv; ++ struct drm_gpuvm *gpuvm = args->vm; ++ struct drm_gpuva_ops *ops = args->ops; ++ struct drm_gpuva_op *op; ++ ++ op = gpuva_op_alloc(gpuvm); ++ if (unlikely(!op)) ++ goto err; ++ ++ memcpy(op, __op, sizeof(*op)); ++ ++ if (op->op == DRM_GPUVA_OP_REMAP) { ++ struct drm_gpuva_op_remap *__r = &__op->remap; ++ struct drm_gpuva_op_remap *r = &op->remap; ++ ++ r->unmap = kmemdup(__r->unmap, sizeof(*r->unmap), ++ GFP_KERNEL); ++ if (unlikely(!r->unmap)) ++ goto err_free_op; ++ ++ if (__r->prev) { ++ r->prev = kmemdup(__r->prev, sizeof(*r->prev), ++ GFP_KERNEL); ++ if (unlikely(!r->prev)) ++ goto err_free_unmap; ++ } ++ ++ if (__r->next) { ++ r->next = kmemdup(__r->next, sizeof(*r->next), ++ GFP_KERNEL); ++ if (unlikely(!r->next)) ++ goto err_free_prev; ++ } ++ } ++ ++ list_add_tail(&op->entry, &ops->list); ++ ++ return 0; ++ ++err_free_unmap: ++ kfree(op->remap.unmap); ++err_free_prev: ++ kfree(op->remap.prev); ++err_free_op: ++ gpuva_op_free(gpuvm, op); ++err: ++ return -ENOMEM; ++} ++ ++static const struct drm_gpuvm_ops gpuvm_list_ops = { ++ .sm_step_map = drm_gpuva_sm_step, ++ .sm_step_remap = drm_gpuva_sm_step, ++ .sm_step_unmap = drm_gpuva_sm_step, ++}; ++ ++/** ++ * drm_gpuvm_sm_map_ops_create() - creates the &drm_gpuva_ops to split and merge ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @req_addr: the start address of the new mapping ++ * @req_range: the range of the new mapping ++ * @req_obj: the &drm_gem_object to map ++ * @req_offset: the offset within the &drm_gem_object ++ * ++ * This function creates a list of operations to perform splitting and merging ++ * of existent mapping(s) with the newly requested one. ++ * ++ * The list can be iterated with &drm_gpuva_for_each_op and must be processed ++ * in the given order. It can contain map, unmap and remap operations, but it ++ * also can be empty if no operation is required, e.g. if the requested mapping ++ * already exists is the exact same way. ++ * ++ * There can be an arbitrary amount of unmap operations, a maximum of two remap ++ * operations and a single map operation. The latter one represents the original ++ * map operation requested by the caller. ++ * ++ * Note that before calling this function again with another mapping request it ++ * is necessary to update the &drm_gpuvm's view of the GPU VA space. The ++ * previously obtained operations must be either processed or abandoned. 
To ++ * update the &drm_gpuvm's view of the GPU VA space drm_gpuva_insert(), ++ * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be ++ * used. ++ * ++ * After the caller finished processing the returned &drm_gpuva_ops, they must ++ * be freed with &drm_gpuva_ops_free. ++ * ++ * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure ++ */ ++struct drm_gpuva_ops * ++drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, ++ u64 req_addr, u64 req_range, ++ struct drm_gem_object *req_obj, u64 req_offset) ++{ ++ struct drm_gpuva_ops *ops; ++ struct { ++ struct drm_gpuvm *vm; ++ struct drm_gpuva_ops *ops; ++ } args; ++ int ret; ++ ++ ops = kzalloc(sizeof(*ops), GFP_KERNEL); ++ if (unlikely(!ops)) ++ return ERR_PTR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&ops->list); ++ ++ args.vm = gpuvm; ++ args.ops = ops; ++ ++ ret = __drm_gpuvm_sm_map(gpuvm, &gpuvm_list_ops, &args, ++ req_addr, req_range, ++ req_obj, req_offset); ++ if (ret) ++ goto err_free_ops; ++ ++ return ops; ++ ++err_free_ops: ++ drm_gpuva_ops_free(gpuvm, ops); ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map_ops_create); ++ ++/** ++ * drm_gpuvm_sm_unmap_ops_create() - creates the &drm_gpuva_ops to split on ++ * unmap ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @req_addr: the start address of the range to unmap ++ * @req_range: the range of the mappings to unmap ++ * ++ * This function creates a list of operations to perform unmapping and, if ++ * required, splitting of the mappings overlapping the unmap range. ++ * ++ * The list can be iterated with &drm_gpuva_for_each_op and must be processed ++ * in the given order. It can contain unmap and remap operations, depending on ++ * whether there are actual overlapping mappings to split. ++ * ++ * There can be an arbitrary amount of unmap operations and a maximum of two ++ * remap operations. ++ * ++ * Note that before calling this function again with another range to unmap it ++ * is necessary to update the &drm_gpuvm's view of the GPU VA space. The ++ * previously obtained operations must be processed or abandoned. To update the ++ * &drm_gpuvm's view of the GPU VA space drm_gpuva_insert(), ++ * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be ++ * used. ++ * ++ * After the caller finished processing the returned &drm_gpuva_ops, they must ++ * be freed with &drm_gpuva_ops_free. ++ * ++ * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure ++ */ ++struct drm_gpuva_ops * ++drm_gpuvm_sm_unmap_ops_create(struct drm_gpuvm *gpuvm, ++ u64 req_addr, u64 req_range) ++{ ++ struct drm_gpuva_ops *ops; ++ struct { ++ struct drm_gpuvm *vm; ++ struct drm_gpuva_ops *ops; ++ } args; ++ int ret; ++ ++ ops = kzalloc(sizeof(*ops), GFP_KERNEL); ++ if (unlikely(!ops)) ++ return ERR_PTR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&ops->list); ++ ++ args.vm = gpuvm; ++ args.ops = ops; ++ ++ ret = __drm_gpuvm_sm_unmap(gpuvm, &gpuvm_list_ops, &args, ++ req_addr, req_range); ++ if (ret) ++ goto err_free_ops; ++ ++ return ops; ++ ++err_free_ops: ++ drm_gpuva_ops_free(gpuvm, ops); ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_sm_unmap_ops_create); ++ ++/** ++ * drm_gpuvm_prefetch_ops_create() - creates the &drm_gpuva_ops to prefetch ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @addr: the start address of the range to prefetch ++ * @range: the range of the mappings to prefetch ++ * ++ * This function creates a list of operations to perform prefetching. 
++ * ++ * The list can be iterated with &drm_gpuva_for_each_op and must be processed ++ * in the given order. It can contain prefetch operations. ++ * ++ * There can be an arbitrary amount of prefetch operations. ++ * ++ * After the caller finished processing the returned &drm_gpuva_ops, they must ++ * be freed with &drm_gpuva_ops_free. ++ * ++ * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure ++ */ ++struct drm_gpuva_ops * ++drm_gpuvm_prefetch_ops_create(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range) ++{ ++ struct drm_gpuva_ops *ops; ++ struct drm_gpuva_op *op; ++ struct drm_gpuva *va; ++ u64 end = addr + range; ++ int ret; ++ ++ ops = kzalloc(sizeof(*ops), GFP_KERNEL); ++ if (!ops) ++ return ERR_PTR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&ops->list); ++ ++ drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { ++ op = gpuva_op_alloc(gpuvm); ++ if (!op) { ++ ret = -ENOMEM; ++ goto err_free_ops; ++ } ++ ++ op->op = DRM_GPUVA_OP_PREFETCH; ++ op->prefetch.va = va; ++ list_add_tail(&op->entry, &ops->list); ++ } ++ ++ return ops; ++ ++err_free_ops: ++ drm_gpuva_ops_free(gpuvm, ops); ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); ++ ++/** ++ * drm_gpuvm_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @obj: the &drm_gem_object to unmap ++ * ++ * This function creates a list of operations to perform unmapping for every ++ * GPUVA attached to a GEM. ++ * ++ * The list can be iterated with &drm_gpuva_for_each_op and consists out of an ++ * arbitrary amount of unmap operations. ++ * ++ * After the caller finished processing the returned &drm_gpuva_ops, they must ++ * be freed with &drm_gpuva_ops_free. ++ * ++ * It is the callers responsibility to protect the GEMs GPUVA list against ++ * concurrent access using the GEMs dma_resv lock. ++ * ++ * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure ++ */ ++struct drm_gpuva_ops * ++drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ struct drm_gpuva_ops *ops; ++ struct drm_gpuva_op *op; ++ struct drm_gpuva *va; ++ int ret; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ ++ ops = kzalloc(sizeof(*ops), GFP_KERNEL); ++ if (!ops) ++ return ERR_PTR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&ops->list); ++ ++ drm_gem_for_each_gpuva(va, obj) { ++ op = gpuva_op_alloc(gpuvm); ++ if (!op) { ++ ret = -ENOMEM; ++ goto err_free_ops; ++ } ++ ++ op->op = DRM_GPUVA_OP_UNMAP; ++ op->unmap.va = va; ++ list_add_tail(&op->entry, &ops->list); ++ } ++ ++ return ops; ++ ++err_free_ops: ++ drm_gpuva_ops_free(gpuvm, ops); ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_gem_unmap_ops_create); ++ ++/** ++ * drm_gpuva_ops_free() - free the given &drm_gpuva_ops ++ * @gpuvm: the &drm_gpuvm the ops were created for ++ * @ops: the &drm_gpuva_ops to free ++ * ++ * Frees the given &drm_gpuva_ops structure including all the ops associated ++ * with it. 
++ */ ++void ++drm_gpuva_ops_free(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva_ops *ops) ++{ ++ struct drm_gpuva_op *op, *next; ++ ++ drm_gpuva_for_each_op_safe(op, next, ops) { ++ list_del(&op->entry); ++ ++ if (op->op == DRM_GPUVA_OP_REMAP) { ++ kfree(op->remap.prev); ++ kfree(op->remap.next); ++ kfree(op->remap.unmap); ++ } ++ ++ gpuva_op_free(gpuvm, op); ++ } ++ ++ kfree(ops); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_ops_free); +--- a/drivers/gpu/drm/nouveau/nouveau_exec.c ++++ b/drivers/gpu/drm/nouveau/nouveau_exec.c +@@ -107,7 +107,7 @@ nouveau_exec_job_submit(struct nouveau_j + drm_exec_until_all_locked(exec) { + struct drm_gpuva *va; + +- drm_gpuva_for_each_va(va, &uvmm->umgr) { ++ drm_gpuvm_for_each_va(va, &uvmm->umgr) { + if (unlikely(va == &uvmm->umgr.kernel_alloc_node)) + continue; + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -329,7 +329,7 @@ nouveau_uvma_region_create(struct nouvea + struct nouveau_uvma_region *reg; + int ret; + +- if (!drm_gpuva_interval_empty(&uvmm->umgr, addr, range)) ++ if (!drm_gpuvm_interval_empty(&uvmm->umgr, addr, range)) + return -ENOSPC; + + ret = nouveau_uvma_region_alloc(®); +@@ -384,7 +384,7 @@ nouveau_uvma_region_empty(struct nouveau + { + struct nouveau_uvmm *uvmm = reg->uvmm; + +- return drm_gpuva_interval_empty(&uvmm->umgr, ++ return drm_gpuvm_interval_empty(&uvmm->umgr, + reg->va.addr, + reg->va.range); + } +@@ -444,7 +444,7 @@ op_map_prepare_unwind(struct nouveau_uvm + static void + op_unmap_prepare_unwind(struct drm_gpuva *va) + { +- drm_gpuva_insert(va->mgr, va); ++ drm_gpuva_insert(va->vm, va); + } + + static void +@@ -1194,7 +1194,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + goto unwind_continue; + } + +- op->ops = drm_gpuva_sm_unmap_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr, + op->va.addr, + op->va.range); + if (IS_ERR(op->ops)) { +@@ -1240,7 +1240,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + } + } + +- op->ops = drm_gpuva_sm_map_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->umgr, + op->va.addr, + op->va.range, + op->gem.obj, +@@ -1264,7 +1264,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + break; + } + case OP_UNMAP: +- op->ops = drm_gpuva_sm_unmap_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr, + op->va.addr, + op->va.range); + if (IS_ERR(op->ops)) { +@@ -1836,11 +1836,11 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + uvmm->kernel_managed_addr = kernel_managed_addr; + uvmm->kernel_managed_size = kernel_managed_size; + +- drm_gpuva_manager_init(&uvmm->umgr, cli->name, +- NOUVEAU_VA_SPACE_START, +- NOUVEAU_VA_SPACE_END, +- kernel_managed_addr, kernel_managed_size, +- NULL); ++ drm_gpuvm_init(&uvmm->umgr, cli->name, ++ NOUVEAU_VA_SPACE_START, ++ NOUVEAU_VA_SPACE_END, ++ kernel_managed_addr, kernel_managed_size, ++ NULL); + + ret = nvif_vmm_ctor(&cli->mmu, "uvmm", + cli->vmm.vmm.object.oclass, RAW, +@@ -1855,7 +1855,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + return 0; + + out_free_gpuva_mgr: +- drm_gpuva_manager_destroy(&uvmm->umgr); ++ drm_gpuvm_destroy(&uvmm->umgr); + out_unlock: + mutex_unlock(&cli->mutex); + return ret; +@@ -1877,7 +1877,7 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + wait_event(entity->job.wq, list_empty(&entity->job.list.head)); + + nouveau_uvmm_lock(uvmm); +- drm_gpuva_for_each_va_safe(va, next, &uvmm->umgr) { ++ drm_gpuvm_for_each_va_safe(va, next, &uvmm->umgr) { + struct nouveau_uvma *uvma = uvma_from_va(va); + struct drm_gem_object *obj = va->gem.obj; + +@@ 
-1910,7 +1910,7 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + + mutex_lock(&cli->mutex); + nouveau_vmm_fini(&uvmm->vmm); +- drm_gpuva_manager_destroy(&uvmm->umgr); ++ drm_gpuvm_destroy(&uvmm->umgr); + mutex_unlock(&cli->mutex); + + dma_resv_fini(&uvmm->resv); +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -3,13 +3,13 @@ + #ifndef __NOUVEAU_UVMM_H__ + #define __NOUVEAU_UVMM_H__ + +-#include ++#include + + #include "nouveau_drv.h" + + struct nouveau_uvmm { + struct nouveau_vmm vmm; +- struct drm_gpuva_manager umgr; ++ struct drm_gpuvm umgr; + struct maple_tree region_mt; + struct mutex mutex; + struct dma_resv resv; +@@ -44,7 +44,7 @@ struct nouveau_uvma { + #define uvmm_from_mgr(x) container_of((x), struct nouveau_uvmm, umgr) + #define uvma_from_va(x) container_of((x), struct nouveau_uvma, va) + +-#define to_uvmm(x) uvmm_from_mgr((x)->va.mgr) ++#define to_uvmm(x) uvmm_from_mgr((x)->va.vm) + + struct nouveau_uvmm_bind_job { + struct nouveau_job base; +--- a/include/drm/drm_debugfs.h ++++ b/include/drm/drm_debugfs.h +@@ -35,7 +35,7 @@ + #include + #include + +-#include ++#include + + /** + * DRM_DEBUGFS_GPUVA_INFO - &drm_info_list entry to dump a GPU VA space +@@ -152,7 +152,7 @@ void drm_debugfs_add_files(struct drm_de + const struct drm_debugfs_info *files, int count); + + int drm_debugfs_gpuva_info(struct seq_file *m, +- struct drm_gpuva_manager *mgr); ++ struct drm_gpuvm *gpuvm); + #else + static inline void drm_debugfs_create_files(const struct drm_info_list *files, + int count, struct dentry *root, +@@ -176,7 +176,7 @@ static inline void drm_debugfs_add_files + {} + + static inline int drm_debugfs_gpuva_info(struct seq_file *m, +- struct drm_gpuva_manager *mgr) ++ struct drm_gpuvm *gpuvm) + { + return 0; + } +--- a/include/drm/drm_gpuva_mgr.h ++++ /dev/null +@@ -1,706 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0-only */ +- +-#ifndef __DRM_GPUVA_MGR_H__ +-#define __DRM_GPUVA_MGR_H__ +- +-/* +- * Copyright (c) 2022 Red Hat. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +- * OTHER DEALINGS IN THE SOFTWARE. +- */ +- +-#include +-#include +-#include +- +-#include +- +-struct drm_gpuva_manager; +-struct drm_gpuva_fn_ops; +- +-/** +- * enum drm_gpuva_flags - flags for struct drm_gpuva +- */ +-enum drm_gpuva_flags { +- /** +- * @DRM_GPUVA_INVALIDATED: +- * +- * Flag indicating that the &drm_gpuva's backing GEM is invalidated. 
+- */ +- DRM_GPUVA_INVALIDATED = (1 << 0), +- +- /** +- * @DRM_GPUVA_SPARSE: +- * +- * Flag indicating that the &drm_gpuva is a sparse mapping. +- */ +- DRM_GPUVA_SPARSE = (1 << 1), +- +- /** +- * @DRM_GPUVA_USERBITS: user defined bits +- */ +- DRM_GPUVA_USERBITS = (1 << 2), +-}; +- +-/** +- * struct drm_gpuva - structure to track a GPU VA mapping +- * +- * This structure represents a GPU VA mapping and is associated with a +- * &drm_gpuva_manager. +- * +- * Typically, this structure is embedded in bigger driver structures. +- */ +-struct drm_gpuva { +- /** +- * @mgr: the &drm_gpuva_manager this object is associated with +- */ +- struct drm_gpuva_manager *mgr; +- +- /** +- * @flags: the &drm_gpuva_flags for this mapping +- */ +- enum drm_gpuva_flags flags; +- +- /** +- * @va: structure containing the address and range of the &drm_gpuva +- */ +- struct { +- /** +- * @addr: the start address +- */ +- u64 addr; +- +- /* +- * @range: the range +- */ +- u64 range; +- } va; +- +- /** +- * @gem: structure containing the &drm_gem_object and it's offset +- */ +- struct { +- /** +- * @offset: the offset within the &drm_gem_object +- */ +- u64 offset; +- +- /** +- * @obj: the mapped &drm_gem_object +- */ +- struct drm_gem_object *obj; +- +- /** +- * @entry: the &list_head to attach this object to a &drm_gem_object +- */ +- struct list_head entry; +- } gem; +- +- /** +- * @rb: structure containing data to store &drm_gpuvas in a rb-tree +- */ +- struct { +- /** +- * @rb: the rb-tree node +- */ +- struct rb_node node; +- +- /** +- * @entry: The &list_head to additionally connect &drm_gpuvas +- * in the same order they appear in the interval tree. This is +- * useful to keep iterating &drm_gpuvas from a start node found +- * through the rb-tree while doing modifications on the rb-tree +- * itself. 
+- */ +- struct list_head entry; +- +- /** +- * @__subtree_last: needed by the interval tree, holding last-in-subtree +- */ +- u64 __subtree_last; +- } rb; +-}; +- +-int drm_gpuva_insert(struct drm_gpuva_manager *mgr, struct drm_gpuva *va); +-void drm_gpuva_remove(struct drm_gpuva *va); +- +-void drm_gpuva_link(struct drm_gpuva *va); +-void drm_gpuva_unlink(struct drm_gpuva *va); +- +-struct drm_gpuva *drm_gpuva_find(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range); +-struct drm_gpuva *drm_gpuva_find_first(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range); +-struct drm_gpuva *drm_gpuva_find_prev(struct drm_gpuva_manager *mgr, u64 start); +-struct drm_gpuva *drm_gpuva_find_next(struct drm_gpuva_manager *mgr, u64 end); +- +-bool drm_gpuva_interval_empty(struct drm_gpuva_manager *mgr, u64 addr, u64 range); +- +-static inline void drm_gpuva_init(struct drm_gpuva *va, u64 addr, u64 range, +- struct drm_gem_object *obj, u64 offset) +-{ +- va->va.addr = addr; +- va->va.range = range; +- va->gem.obj = obj; +- va->gem.offset = offset; +-} +- +-/** +- * drm_gpuva_invalidate() - sets whether the backing GEM of this &drm_gpuva is +- * invalidated +- * @va: the &drm_gpuva to set the invalidate flag for +- * @invalidate: indicates whether the &drm_gpuva is invalidated +- */ +-static inline void drm_gpuva_invalidate(struct drm_gpuva *va, bool invalidate) +-{ +- if (invalidate) +- va->flags |= DRM_GPUVA_INVALIDATED; +- else +- va->flags &= ~DRM_GPUVA_INVALIDATED; +-} +- +-/** +- * drm_gpuva_invalidated() - indicates whether the backing BO of this &drm_gpuva +- * is invalidated +- * @va: the &drm_gpuva to check +- */ +-static inline bool drm_gpuva_invalidated(struct drm_gpuva *va) +-{ +- return va->flags & DRM_GPUVA_INVALIDATED; +-} +- +-/** +- * struct drm_gpuva_manager - DRM GPU VA Manager +- * +- * The DRM GPU VA Manager keeps track of a GPU's virtual address space by using +- * &maple_tree structures. Typically, this structure is embedded in bigger +- * driver structures. +- * +- * Drivers can pass addresses and ranges in an arbitrary unit, e.g. bytes or +- * pages. +- * +- * There should be one manager instance per GPU virtual address space. 
+- */ +-struct drm_gpuva_manager { +- /** +- * @name: the name of the DRM GPU VA space +- */ +- const char *name; +- +- /** +- * @mm_start: start of the VA space +- */ +- u64 mm_start; +- +- /** +- * @mm_range: length of the VA space +- */ +- u64 mm_range; +- +- /** +- * @rb: structures to track &drm_gpuva entries +- */ +- struct { +- /** +- * @tree: the rb-tree to track GPU VA mappings +- */ +- struct rb_root_cached tree; +- +- /** +- * @list: the &list_head to track GPU VA mappings +- */ +- struct list_head list; +- } rb; +- +- /** +- * @kernel_alloc_node: +- * +- * &drm_gpuva representing the address space cutout reserved for +- * the kernel +- */ +- struct drm_gpuva kernel_alloc_node; +- +- /** +- * @ops: &drm_gpuva_fn_ops providing the split/merge steps to drivers +- */ +- const struct drm_gpuva_fn_ops *ops; +-}; +- +-void drm_gpuva_manager_init(struct drm_gpuva_manager *mgr, +- const char *name, +- u64 start_offset, u64 range, +- u64 reserve_offset, u64 reserve_range, +- const struct drm_gpuva_fn_ops *ops); +-void drm_gpuva_manager_destroy(struct drm_gpuva_manager *mgr); +- +-static inline struct drm_gpuva * +-__drm_gpuva_next(struct drm_gpuva *va) +-{ +- if (va && !list_is_last(&va->rb.entry, &va->mgr->rb.list)) +- return list_next_entry(va, rb.entry); +- +- return NULL; +-} +- +-/** +- * drm_gpuva_for_each_va_range() - iterate over a range of &drm_gpuvas +- * @va__: &drm_gpuva structure to assign to in each iteration step +- * @mgr__: &drm_gpuva_manager to walk over +- * @start__: starting offset, the first gpuva will overlap this +- * @end__: ending offset, the last gpuva will start before this (but may +- * overlap) +- * +- * This iterator walks over all &drm_gpuvas in the &drm_gpuva_manager that lie +- * between @start__ and @end__. It is implemented similarly to list_for_each(), +- * but is using the &drm_gpuva_manager's internal interval tree to accelerate +- * the search for the starting &drm_gpuva, and hence isn't safe against removal +- * of elements. It assumes that @end__ is within (or is the upper limit of) the +- * &drm_gpuva_manager. This iterator does not skip over the &drm_gpuva_manager's +- * @kernel_alloc_node. +- */ +-#define drm_gpuva_for_each_va_range(va__, mgr__, start__, end__) \ +- for (va__ = drm_gpuva_find_first((mgr__), (start__), (end__) - (start__)); \ +- va__ && (va__->va.addr < (end__)); \ +- va__ = __drm_gpuva_next(va__)) +- +-/** +- * drm_gpuva_for_each_va_range_safe() - safely iterate over a range of +- * &drm_gpuvas +- * @va__: &drm_gpuva to assign to in each iteration step +- * @next__: another &drm_gpuva to use as temporary storage +- * @mgr__: &drm_gpuva_manager to walk over +- * @start__: starting offset, the first gpuva will overlap this +- * @end__: ending offset, the last gpuva will start before this (but may +- * overlap) +- * +- * This iterator walks over all &drm_gpuvas in the &drm_gpuva_manager that lie +- * between @start__ and @end__. It is implemented similarly to +- * list_for_each_safe(), but is using the &drm_gpuva_manager's internal interval +- * tree to accelerate the search for the starting &drm_gpuva, and hence is safe +- * against removal of elements. It assumes that @end__ is within (or is the +- * upper limit of) the &drm_gpuva_manager. This iterator does not skip over the +- * &drm_gpuva_manager's @kernel_alloc_node. 
+- */ +-#define drm_gpuva_for_each_va_range_safe(va__, next__, mgr__, start__, end__) \ +- for (va__ = drm_gpuva_find_first((mgr__), (start__), (end__) - (start__)), \ +- next__ = __drm_gpuva_next(va__); \ +- va__ && (va__->va.addr < (end__)); \ +- va__ = next__, next__ = __drm_gpuva_next(va__)) +- +-/** +- * drm_gpuva_for_each_va() - iterate over all &drm_gpuvas +- * @va__: &drm_gpuva to assign to in each iteration step +- * @mgr__: &drm_gpuva_manager to walk over +- * +- * This iterator walks over all &drm_gpuva structures associated with the given +- * &drm_gpuva_manager. +- */ +-#define drm_gpuva_for_each_va(va__, mgr__) \ +- list_for_each_entry(va__, &(mgr__)->rb.list, rb.entry) +- +-/** +- * drm_gpuva_for_each_va_safe() - safely iterate over all &drm_gpuvas +- * @va__: &drm_gpuva to assign to in each iteration step +- * @next__: another &drm_gpuva to use as temporary storage +- * @mgr__: &drm_gpuva_manager to walk over +- * +- * This iterator walks over all &drm_gpuva structures associated with the given +- * &drm_gpuva_manager. It is implemented with list_for_each_entry_safe(), and +- * hence safe against the removal of elements. +- */ +-#define drm_gpuva_for_each_va_safe(va__, next__, mgr__) \ +- list_for_each_entry_safe(va__, next__, &(mgr__)->rb.list, rb.entry) +- +-/** +- * enum drm_gpuva_op_type - GPU VA operation type +- * +- * Operations to alter the GPU VA mappings tracked by the &drm_gpuva_manager. +- */ +-enum drm_gpuva_op_type { +- /** +- * @DRM_GPUVA_OP_MAP: the map op type +- */ +- DRM_GPUVA_OP_MAP, +- +- /** +- * @DRM_GPUVA_OP_REMAP: the remap op type +- */ +- DRM_GPUVA_OP_REMAP, +- +- /** +- * @DRM_GPUVA_OP_UNMAP: the unmap op type +- */ +- DRM_GPUVA_OP_UNMAP, +- +- /** +- * @DRM_GPUVA_OP_PREFETCH: the prefetch op type +- */ +- DRM_GPUVA_OP_PREFETCH, +-}; +- +-/** +- * struct drm_gpuva_op_map - GPU VA map operation +- * +- * This structure represents a single map operation generated by the +- * DRM GPU VA manager. +- */ +-struct drm_gpuva_op_map { +- /** +- * @va: structure containing address and range of a map +- * operation +- */ +- struct { +- /** +- * @addr: the base address of the new mapping +- */ +- u64 addr; +- +- /** +- * @range: the range of the new mapping +- */ +- u64 range; +- } va; +- +- /** +- * @gem: structure containing the &drm_gem_object and it's offset +- */ +- struct { +- /** +- * @offset: the offset within the &drm_gem_object +- */ +- u64 offset; +- +- /** +- * @obj: the &drm_gem_object to map +- */ +- struct drm_gem_object *obj; +- } gem; +-}; +- +-/** +- * struct drm_gpuva_op_unmap - GPU VA unmap operation +- * +- * This structure represents a single unmap operation generated by the +- * DRM GPU VA manager. +- */ +-struct drm_gpuva_op_unmap { +- /** +- * @va: the &drm_gpuva to unmap +- */ +- struct drm_gpuva *va; +- +- /** +- * @keep: +- * +- * Indicates whether this &drm_gpuva is physically contiguous with the +- * original mapping request. +- * +- * Optionally, if &keep is set, drivers may keep the actual page table +- * mappings for this &drm_gpuva, adding the missing page table entries +- * only and update the &drm_gpuva_manager accordingly. +- */ +- bool keep; +-}; +- +-/** +- * struct drm_gpuva_op_remap - GPU VA remap operation +- * +- * This represents a single remap operation generated by the DRM GPU VA manager. 
+- * +- * A remap operation is generated when an existing GPU VA mmapping is split up +- * by inserting a new GPU VA mapping or by partially unmapping existent +- * mapping(s), hence it consists of a maximum of two map and one unmap +- * operation. +- * +- * The @unmap operation takes care of removing the original existing mapping. +- * @prev is used to remap the preceding part, @next the subsequent part. +- * +- * If either a new mapping's start address is aligned with the start address +- * of the old mapping or the new mapping's end address is aligned with the +- * end address of the old mapping, either @prev or @next is NULL. +- * +- * Note, the reason for a dedicated remap operation, rather than arbitrary +- * unmap and map operations, is to give drivers the chance of extracting driver +- * specific data for creating the new mappings from the unmap operations's +- * &drm_gpuva structure which typically is embedded in larger driver specific +- * structures. +- */ +-struct drm_gpuva_op_remap { +- /** +- * @prev: the preceding part of a split mapping +- */ +- struct drm_gpuva_op_map *prev; +- +- /** +- * @next: the subsequent part of a split mapping +- */ +- struct drm_gpuva_op_map *next; +- +- /** +- * @unmap: the unmap operation for the original existing mapping +- */ +- struct drm_gpuva_op_unmap *unmap; +-}; +- +-/** +- * struct drm_gpuva_op_prefetch - GPU VA prefetch operation +- * +- * This structure represents a single prefetch operation generated by the +- * DRM GPU VA manager. +- */ +-struct drm_gpuva_op_prefetch { +- /** +- * @va: the &drm_gpuva to prefetch +- */ +- struct drm_gpuva *va; +-}; +- +-/** +- * struct drm_gpuva_op - GPU VA operation +- * +- * This structure represents a single generic operation. +- * +- * The particular type of the operation is defined by @op. +- */ +-struct drm_gpuva_op { +- /** +- * @entry: +- * +- * The &list_head used to distribute instances of this struct within +- * &drm_gpuva_ops. +- */ +- struct list_head entry; +- +- /** +- * @op: the type of the operation +- */ +- enum drm_gpuva_op_type op; +- +- union { +- /** +- * @map: the map operation +- */ +- struct drm_gpuva_op_map map; +- +- /** +- * @remap: the remap operation +- */ +- struct drm_gpuva_op_remap remap; +- +- /** +- * @unmap: the unmap operation +- */ +- struct drm_gpuva_op_unmap unmap; +- +- /** +- * @prefetch: the prefetch operation +- */ +- struct drm_gpuva_op_prefetch prefetch; +- }; +-}; +- +-/** +- * struct drm_gpuva_ops - wraps a list of &drm_gpuva_op +- */ +-struct drm_gpuva_ops { +- /** +- * @list: the &list_head +- */ +- struct list_head list; +-}; +- +-/** +- * drm_gpuva_for_each_op() - iterator to walk over &drm_gpuva_ops +- * @op: &drm_gpuva_op to assign in each iteration step +- * @ops: &drm_gpuva_ops to walk +- * +- * This iterator walks over all ops within a given list of operations. +- */ +-#define drm_gpuva_for_each_op(op, ops) list_for_each_entry(op, &(ops)->list, entry) +- +-/** +- * drm_gpuva_for_each_op_safe() - iterator to safely walk over &drm_gpuva_ops +- * @op: &drm_gpuva_op to assign in each iteration step +- * @next: &next &drm_gpuva_op to store the next step +- * @ops: &drm_gpuva_ops to walk +- * +- * This iterator walks over all ops within a given list of operations. It is +- * implemented with list_for_each_safe(), so save against removal of elements. 
+- */ +-#define drm_gpuva_for_each_op_safe(op, next, ops) \ +- list_for_each_entry_safe(op, next, &(ops)->list, entry) +- +-/** +- * drm_gpuva_for_each_op_from_reverse() - iterate backwards from the given point +- * @op: &drm_gpuva_op to assign in each iteration step +- * @ops: &drm_gpuva_ops to walk +- * +- * This iterator walks over all ops within a given list of operations beginning +- * from the given operation in reverse order. +- */ +-#define drm_gpuva_for_each_op_from_reverse(op, ops) \ +- list_for_each_entry_from_reverse(op, &(ops)->list, entry) +- +-/** +- * drm_gpuva_first_op() - returns the first &drm_gpuva_op from &drm_gpuva_ops +- * @ops: the &drm_gpuva_ops to get the fist &drm_gpuva_op from +- */ +-#define drm_gpuva_first_op(ops) \ +- list_first_entry(&(ops)->list, struct drm_gpuva_op, entry) +- +-/** +- * drm_gpuva_last_op() - returns the last &drm_gpuva_op from &drm_gpuva_ops +- * @ops: the &drm_gpuva_ops to get the last &drm_gpuva_op from +- */ +-#define drm_gpuva_last_op(ops) \ +- list_last_entry(&(ops)->list, struct drm_gpuva_op, entry) +- +-/** +- * drm_gpuva_prev_op() - previous &drm_gpuva_op in the list +- * @op: the current &drm_gpuva_op +- */ +-#define drm_gpuva_prev_op(op) list_prev_entry(op, entry) +- +-/** +- * drm_gpuva_next_op() - next &drm_gpuva_op in the list +- * @op: the current &drm_gpuva_op +- */ +-#define drm_gpuva_next_op(op) list_next_entry(op, entry) +- +-struct drm_gpuva_ops * +-drm_gpuva_sm_map_ops_create(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range, +- struct drm_gem_object *obj, u64 offset); +-struct drm_gpuva_ops * +-drm_gpuva_sm_unmap_ops_create(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range); +- +-struct drm_gpuva_ops * +-drm_gpuva_prefetch_ops_create(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range); +- +-struct drm_gpuva_ops * +-drm_gpuva_gem_unmap_ops_create(struct drm_gpuva_manager *mgr, +- struct drm_gem_object *obj); +- +-void drm_gpuva_ops_free(struct drm_gpuva_manager *mgr, +- struct drm_gpuva_ops *ops); +- +-static inline void drm_gpuva_init_from_op(struct drm_gpuva *va, +- struct drm_gpuva_op_map *op) +-{ +- drm_gpuva_init(va, op->va.addr, op->va.range, +- op->gem.obj, op->gem.offset); +-} +- +-/** +- * struct drm_gpuva_fn_ops - callbacks for split/merge steps +- * +- * This structure defines the callbacks used by &drm_gpuva_sm_map and +- * &drm_gpuva_sm_unmap to provide the split/merge steps for map and unmap +- * operations to drivers. +- */ +-struct drm_gpuva_fn_ops { +- /** +- * @op_alloc: called when the &drm_gpuva_manager allocates +- * a struct drm_gpuva_op +- * +- * Some drivers may want to embed struct drm_gpuva_op into driver +- * specific structures. By implementing this callback drivers can +- * allocate memory accordingly. +- * +- * This callback is optional. +- */ +- struct drm_gpuva_op *(*op_alloc)(void); +- +- /** +- * @op_free: called when the &drm_gpuva_manager frees a +- * struct drm_gpuva_op +- * +- * Some drivers may want to embed struct drm_gpuva_op into driver +- * specific structures. By implementing this callback drivers can +- * free the previously allocated memory accordingly. +- * +- * This callback is optional. +- */ +- void (*op_free)(struct drm_gpuva_op *op); +- +- /** +- * @sm_step_map: called from &drm_gpuva_sm_map to finally insert the +- * mapping once all previous steps were completed +- * +- * The &priv pointer matches the one the driver passed to +- * &drm_gpuva_sm_map or &drm_gpuva_sm_unmap, respectively. +- * +- * Can be NULL if &drm_gpuva_sm_map is used. 
+- */ +- int (*sm_step_map)(struct drm_gpuva_op *op, void *priv); +- +- /** +- * @sm_step_remap: called from &drm_gpuva_sm_map and +- * &drm_gpuva_sm_unmap to split up an existent mapping +- * +- * This callback is called when existent mapping needs to be split up. +- * This is the case when either a newly requested mapping overlaps or +- * is enclosed by an existent mapping or a partial unmap of an existent +- * mapping is requested. +- * +- * The &priv pointer matches the one the driver passed to +- * &drm_gpuva_sm_map or &drm_gpuva_sm_unmap, respectively. +- * +- * Can be NULL if neither &drm_gpuva_sm_map nor &drm_gpuva_sm_unmap is +- * used. +- */ +- int (*sm_step_remap)(struct drm_gpuva_op *op, void *priv); +- +- /** +- * @sm_step_unmap: called from &drm_gpuva_sm_map and +- * &drm_gpuva_sm_unmap to unmap an existent mapping +- * +- * This callback is called when existent mapping needs to be unmapped. +- * This is the case when either a newly requested mapping encloses an +- * existent mapping or an unmap of an existent mapping is requested. +- * +- * The &priv pointer matches the one the driver passed to +- * &drm_gpuva_sm_map or &drm_gpuva_sm_unmap, respectively. +- * +- * Can be NULL if neither &drm_gpuva_sm_map nor &drm_gpuva_sm_unmap is +- * used. +- */ +- int (*sm_step_unmap)(struct drm_gpuva_op *op, void *priv); +-}; +- +-int drm_gpuva_sm_map(struct drm_gpuva_manager *mgr, void *priv, +- u64 addr, u64 range, +- struct drm_gem_object *obj, u64 offset); +- +-int drm_gpuva_sm_unmap(struct drm_gpuva_manager *mgr, void *priv, +- u64 addr, u64 range); +- +-void drm_gpuva_map(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va, +- struct drm_gpuva_op_map *op); +- +-void drm_gpuva_remap(struct drm_gpuva *prev, +- struct drm_gpuva *next, +- struct drm_gpuva_op_remap *op); +- +-void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op); +- +-#endif /* __DRM_GPUVA_MGR_H__ */ +--- /dev/null ++++ b/include/drm/drm_gpuvm.h +@@ -0,0 +1,705 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++ ++#ifndef __DRM_GPUVM_H__ ++#define __DRM_GPUVM_H__ ++ ++/* ++ * Copyright (c) 2022 Red Hat. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++struct drm_gpuvm; ++struct drm_gpuvm_ops; ++ ++/** ++ * enum drm_gpuva_flags - flags for struct drm_gpuva ++ */ ++enum drm_gpuva_flags { ++ /** ++ * @DRM_GPUVA_INVALIDATED: ++ * ++ * Flag indicating that the &drm_gpuva's backing GEM is invalidated. 
++ */ ++ DRM_GPUVA_INVALIDATED = (1 << 0), ++ ++ /** ++ * @DRM_GPUVA_SPARSE: ++ * ++ * Flag indicating that the &drm_gpuva is a sparse mapping. ++ */ ++ DRM_GPUVA_SPARSE = (1 << 1), ++ ++ /** ++ * @DRM_GPUVA_USERBITS: user defined bits ++ */ ++ DRM_GPUVA_USERBITS = (1 << 2), ++}; ++ ++/** ++ * struct drm_gpuva - structure to track a GPU VA mapping ++ * ++ * This structure represents a GPU VA mapping and is associated with a ++ * &drm_gpuvm. ++ * ++ * Typically, this structure is embedded in bigger driver structures. ++ */ ++struct drm_gpuva { ++ /** ++ * @vm: the &drm_gpuvm this object is associated with ++ */ ++ struct drm_gpuvm *vm; ++ ++ /** ++ * @flags: the &drm_gpuva_flags for this mapping ++ */ ++ enum drm_gpuva_flags flags; ++ ++ /** ++ * @va: structure containing the address and range of the &drm_gpuva ++ */ ++ struct { ++ /** ++ * @addr: the start address ++ */ ++ u64 addr; ++ ++ /* ++ * @range: the range ++ */ ++ u64 range; ++ } va; ++ ++ /** ++ * @gem: structure containing the &drm_gem_object and it's offset ++ */ ++ struct { ++ /** ++ * @offset: the offset within the &drm_gem_object ++ */ ++ u64 offset; ++ ++ /** ++ * @obj: the mapped &drm_gem_object ++ */ ++ struct drm_gem_object *obj; ++ ++ /** ++ * @entry: the &list_head to attach this object to a &drm_gem_object ++ */ ++ struct list_head entry; ++ } gem; ++ ++ /** ++ * @rb: structure containing data to store &drm_gpuvas in a rb-tree ++ */ ++ struct { ++ /** ++ * @rb: the rb-tree node ++ */ ++ struct rb_node node; ++ ++ /** ++ * @entry: The &list_head to additionally connect &drm_gpuvas ++ * in the same order they appear in the interval tree. This is ++ * useful to keep iterating &drm_gpuvas from a start node found ++ * through the rb-tree while doing modifications on the rb-tree ++ * itself. 
++ */ ++ struct list_head entry; ++ ++ /** ++ * @__subtree_last: needed by the interval tree, holding last-in-subtree ++ */ ++ u64 __subtree_last; ++ } rb; ++}; ++ ++int drm_gpuva_insert(struct drm_gpuvm *gpuvm, struct drm_gpuva *va); ++void drm_gpuva_remove(struct drm_gpuva *va); ++ ++void drm_gpuva_link(struct drm_gpuva *va); ++void drm_gpuva_unlink(struct drm_gpuva *va); ++ ++struct drm_gpuva *drm_gpuva_find(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range); ++struct drm_gpuva *drm_gpuva_find_first(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range); ++struct drm_gpuva *drm_gpuva_find_prev(struct drm_gpuvm *gpuvm, u64 start); ++struct drm_gpuva *drm_gpuva_find_next(struct drm_gpuvm *gpuvm, u64 end); ++ ++static inline void drm_gpuva_init(struct drm_gpuva *va, u64 addr, u64 range, ++ struct drm_gem_object *obj, u64 offset) ++{ ++ va->va.addr = addr; ++ va->va.range = range; ++ va->gem.obj = obj; ++ va->gem.offset = offset; ++} ++ ++/** ++ * drm_gpuva_invalidate() - sets whether the backing GEM of this &drm_gpuva is ++ * invalidated ++ * @va: the &drm_gpuva to set the invalidate flag for ++ * @invalidate: indicates whether the &drm_gpuva is invalidated ++ */ ++static inline void drm_gpuva_invalidate(struct drm_gpuva *va, bool invalidate) ++{ ++ if (invalidate) ++ va->flags |= DRM_GPUVA_INVALIDATED; ++ else ++ va->flags &= ~DRM_GPUVA_INVALIDATED; ++} ++ ++/** ++ * drm_gpuva_invalidated() - indicates whether the backing BO of this &drm_gpuva ++ * is invalidated ++ * @va: the &drm_gpuva to check ++ */ ++static inline bool drm_gpuva_invalidated(struct drm_gpuva *va) ++{ ++ return va->flags & DRM_GPUVA_INVALIDATED; ++} ++ ++/** ++ * struct drm_gpuvm - DRM GPU VA Manager ++ * ++ * The DRM GPU VA Manager keeps track of a GPU's virtual address space by using ++ * &maple_tree structures. Typically, this structure is embedded in bigger ++ * driver structures. ++ * ++ * Drivers can pass addresses and ranges in an arbitrary unit, e.g. bytes or ++ * pages. ++ * ++ * There should be one manager instance per GPU virtual address space. 
++ */ ++struct drm_gpuvm { ++ /** ++ * @name: the name of the DRM GPU VA space ++ */ ++ const char *name; ++ ++ /** ++ * @mm_start: start of the VA space ++ */ ++ u64 mm_start; ++ ++ /** ++ * @mm_range: length of the VA space ++ */ ++ u64 mm_range; ++ ++ /** ++ * @rb: structures to track &drm_gpuva entries ++ */ ++ struct { ++ /** ++ * @tree: the rb-tree to track GPU VA mappings ++ */ ++ struct rb_root_cached tree; ++ ++ /** ++ * @list: the &list_head to track GPU VA mappings ++ */ ++ struct list_head list; ++ } rb; ++ ++ /** ++ * @kernel_alloc_node: ++ * ++ * &drm_gpuva representing the address space cutout reserved for ++ * the kernel ++ */ ++ struct drm_gpuva kernel_alloc_node; ++ ++ /** ++ * @ops: &drm_gpuvm_ops providing the split/merge steps to drivers ++ */ ++ const struct drm_gpuvm_ops *ops; ++}; ++ ++void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ u64 start_offset, u64 range, ++ u64 reserve_offset, u64 reserve_range, ++ const struct drm_gpuvm_ops *ops); ++void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); ++ ++bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); ++ ++static inline struct drm_gpuva * ++__drm_gpuva_next(struct drm_gpuva *va) ++{ ++ if (va && !list_is_last(&va->rb.entry, &va->vm->rb.list)) ++ return list_next_entry(va, rb.entry); ++ ++ return NULL; ++} ++ ++/** ++ * drm_gpuvm_for_each_va_range() - iterate over a range of &drm_gpuvas ++ * @va__: &drm_gpuva structure to assign to in each iteration step ++ * @gpuvm__: &drm_gpuvm to walk over ++ * @start__: starting offset, the first gpuva will overlap this ++ * @end__: ending offset, the last gpuva will start before this (but may ++ * overlap) ++ * ++ * This iterator walks over all &drm_gpuvas in the &drm_gpuvm that lie ++ * between @start__ and @end__. It is implemented similarly to list_for_each(), ++ * but is using the &drm_gpuvm's internal interval tree to accelerate ++ * the search for the starting &drm_gpuva, and hence isn't safe against removal ++ * of elements. It assumes that @end__ is within (or is the upper limit of) the ++ * &drm_gpuvm. This iterator does not skip over the &drm_gpuvm's ++ * @kernel_alloc_node. ++ */ ++#define drm_gpuvm_for_each_va_range(va__, gpuvm__, start__, end__) \ ++ for (va__ = drm_gpuva_find_first((gpuvm__), (start__), (end__) - (start__)); \ ++ va__ && (va__->va.addr < (end__)); \ ++ va__ = __drm_gpuva_next(va__)) ++ ++/** ++ * drm_gpuvm_for_each_va_range_safe() - safely iterate over a range of ++ * &drm_gpuvas ++ * @va__: &drm_gpuva to assign to in each iteration step ++ * @next__: another &drm_gpuva to use as temporary storage ++ * @gpuvm__: &drm_gpuvm to walk over ++ * @start__: starting offset, the first gpuva will overlap this ++ * @end__: ending offset, the last gpuva will start before this (but may ++ * overlap) ++ * ++ * This iterator walks over all &drm_gpuvas in the &drm_gpuvm that lie ++ * between @start__ and @end__. It is implemented similarly to ++ * list_for_each_safe(), but is using the &drm_gpuvm's internal interval ++ * tree to accelerate the search for the starting &drm_gpuva, and hence is safe ++ * against removal of elements. It assumes that @end__ is within (or is the ++ * upper limit of) the &drm_gpuvm. This iterator does not skip over the ++ * &drm_gpuvm's @kernel_alloc_node. 
++ */ ++#define drm_gpuvm_for_each_va_range_safe(va__, next__, gpuvm__, start__, end__) \ ++ for (va__ = drm_gpuva_find_first((gpuvm__), (start__), (end__) - (start__)), \ ++ next__ = __drm_gpuva_next(va__); \ ++ va__ && (va__->va.addr < (end__)); \ ++ va__ = next__, next__ = __drm_gpuva_next(va__)) ++ ++/** ++ * drm_gpuvm_for_each_va() - iterate over all &drm_gpuvas ++ * @va__: &drm_gpuva to assign to in each iteration step ++ * @gpuvm__: &drm_gpuvm to walk over ++ * ++ * This iterator walks over all &drm_gpuva structures associated with the given ++ * &drm_gpuvm. ++ */ ++#define drm_gpuvm_for_each_va(va__, gpuvm__) \ ++ list_for_each_entry(va__, &(gpuvm__)->rb.list, rb.entry) ++ ++/** ++ * drm_gpuvm_for_each_va_safe() - safely iterate over all &drm_gpuvas ++ * @va__: &drm_gpuva to assign to in each iteration step ++ * @next__: another &drm_gpuva to use as temporary storage ++ * @gpuvm__: &drm_gpuvm to walk over ++ * ++ * This iterator walks over all &drm_gpuva structures associated with the given ++ * &drm_gpuvm. It is implemented with list_for_each_entry_safe(), and ++ * hence safe against the removal of elements. ++ */ ++#define drm_gpuvm_for_each_va_safe(va__, next__, gpuvm__) \ ++ list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry) ++ ++/** ++ * enum drm_gpuva_op_type - GPU VA operation type ++ * ++ * Operations to alter the GPU VA mappings tracked by the &drm_gpuvm. ++ */ ++enum drm_gpuva_op_type { ++ /** ++ * @DRM_GPUVA_OP_MAP: the map op type ++ */ ++ DRM_GPUVA_OP_MAP, ++ ++ /** ++ * @DRM_GPUVA_OP_REMAP: the remap op type ++ */ ++ DRM_GPUVA_OP_REMAP, ++ ++ /** ++ * @DRM_GPUVA_OP_UNMAP: the unmap op type ++ */ ++ DRM_GPUVA_OP_UNMAP, ++ ++ /** ++ * @DRM_GPUVA_OP_PREFETCH: the prefetch op type ++ */ ++ DRM_GPUVA_OP_PREFETCH, ++}; ++ ++/** ++ * struct drm_gpuva_op_map - GPU VA map operation ++ * ++ * This structure represents a single map operation generated by the ++ * DRM GPU VA manager. ++ */ ++struct drm_gpuva_op_map { ++ /** ++ * @va: structure containing address and range of a map ++ * operation ++ */ ++ struct { ++ /** ++ * @addr: the base address of the new mapping ++ */ ++ u64 addr; ++ ++ /** ++ * @range: the range of the new mapping ++ */ ++ u64 range; ++ } va; ++ ++ /** ++ * @gem: structure containing the &drm_gem_object and it's offset ++ */ ++ struct { ++ /** ++ * @offset: the offset within the &drm_gem_object ++ */ ++ u64 offset; ++ ++ /** ++ * @obj: the &drm_gem_object to map ++ */ ++ struct drm_gem_object *obj; ++ } gem; ++}; ++ ++/** ++ * struct drm_gpuva_op_unmap - GPU VA unmap operation ++ * ++ * This structure represents a single unmap operation generated by the ++ * DRM GPU VA manager. ++ */ ++struct drm_gpuva_op_unmap { ++ /** ++ * @va: the &drm_gpuva to unmap ++ */ ++ struct drm_gpuva *va; ++ ++ /** ++ * @keep: ++ * ++ * Indicates whether this &drm_gpuva is physically contiguous with the ++ * original mapping request. ++ * ++ * Optionally, if &keep is set, drivers may keep the actual page table ++ * mappings for this &drm_gpuva, adding the missing page table entries ++ * only and update the &drm_gpuvm accordingly. ++ */ ++ bool keep; ++}; ++ ++/** ++ * struct drm_gpuva_op_remap - GPU VA remap operation ++ * ++ * This represents a single remap operation generated by the DRM GPU VA manager. ++ * ++ * A remap operation is generated when an existing GPU VA mmapping is split up ++ * by inserting a new GPU VA mapping or by partially unmapping existent ++ * mapping(s), hence it consists of a maximum of two map and one unmap ++ * operation. 
++ * ++ * The @unmap operation takes care of removing the original existing mapping. ++ * @prev is used to remap the preceding part, @next the subsequent part. ++ * ++ * If either a new mapping's start address is aligned with the start address ++ * of the old mapping or the new mapping's end address is aligned with the ++ * end address of the old mapping, either @prev or @next is NULL. ++ * ++ * Note, the reason for a dedicated remap operation, rather than arbitrary ++ * unmap and map operations, is to give drivers the chance of extracting driver ++ * specific data for creating the new mappings from the unmap operations's ++ * &drm_gpuva structure which typically is embedded in larger driver specific ++ * structures. ++ */ ++struct drm_gpuva_op_remap { ++ /** ++ * @prev: the preceding part of a split mapping ++ */ ++ struct drm_gpuva_op_map *prev; ++ ++ /** ++ * @next: the subsequent part of a split mapping ++ */ ++ struct drm_gpuva_op_map *next; ++ ++ /** ++ * @unmap: the unmap operation for the original existing mapping ++ */ ++ struct drm_gpuva_op_unmap *unmap; ++}; ++ ++/** ++ * struct drm_gpuva_op_prefetch - GPU VA prefetch operation ++ * ++ * This structure represents a single prefetch operation generated by the ++ * DRM GPU VA manager. ++ */ ++struct drm_gpuva_op_prefetch { ++ /** ++ * @va: the &drm_gpuva to prefetch ++ */ ++ struct drm_gpuva *va; ++}; ++ ++/** ++ * struct drm_gpuva_op - GPU VA operation ++ * ++ * This structure represents a single generic operation. ++ * ++ * The particular type of the operation is defined by @op. ++ */ ++struct drm_gpuva_op { ++ /** ++ * @entry: ++ * ++ * The &list_head used to distribute instances of this struct within ++ * &drm_gpuva_ops. ++ */ ++ struct list_head entry; ++ ++ /** ++ * @op: the type of the operation ++ */ ++ enum drm_gpuva_op_type op; ++ ++ union { ++ /** ++ * @map: the map operation ++ */ ++ struct drm_gpuva_op_map map; ++ ++ /** ++ * @remap: the remap operation ++ */ ++ struct drm_gpuva_op_remap remap; ++ ++ /** ++ * @unmap: the unmap operation ++ */ ++ struct drm_gpuva_op_unmap unmap; ++ ++ /** ++ * @prefetch: the prefetch operation ++ */ ++ struct drm_gpuva_op_prefetch prefetch; ++ }; ++}; ++ ++/** ++ * struct drm_gpuva_ops - wraps a list of &drm_gpuva_op ++ */ ++struct drm_gpuva_ops { ++ /** ++ * @list: the &list_head ++ */ ++ struct list_head list; ++}; ++ ++/** ++ * drm_gpuva_for_each_op() - iterator to walk over &drm_gpuva_ops ++ * @op: &drm_gpuva_op to assign in each iteration step ++ * @ops: &drm_gpuva_ops to walk ++ * ++ * This iterator walks over all ops within a given list of operations. ++ */ ++#define drm_gpuva_for_each_op(op, ops) list_for_each_entry(op, &(ops)->list, entry) ++ ++/** ++ * drm_gpuva_for_each_op_safe() - iterator to safely walk over &drm_gpuva_ops ++ * @op: &drm_gpuva_op to assign in each iteration step ++ * @next: &next &drm_gpuva_op to store the next step ++ * @ops: &drm_gpuva_ops to walk ++ * ++ * This iterator walks over all ops within a given list of operations. It is ++ * implemented with list_for_each_safe(), so save against removal of elements. ++ */ ++#define drm_gpuva_for_each_op_safe(op, next, ops) \ ++ list_for_each_entry_safe(op, next, &(ops)->list, entry) ++ ++/** ++ * drm_gpuva_for_each_op_from_reverse() - iterate backwards from the given point ++ * @op: &drm_gpuva_op to assign in each iteration step ++ * @ops: &drm_gpuva_ops to walk ++ * ++ * This iterator walks over all ops within a given list of operations beginning ++ * from the given operation in reverse order. 
++ */ ++#define drm_gpuva_for_each_op_from_reverse(op, ops) \ ++ list_for_each_entry_from_reverse(op, &(ops)->list, entry) ++ ++/** ++ * drm_gpuva_first_op() - returns the first &drm_gpuva_op from &drm_gpuva_ops ++ * @ops: the &drm_gpuva_ops to get the fist &drm_gpuva_op from ++ */ ++#define drm_gpuva_first_op(ops) \ ++ list_first_entry(&(ops)->list, struct drm_gpuva_op, entry) ++ ++/** ++ * drm_gpuva_last_op() - returns the last &drm_gpuva_op from &drm_gpuva_ops ++ * @ops: the &drm_gpuva_ops to get the last &drm_gpuva_op from ++ */ ++#define drm_gpuva_last_op(ops) \ ++ list_last_entry(&(ops)->list, struct drm_gpuva_op, entry) ++ ++/** ++ * drm_gpuva_prev_op() - previous &drm_gpuva_op in the list ++ * @op: the current &drm_gpuva_op ++ */ ++#define drm_gpuva_prev_op(op) list_prev_entry(op, entry) ++ ++/** ++ * drm_gpuva_next_op() - next &drm_gpuva_op in the list ++ * @op: the current &drm_gpuva_op ++ */ ++#define drm_gpuva_next_op(op) list_next_entry(op, entry) ++ ++struct drm_gpuva_ops * ++drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range, ++ struct drm_gem_object *obj, u64 offset); ++struct drm_gpuva_ops * ++drm_gpuvm_sm_unmap_ops_create(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range); ++ ++struct drm_gpuva_ops * ++drm_gpuvm_prefetch_ops_create(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range); ++ ++struct drm_gpuva_ops * ++drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj); ++ ++void drm_gpuva_ops_free(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva_ops *ops); ++ ++static inline void drm_gpuva_init_from_op(struct drm_gpuva *va, ++ struct drm_gpuva_op_map *op) ++{ ++ drm_gpuva_init(va, op->va.addr, op->va.range, ++ op->gem.obj, op->gem.offset); ++} ++ ++/** ++ * struct drm_gpuvm_ops - callbacks for split/merge steps ++ * ++ * This structure defines the callbacks used by &drm_gpuvm_sm_map and ++ * &drm_gpuvm_sm_unmap to provide the split/merge steps for map and unmap ++ * operations to drivers. ++ */ ++struct drm_gpuvm_ops { ++ /** ++ * @op_alloc: called when the &drm_gpuvm allocates ++ * a struct drm_gpuva_op ++ * ++ * Some drivers may want to embed struct drm_gpuva_op into driver ++ * specific structures. By implementing this callback drivers can ++ * allocate memory accordingly. ++ * ++ * This callback is optional. ++ */ ++ struct drm_gpuva_op *(*op_alloc)(void); ++ ++ /** ++ * @op_free: called when the &drm_gpuvm frees a ++ * struct drm_gpuva_op ++ * ++ * Some drivers may want to embed struct drm_gpuva_op into driver ++ * specific structures. By implementing this callback drivers can ++ * free the previously allocated memory accordingly. ++ * ++ * This callback is optional. ++ */ ++ void (*op_free)(struct drm_gpuva_op *op); ++ ++ /** ++ * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the ++ * mapping once all previous steps were completed ++ * ++ * The &priv pointer matches the one the driver passed to ++ * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. ++ * ++ * Can be NULL if &drm_gpuvm_sm_map is used. ++ */ ++ int (*sm_step_map)(struct drm_gpuva_op *op, void *priv); ++ ++ /** ++ * @sm_step_remap: called from &drm_gpuvm_sm_map and ++ * &drm_gpuvm_sm_unmap to split up an existent mapping ++ * ++ * This callback is called when existent mapping needs to be split up. ++ * This is the case when either a newly requested mapping overlaps or ++ * is enclosed by an existent mapping or a partial unmap of an existent ++ * mapping is requested. 
++ * ++ * The &priv pointer matches the one the driver passed to ++ * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. ++ * ++ * Can be NULL if neither &drm_gpuvm_sm_map nor &drm_gpuvm_sm_unmap is ++ * used. ++ */ ++ int (*sm_step_remap)(struct drm_gpuva_op *op, void *priv); ++ ++ /** ++ * @sm_step_unmap: called from &drm_gpuvm_sm_map and ++ * &drm_gpuvm_sm_unmap to unmap an existent mapping ++ * ++ * This callback is called when existent mapping needs to be unmapped. ++ * This is the case when either a newly requested mapping encloses an ++ * existent mapping or an unmap of an existent mapping is requested. ++ * ++ * The &priv pointer matches the one the driver passed to ++ * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. ++ * ++ * Can be NULL if neither &drm_gpuvm_sm_map nor &drm_gpuvm_sm_unmap is ++ * used. ++ */ ++ int (*sm_step_unmap)(struct drm_gpuva_op *op, void *priv); ++}; ++ ++int drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv, ++ u64 addr, u64 range, ++ struct drm_gem_object *obj, u64 offset); ++ ++int drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, void *priv, ++ u64 addr, u64 range); ++ ++void drm_gpuva_map(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va, ++ struct drm_gpuva_op_map *op); ++ ++void drm_gpuva_remap(struct drm_gpuva *prev, ++ struct drm_gpuva *next, ++ struct drm_gpuva_op_remap *op); ++ ++void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op); ++ ++#endif /* __DRM_GPUVM_H__ */ diff --git a/patches-6.6/034-02-v6.7-drm-gpuvm-allow-building-as-module.patch b/patches-6.6/034-02-v6.7-drm-gpuvm-allow-building-as-module.patch new file mode 100644 index 0000000..6dba2d3 --- /dev/null +++ b/patches-6.6/034-02-v6.7-drm-gpuvm-allow-building-as-module.patch @@ -0,0 +1,78 @@ +From fe7acaa727e135621c062caa2d6d3ad4ad0b0185 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 20 Sep 2023 16:42:35 +0200 +Subject: [PATCH] drm/gpuvm: allow building as module +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently, the DRM GPUVM does not have any core dependencies preventing +a module build. + +Also, new features from subsequent patches require helpers (namely +drm_exec) which can be built as module. 
+ +Reviewed-by: Christian König +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20230920144343.64830-3-dakr@redhat.com +--- + drivers/gpu/drm/Kconfig | 7 +++++++ + drivers/gpu/drm/Makefile | 2 +- + drivers/gpu/drm/drm_gpuvm.c | 3 +++ + drivers/gpu/drm/nouveau/Kconfig | 1 + + 4 files changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/Kconfig ++++ b/drivers/gpu/drm/Kconfig +@@ -217,6 +217,13 @@ config DRM_EXEC + help + Execution context for command submissions + ++config DRM_GPUVM ++ tristate ++ depends on DRM ++ help ++ GPU-VM representation providing helpers to manage a GPUs virtual ++ address space ++ + config DRM_BUDDY + tristate + depends on DRM +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -45,7 +45,6 @@ drm-y := \ + drm_vblank.o \ + drm_vblank_work.o \ + drm_vma_manager.o \ +- drm_gpuvm.o \ + drm_writeback.o + drm-$(CONFIG_DRM_LEGACY) += \ + drm_agpsupport.o \ +@@ -81,6 +80,7 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRK + # + # + obj-$(CONFIG_DRM_EXEC) += drm_exec.o ++obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o + + obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1721,3 +1721,6 @@ drm_gpuva_ops_free(struct drm_gpuvm *gpu + kfree(ops); + } + EXPORT_SYMBOL_GPL(drm_gpuva_ops_free); ++ ++MODULE_DESCRIPTION("DRM GPUVM"); ++MODULE_LICENSE("GPL"); +--- a/drivers/gpu/drm/nouveau/Kconfig ++++ b/drivers/gpu/drm/nouveau/Kconfig +@@ -11,6 +11,7 @@ config DRM_NOUVEAU + select DRM_TTM + select DRM_TTM_HELPER + select DRM_EXEC ++ select DRM_GPUVM + select DRM_SCHED + select I2C + select I2C_ALGOBIT diff --git a/patches-6.6/034-03-v6.7-drm-nouveau-uvmm-rename-umgr-to-base-.patch b/patches-6.6/034-03-v6.7-drm-nouveau-uvmm-rename-umgr-to-base-.patch new file mode 100644 index 0000000..8576238 --- /dev/null +++ b/patches-6.6/034-03-v6.7-drm-nouveau-uvmm-rename-umgr-to-base-.patch @@ -0,0 +1,208 @@ +From 78f54469b871db5ba8ea49abd4e5994e97bd525b Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 20 Sep 2023 16:42:36 +0200 +Subject: [PATCH] drm/nouveau: uvmm: rename 'umgr' to 'base' + +Rename struct drm_gpuvm within struct nouveau_uvmm from 'umgr' to base. 
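+
+A minimal sketch of the embedding pattern behind this rename (the "my_uvmm"
+names below are illustrative, not nouveau code): the driver VM embeds
+struct drm_gpuvm as a member named 'base' and recovers its own structure
+with container_of(), exactly as the uvmm_from_gpuvm() helper below does.
+
+  #include <linux/container_of.h>
+  #include <drm/drm_gpuvm.h>
+
+  struct my_uvmm {
+          struct drm_gpuvm base;          /* generic GPU VA space state */
+          /* driver specific members follow */
+  };
+
+  static inline struct my_uvmm *to_my_uvmm(struct drm_gpuvm *gpuvm)
+  {
+          return container_of(gpuvm, struct my_uvmm, base);
+  }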
+ +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20230920144343.64830-4-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_debugfs.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_exec.c | 4 +-- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 32 +++++++++++------------ + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 6 ++--- + 4 files changed, 22 insertions(+), 22 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c ++++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c +@@ -231,7 +231,7 @@ nouveau_debugfs_gpuva(struct seq_file *m + continue; + + nouveau_uvmm_lock(uvmm); +- drm_debugfs_gpuva_info(m, &uvmm->umgr); ++ drm_debugfs_gpuva_info(m, &uvmm->base); + seq_puts(m, "\n"); + nouveau_debugfs_gpuva_regions(m, uvmm); + nouveau_uvmm_unlock(uvmm); +--- a/drivers/gpu/drm/nouveau/nouveau_exec.c ++++ b/drivers/gpu/drm/nouveau/nouveau_exec.c +@@ -107,8 +107,8 @@ nouveau_exec_job_submit(struct nouveau_j + drm_exec_until_all_locked(exec) { + struct drm_gpuva *va; + +- drm_gpuvm_for_each_va(va, &uvmm->umgr) { +- if (unlikely(va == &uvmm->umgr.kernel_alloc_node)) ++ drm_gpuvm_for_each_va(va, &uvmm->base) { ++ if (unlikely(va == &uvmm->base.kernel_alloc_node)) + continue; + + ret = drm_exec_prepare_obj(exec, va->gem.obj, 1); +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -329,7 +329,7 @@ nouveau_uvma_region_create(struct nouvea + struct nouveau_uvma_region *reg; + int ret; + +- if (!drm_gpuvm_interval_empty(&uvmm->umgr, addr, range)) ++ if (!drm_gpuvm_interval_empty(&uvmm->base, addr, range)) + return -ENOSPC; + + ret = nouveau_uvma_region_alloc(®); +@@ -384,7 +384,7 @@ nouveau_uvma_region_empty(struct nouveau + { + struct nouveau_uvmm *uvmm = reg->uvmm; + +- return drm_gpuvm_interval_empty(&uvmm->umgr, ++ return drm_gpuvm_interval_empty(&uvmm->base, + reg->va.addr, + reg->va.range); + } +@@ -589,7 +589,7 @@ op_map_prepare(struct nouveau_uvmm *uvmm + uvma->region = args->region; + uvma->kind = args->kind; + +- drm_gpuva_map(&uvmm->umgr, &uvma->va, op); ++ drm_gpuva_map(&uvmm->base, &uvma->va, op); + + /* Keep a reference until this uvma is destroyed. 
*/ + nouveau_uvma_gem_get(uvma); +@@ -1194,7 +1194,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + goto unwind_continue; + } + +- op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->base, + op->va.addr, + op->va.range); + if (IS_ERR(op->ops)) { +@@ -1205,7 +1205,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + ret = nouveau_uvmm_sm_unmap_prepare(uvmm, &op->new, + op->ops); + if (ret) { +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + op->ops = NULL; + op->reg = NULL; + goto unwind_continue; +@@ -1240,7 +1240,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + } + } + +- op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->base, + op->va.addr, + op->va.range, + op->gem.obj, +@@ -1256,7 +1256,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + op->va.range, + op->flags & 0xff); + if (ret) { +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + op->ops = NULL; + goto unwind_continue; + } +@@ -1264,7 +1264,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + break; + } + case OP_UNMAP: +- op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->base, + op->va.addr, + op->va.range); + if (IS_ERR(op->ops)) { +@@ -1275,7 +1275,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + ret = nouveau_uvmm_sm_unmap_prepare(uvmm, &op->new, + op->ops); + if (ret) { +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + op->ops = NULL; + goto unwind_continue; + } +@@ -1404,7 +1404,7 @@ unwind: + break; + } + +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + op->ops = NULL; + op->reg = NULL; + } +@@ -1509,7 +1509,7 @@ nouveau_uvmm_bind_job_free_work_fn(struc + } + + if (!IS_ERR_OR_NULL(op->ops)) +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + + if (obj) + drm_gem_object_put(obj); +@@ -1836,7 +1836,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + uvmm->kernel_managed_addr = kernel_managed_addr; + uvmm->kernel_managed_size = kernel_managed_size; + +- drm_gpuvm_init(&uvmm->umgr, cli->name, ++ drm_gpuvm_init(&uvmm->base, cli->name, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, + kernel_managed_addr, kernel_managed_size, +@@ -1855,7 +1855,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + return 0; + + out_free_gpuva_mgr: +- drm_gpuvm_destroy(&uvmm->umgr); ++ drm_gpuvm_destroy(&uvmm->base); + out_unlock: + mutex_unlock(&cli->mutex); + return ret; +@@ -1877,11 +1877,11 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + wait_event(entity->job.wq, list_empty(&entity->job.list.head)); + + nouveau_uvmm_lock(uvmm); +- drm_gpuvm_for_each_va_safe(va, next, &uvmm->umgr) { ++ drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) { + struct nouveau_uvma *uvma = uvma_from_va(va); + struct drm_gem_object *obj = va->gem.obj; + +- if (unlikely(va == &uvmm->umgr.kernel_alloc_node)) ++ if (unlikely(va == &uvmm->base.kernel_alloc_node)) + continue; + + drm_gpuva_remove(va); +@@ -1910,7 +1910,7 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + + mutex_lock(&cli->mutex); + nouveau_vmm_fini(&uvmm->vmm); +- drm_gpuvm_destroy(&uvmm->umgr); ++ drm_gpuvm_destroy(&uvmm->base); + mutex_unlock(&cli->mutex); + + dma_resv_fini(&uvmm->resv); +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -8,8 +8,8 @@ + #include "nouveau_drv.h" + + struct nouveau_uvmm { ++ struct drm_gpuvm base; + struct 
nouveau_vmm vmm; +- struct drm_gpuvm umgr; + struct maple_tree region_mt; + struct mutex mutex; + struct dma_resv resv; +@@ -41,10 +41,10 @@ struct nouveau_uvma { + u8 kind; + }; + +-#define uvmm_from_mgr(x) container_of((x), struct nouveau_uvmm, umgr) ++#define uvmm_from_gpuvm(x) container_of((x), struct nouveau_uvmm, base) + #define uvma_from_va(x) container_of((x), struct nouveau_uvma, va) + +-#define to_uvmm(x) uvmm_from_mgr((x)->va.vm) ++#define to_uvmm(x) uvmm_from_gpuvm((x)->va.vm) + + struct nouveau_uvmm_bind_job { + struct nouveau_job base; diff --git a/patches-6.6/034-04-v6.7-drm-gpuvm-Dual-licence-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch b/patches-6.6/034-04-v6.7-drm-gpuvm-Dual-licence-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch new file mode 100644 index 0000000..c1b38ad --- /dev/null +++ b/patches-6.6/034-04-v6.7-drm-gpuvm-Dual-licence-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch @@ -0,0 +1,45 @@ +From f7749a549b4f4db0c02e6b3d3800ea400dd76c12 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= +Date: Tue, 10 Oct 2023 16:27:24 +0200 +Subject: [PATCH] drm/gpuvm: Dual-licence the drm_gpuvm code GPL-2.0 OR MIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Dual-licence in order to make it possible for other non-GPL os'es +to re-implement the code. The use of EXPORT_SYMBOL_GPL() is intentionally +left untouched to prevent use of drm_gpuvm as a proxy for non-GPL drivers +to access GPL-only kernel symbols. + +Much of the ideas and algorithms used in the drm_gpuvm code is already +present in one way or another in MIT-licensed code. + +Cc: Danilo Krummrich +Cc: airlied@gmail.com +Cc: daniel@ffwll.ch +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Thomas Hellström +Acked-by: Danilo Krummrich +Reviewed-by: Francois Dugast +Link: https://patchwork.freedesktop.org/patch/msgid/20231010142725.8920-1-thomas.hellstrom@linux.intel.com +--- + drivers/gpu/drm/drm_gpuvm.c | 2 +- + include/drm/drm_gpuvm.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1,4 +1,4 @@ +-// SPDX-License-Identifier: GPL-2.0-only ++// SPDX-License-Identifier: GPL-2.0 OR MIT + /* + * Copyright (c) 2022 Red Hat. + * +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -1,4 +1,4 @@ +-/* SPDX-License-Identifier: GPL-2.0-only */ ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + + #ifndef __DRM_GPUVM_H__ + #define __DRM_GPUVM_H__ diff --git a/patches-6.6/034-05-v6.8-drm-gpuvm-convert-WARN-to-drm_WARN-variants.patch b/patches-6.6/034-05-v6.8-drm-gpuvm-convert-WARN-to-drm_WARN-variants.patch new file mode 100644 index 0000000..64e1719 --- /dev/null +++ b/patches-6.6/034-05-v6.8-drm-gpuvm-convert-WARN-to-drm_WARN-variants.patch @@ -0,0 +1,165 @@ +From 546ca4d35dccaca6613766ed36ccfb2b5bd63bfe Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:31 +0100 +Subject: [PATCH] drm/gpuvm: convert WARN() to drm_WARN() variants +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Use drm_WARN() and drm_WARN_ON() variants to indicate drivers the +context the failing VM resides in. 
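+
+A short usage sketch, illustrative only (the conditions below are made-up
+examples, not taken from this patch): once the VM carries a &drm_device
+pointer, warnings can be attributed to the failing device rather than being
+anonymous.
+
+  /* fragment; assumes a valid struct drm_gpuvm *gpuvm with gpuvm->drm set */
+  drm_WARN_ON(gpuvm->drm, range == 0);
+  drm_WARN(gpuvm->drm, addr + range < addr,
+           "GPUVA range wraps around the VA space\n");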
+ +Acked-by: Christian König +Reviewed-by: Boris Brezillon +Reviewed-by: Thomas Hellström +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-2-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 32 ++++++++++++++------------ + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 3 ++- + include/drm/drm_gpuvm.h | 7 ++++++ + 3 files changed, 26 insertions(+), 16 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -614,12 +614,12 @@ static int __drm_gpuva_insert(struct drm + static void __drm_gpuva_remove(struct drm_gpuva *va); + + static bool +-drm_gpuvm_check_overflow(u64 addr, u64 range) ++drm_gpuvm_check_overflow(struct drm_gpuvm *gpuvm, u64 addr, u64 range) + { + u64 end; + +- return WARN(check_add_overflow(addr, range, &end), +- "GPUVA address limited to %zu bytes.\n", sizeof(end)); ++ return drm_WARN(gpuvm->drm, check_add_overflow(addr, range, &end), ++ "GPUVA address limited to %zu bytes.\n", sizeof(end)); + } + + static bool +@@ -647,7 +647,7 @@ static bool + drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, + u64 addr, u64 range) + { +- return !drm_gpuvm_check_overflow(addr, range) && ++ return !drm_gpuvm_check_overflow(gpuvm, addr, range) && + drm_gpuvm_in_mm_range(gpuvm, addr, range) && + !drm_gpuvm_in_kernel_node(gpuvm, addr, range); + } +@@ -656,6 +656,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm * + * drm_gpuvm_init() - initialize a &drm_gpuvm + * @gpuvm: pointer to the &drm_gpuvm to initialize + * @name: the name of the GPU VA space ++ * @drm: the &drm_device this VM resides in + * @start_offset: the start offset of the GPU VA space + * @range: the size of the GPU VA space + * @reserve_offset: the start of the kernel reserved GPU VA area +@@ -668,8 +669,8 @@ drm_gpuvm_range_valid(struct drm_gpuvm * + * &name is expected to be managed by the surrounding driver structures. + */ + void +-drm_gpuvm_init(struct drm_gpuvm *gpuvm, +- const char *name, ++drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ struct drm_device *drm, + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops) +@@ -677,20 +678,20 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + gpuvm->rb.tree = RB_ROOT_CACHED; + INIT_LIST_HEAD(&gpuvm->rb.list); + +- drm_gpuvm_check_overflow(start_offset, range); +- gpuvm->mm_start = start_offset; +- gpuvm->mm_range = range; +- + gpuvm->name = name ? 
name : "unknown"; + gpuvm->ops = ops; ++ gpuvm->drm = drm; + +- memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); ++ drm_gpuvm_check_overflow(gpuvm, start_offset, range); ++ gpuvm->mm_start = start_offset; ++ gpuvm->mm_range = range; + ++ memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); + if (reserve_range) { + gpuvm->kernel_alloc_node.va.addr = reserve_offset; + gpuvm->kernel_alloc_node.va.range = reserve_range; + +- if (likely(!drm_gpuvm_check_overflow(reserve_offset, ++ if (likely(!drm_gpuvm_check_overflow(gpuvm, reserve_offset, + reserve_range))) + __drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node); + } +@@ -712,8 +713,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv + if (gpuvm->kernel_alloc_node.va.range) + __drm_gpuva_remove(&gpuvm->kernel_alloc_node); + +- WARN(!RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), +- "GPUVA tree is not empty, potentially leaking memory."); ++ drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), ++ "GPUVA tree is not empty, potentially leaking memory.\n"); + } + EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); + +@@ -795,7 +796,8 @@ drm_gpuva_remove(struct drm_gpuva *va) + struct drm_gpuvm *gpuvm = va->vm; + + if (unlikely(va == &gpuvm->kernel_alloc_node)) { +- WARN(1, "Can't destroy kernel reserved node.\n"); ++ drm_WARN(gpuvm->drm, 1, ++ "Can't destroy kernel reserved node.\n"); + return; + } + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1808,6 +1808,7 @@ int + nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, + u64 kernel_managed_addr, u64 kernel_managed_size) + { ++ struct drm_device *drm = cli->drm->dev; + int ret; + u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; + +@@ -1836,7 +1837,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + uvmm->kernel_managed_addr = kernel_managed_addr; + uvmm->kernel_managed_size = kernel_managed_size; + +- drm_gpuvm_init(&uvmm->base, cli->name, ++ drm_gpuvm_init(&uvmm->base, cli->name, drm, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, + kernel_managed_addr, kernel_managed_size, +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -29,6 +29,7 @@ + #include + #include + ++#include + #include + + struct drm_gpuvm; +@@ -202,6 +203,11 @@ struct drm_gpuvm { + const char *name; + + /** ++ * @drm: the &drm_device this VM lives in ++ */ ++ struct drm_device *drm; ++ ++ /** + * @mm_start: start of the VA space + */ + u64 mm_start; +@@ -241,6 +247,7 @@ struct drm_gpuvm { + }; + + void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ struct drm_device *drm, + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops); diff --git a/patches-6.6/034-06-v6.8-drm-gpuvm-export-drm_gpuvm_range_valid.patch b/patches-6.6/034-06-v6.8-drm-gpuvm-export-drm_gpuvm_range_valid.patch new file mode 100644 index 0000000..cead71a --- /dev/null +++ b/patches-6.6/034-06-v6.8-drm-gpuvm-export-drm_gpuvm_range_valid.patch @@ -0,0 +1,61 @@ +From 9297cfc9405bc6b60540b8b8aaf930b7e449e15a Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:33 +0100 +Subject: [PATCH] drm/gpuvm: export drm_gpuvm_range_valid() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Drivers may use this function to validate userspace requests in advance, +hence export it. 
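+
+A hedged sketch of the intended driver-side use (the function name and the
+page-alignment policy are illustrative, not from an existing driver): reject
+a userspace-supplied VA range up front, before building any map or unmap
+operations for it. The nouveau conversion in the following patch takes
+essentially this shape.
+
+  #include <linux/mm.h>
+  #include <drm/drm_gpuvm.h>
+
+  static int my_validate_va_range(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
+  {
+          if ((addr | range) & ~PAGE_MASK)        /* driver policy: page aligned */
+                  return -EINVAL;
+
+          if (!drm_gpuvm_range_valid(gpuvm, addr, range))
+                  return -EINVAL;                 /* outside the managed VA space */
+
+          return 0;
+  }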
+ +Acked-by: Christian König +Reviewed-by: Thomas Hellström +Reviewed-by: Boris Brezillon +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-4-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 14 +++++++++++++- + include/drm/drm_gpuvm.h | 1 + + 2 files changed, 14 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -643,7 +643,18 @@ drm_gpuvm_in_kernel_node(struct drm_gpuv + return krange && addr < kend && kstart < end; + } + +-static bool ++/** ++ * drm_gpuvm_range_valid() - checks whether the given range is valid for the ++ * given &drm_gpuvm ++ * @gpuvm: the GPUVM to check the range for ++ * @addr: the base address ++ * @range: the range starting from the base address ++ * ++ * Checks whether the range is within the GPUVM's managed boundaries. ++ * ++ * Returns: true for a valid range, false otherwise ++ */ ++bool + drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, + u64 addr, u64 range) + { +@@ -651,6 +662,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm * + drm_gpuvm_in_mm_range(gpuvm, addr, range) && + !drm_gpuvm_in_kernel_node(gpuvm, addr, range); + } ++EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid); + + /** + * drm_gpuvm_init() - initialize a &drm_gpuvm +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -253,6 +253,7 @@ void drm_gpuvm_init(struct drm_gpuvm *gp + const struct drm_gpuvm_ops *ops); + void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); + ++bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + + static inline struct drm_gpuva * diff --git a/patches-6.6/034-07-v6.8-drm-nouveau-make-use-of-drm_gpuvm_range_valid.patch b/patches-6.6/034-07-v6.8-drm-nouveau-make-use-of-drm_gpuvm_range_valid.patch new file mode 100644 index 0000000..3d790e6 --- /dev/null +++ b/patches-6.6/034-07-v6.8-drm-nouveau-make-use-of-drm_gpuvm_range_valid.patch @@ -0,0 +1,66 @@ +From b41e297abd2347075ec640daf0e5da576e3d7418 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:34 +0100 +Subject: [PATCH] drm/nouveau: make use of drm_gpuvm_range_valid() + +Use drm_gpuvm_range_valid() in order to validate userspace requests. 
+ +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-5-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 17 +---------------- + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 3 --- + 2 files changed, 1 insertion(+), 19 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -929,25 +929,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nou + static int + nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range) + { +- u64 end = addr + range; +- u64 kernel_managed_end = uvmm->kernel_managed_addr + +- uvmm->kernel_managed_size; +- + if (addr & ~PAGE_MASK) + return -EINVAL; + + if (range & ~PAGE_MASK) + return -EINVAL; + +- if (end <= addr) +- return -EINVAL; +- +- if (addr < NOUVEAU_VA_SPACE_START || +- end > NOUVEAU_VA_SPACE_END) +- return -EINVAL; +- +- if (addr < kernel_managed_end && +- end > uvmm->kernel_managed_addr) ++ if (!drm_gpuvm_range_valid(&uvmm->base, addr, range)) + return -EINVAL; + + return 0; +@@ -1834,9 +1822,6 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + goto out_unlock; + } + +- uvmm->kernel_managed_addr = kernel_managed_addr; +- uvmm->kernel_managed_size = kernel_managed_size; +- + drm_gpuvm_init(&uvmm->base, cli->name, drm, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -14,9 +14,6 @@ struct nouveau_uvmm { + struct mutex mutex; + struct dma_resv resv; + +- u64 kernel_managed_addr; +- u64 kernel_managed_size; +- + bool disabled; + }; + diff --git a/patches-6.6/034-08-v6.8-drm-gpuvm-add-common-dma-resv-per-struct-drm_gpuvm.patch b/patches-6.6/034-08-v6.8-drm-gpuvm-add-common-dma-resv-per-struct-drm_gpuvm.patch new file mode 100644 index 0000000..c562650 --- /dev/null +++ b/patches-6.6/034-08-v6.8-drm-gpuvm-add-common-dma-resv-per-struct-drm_gpuvm.patch @@ -0,0 +1,205 @@ +From bbe8458037e74b9887ba2f0f0b8084a13ade3a90 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:35 +0100 +Subject: [PATCH] drm/gpuvm: add common dma-resv per struct drm_gpuvm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Provide a common dma-resv for GEM objects not being used outside of this +GPU-VM. This is used in a subsequent patch to generalize dma-resv, +external and evicted object handling and GEM validation. + +Acked-by: Christian König +Reviewed-by: Boris Brezillon +Reviewed-by: Thomas Hellström +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-6-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 53 ++++++++++++++++++++++++++ + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 13 ++++++- + include/drm/drm_gpuvm.h | 33 ++++++++++++++++ + 3 files changed, 97 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -61,6 +61,15 @@ + * contained within struct drm_gpuva already. Hence, for inserting &drm_gpuva + * entries from within dma-fence signalling critical sections it is enough to + * pre-allocate the &drm_gpuva structures. ++ * ++ * &drm_gem_objects which are private to a single VM can share a common ++ * &dma_resv in order to improve locking efficiency (e.g. with &drm_exec). ++ * For this purpose drivers must pass a &drm_gem_object to drm_gpuvm_init(), in ++ * the following called 'resv object', which serves as the container of the ++ * GPUVM's shared &dma_resv. 
This resv object can be a driver specific ++ * &drm_gem_object, such as the &drm_gem_object containing the root page table, ++ * but it can also be a 'dummy' object, which can be allocated with ++ * drm_gpuvm_resv_object_alloc(). + */ + + /** +@@ -664,11 +673,49 @@ drm_gpuvm_range_valid(struct drm_gpuvm * + } + EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid); + ++static void ++drm_gpuvm_gem_object_free(struct drm_gem_object *obj) ++{ ++ drm_gem_object_release(obj); ++ kfree(obj); ++} ++ ++static const struct drm_gem_object_funcs drm_gpuvm_object_funcs = { ++ .free = drm_gpuvm_gem_object_free, ++}; ++ ++/** ++ * drm_gpuvm_resv_object_alloc() - allocate a dummy &drm_gem_object ++ * @drm: the drivers &drm_device ++ * ++ * Allocates a dummy &drm_gem_object which can be passed to drm_gpuvm_init() in ++ * order to serve as root GEM object providing the &drm_resv shared across ++ * &drm_gem_objects local to a single GPUVM. ++ * ++ * Returns: the &drm_gem_object on success, NULL on failure ++ */ ++struct drm_gem_object * ++drm_gpuvm_resv_object_alloc(struct drm_device *drm) ++{ ++ struct drm_gem_object *obj; ++ ++ obj = kzalloc(sizeof(*obj), GFP_KERNEL); ++ if (!obj) ++ return NULL; ++ ++ obj->funcs = &drm_gpuvm_object_funcs; ++ drm_gem_private_object_init(drm, obj, 0); ++ ++ return obj; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc); ++ + /** + * drm_gpuvm_init() - initialize a &drm_gpuvm + * @gpuvm: pointer to the &drm_gpuvm to initialize + * @name: the name of the GPU VA space + * @drm: the &drm_device this VM resides in ++ * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv + * @start_offset: the start offset of the GPU VA space + * @range: the size of the GPU VA space + * @reserve_offset: the start of the kernel reserved GPU VA area +@@ -683,6 +730,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid) + void + drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + struct drm_device *drm, ++ struct drm_gem_object *r_obj, + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops) +@@ -693,6 +741,9 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + gpuvm->name = name ? name : "unknown"; + gpuvm->ops = ops; + gpuvm->drm = drm; ++ gpuvm->r_obj = r_obj; ++ ++ drm_gem_object_get(r_obj); + + drm_gpuvm_check_overflow(gpuvm, start_offset, range); + gpuvm->mm_start = start_offset; +@@ -727,6 +778,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv + + drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), + "GPUVA tree is not empty, potentially leaking memory.\n"); ++ ++ drm_gem_object_put(gpuvm->r_obj); + } + EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1797,8 +1797,9 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + u64 kernel_managed_addr, u64 kernel_managed_size) + { + struct drm_device *drm = cli->drm->dev; +- int ret; ++ struct drm_gem_object *r_obj; + u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; ++ int ret; + + mutex_init(&uvmm->mutex); + dma_resv_init(&uvmm->resv); +@@ -1822,11 +1823,19 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + goto out_unlock; + } + +- drm_gpuvm_init(&uvmm->base, cli->name, drm, ++ r_obj = drm_gpuvm_resv_object_alloc(drm); ++ if (!r_obj) { ++ ret = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, + kernel_managed_addr, kernel_managed_size, + NULL); ++ /* GPUVM takes care from here on. 
*/ ++ drm_gem_object_put(r_obj); + + ret = nvif_vmm_ctor(&cli->mmu, "uvmm", + cli->vmm.vmm.object.oclass, RAW, +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -244,10 +244,16 @@ struct drm_gpuvm { + * @ops: &drm_gpuvm_ops providing the split/merge steps to drivers + */ + const struct drm_gpuvm_ops *ops; ++ ++ /** ++ * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv. ++ */ ++ struct drm_gem_object *r_obj; + }; + + void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + struct drm_device *drm, ++ struct drm_gem_object *r_obj, + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops); +@@ -256,6 +262,33 @@ void drm_gpuvm_destroy(struct drm_gpuvm + bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + ++struct drm_gem_object * ++drm_gpuvm_resv_object_alloc(struct drm_device *drm); ++ ++/** ++ * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv ++ * @gpuvm__: the &drm_gpuvm ++ * ++ * Returns: a pointer to the &drm_gpuvm's shared &dma_resv ++ */ ++#define drm_gpuvm_resv(gpuvm__) ((gpuvm__)->r_obj->resv) ++ ++/** ++ * drm_gpuvm_resv_obj() - returns the &drm_gem_object holding the &drm_gpuvm's ++ * &dma_resv ++ * @gpuvm__: the &drm_gpuvm ++ * ++ * Returns: a pointer to the &drm_gem_object holding the &drm_gpuvm's shared ++ * &dma_resv ++ */ ++#define drm_gpuvm_resv_obj(gpuvm__) ((gpuvm__)->r_obj) ++ ++#define drm_gpuvm_resv_held(gpuvm__) \ ++ dma_resv_held(drm_gpuvm_resv(gpuvm__)) ++ ++#define drm_gpuvm_resv_assert_held(gpuvm__) \ ++ dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) ++ + static inline struct drm_gpuva * + __drm_gpuva_next(struct drm_gpuva *va) + { diff --git a/patches-6.6/034-09-v6.8-drm-nouveau-make-use-of-the-GPUVM-s-shared-dma-resv.patch b/patches-6.6/034-09-v6.8-drm-nouveau-make-use-of-the-GPUVM-s-shared-dma-resv.patch new file mode 100644 index 0000000..90d3fec --- /dev/null +++ b/patches-6.6/034-09-v6.8-drm-nouveau-make-use-of-the-GPUVM-s-shared-dma-resv.patch @@ -0,0 +1,140 @@ +From 6118411428a393fb0868bad9025d71875418058b Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:36 +0100 +Subject: [PATCH] drm/nouveau: make use of the GPUVM's shared dma-resv + +DRM GEM objects private to a single GPUVM can use a shared dma-resv. +Make use of the shared dma-resv of GPUVM rather than a driver specific +one. + +The shared dma-resv originates from a "root" GEM object serving as +container for the dma-resv to make it compatible with drm_exec. + +In order to make sure the object proving the shared dma-resv can't be +freed up before the objects making use of it, let every such GEM object +take a reference on it. + +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-7-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_bo.c | 11 +++++++++-- + drivers/gpu/drm/nouveau/nouveau_bo.h | 5 +++++ + drivers/gpu/drm/nouveau/nouveau_gem.c | 10 ++++++++-- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 7 ++----- + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 1 - + 5 files changed, 24 insertions(+), 10 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_obj + * If nouveau_bo_new() allocated this buffer, the GEM object was never + * initialized, so don't attempt to release it. 
+ */ +- if (bo->base.dev) ++ if (bo->base.dev) { ++ /* Gem objects not being shared with other VMs get their ++ * dma_resv from a root GEM object. ++ */ ++ if (nvbo->no_share) ++ drm_gem_object_put(nvbo->r_obj); ++ + drm_gem_object_release(&bo->base); +- else ++ } else { + dma_resv_fini(&bo->base._resv); ++ } + + kfree(nvbo); + } +--- a/drivers/gpu/drm/nouveau/nouveau_bo.h ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.h +@@ -26,6 +26,11 @@ struct nouveau_bo { + struct list_head entry; + int pbbo_index; + bool validate_mapped; ++ ++ /* Root GEM object we derive the dma_resv of in case this BO is not ++ * shared between VMs. ++ */ ++ struct drm_gem_object *r_obj; + bool no_share; + + /* GPU address space is independent of CPU word size */ +--- a/drivers/gpu/drm/nouveau/nouveau_gem.c ++++ b/drivers/gpu/drm/nouveau/nouveau_gem.c +@@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_o + if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) + return 0; + +- if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv) ++ if (nvbo->no_share && uvmm && ++ drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv) + return -EPERM; + + ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); +@@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli, + if (unlikely(!uvmm)) + return -EINVAL; + +- resv = &uvmm->resv; ++ resv = drm_gpuvm_resv(&uvmm->base); + } + + if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART))) +@@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli, + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) + nvbo->valid_domains &= domain; + ++ if (nvbo->no_share) { ++ nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base); ++ drm_gem_object_get(nvbo->r_obj); ++ } ++ + *pnvbo = nvbo; + return 0; + } +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1802,7 +1802,6 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + int ret; + + mutex_init(&uvmm->mutex); +- dma_resv_init(&uvmm->resv); + mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); + +@@ -1842,14 +1841,14 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + kernel_managed_addr, kernel_managed_size, + NULL, 0, &cli->uvmm.vmm.vmm); + if (ret) +- goto out_free_gpuva_mgr; ++ goto out_gpuvm_fini; + + cli->uvmm.vmm.cli = cli; + mutex_unlock(&cli->mutex); + + return 0; + +-out_free_gpuva_mgr: ++out_gpuvm_fini: + drm_gpuvm_destroy(&uvmm->base); + out_unlock: + mutex_unlock(&cli->mutex); +@@ -1907,6 +1906,4 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + nouveau_vmm_fini(&uvmm->vmm); + drm_gpuvm_destroy(&uvmm->base); + mutex_unlock(&cli->mutex); +- +- dma_resv_fini(&uvmm->resv); + } +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -12,7 +12,6 @@ struct nouveau_uvmm { + struct nouveau_vmm vmm; + struct maple_tree region_mt; + struct mutex mutex; +- struct dma_resv resv; + + bool disabled; + }; diff --git a/patches-6.6/034-10-v6.8-drm-gpuvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch b/patches-6.6/034-10-v6.8-drm-gpuvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch new file mode 100644 index 0000000..fd95b76 --- /dev/null +++ b/patches-6.6/034-10-v6.8-drm-gpuvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch @@ -0,0 +1,98 @@ +From 809ef191ee600e8bcbe2f8a769e00d2d54c16094 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:37 +0100 +Subject: [PATCH] drm/gpuvm: add drm_gpuvm_flags to drm_gpuvm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+Introduce flags for struct drm_gpuvm, this required by subsequent +commits. + +Acked-by: Christian König +Reviewed-by: Boris Brezillon +Reviewed-by: Thomas Hellström +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-8-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 3 +++ + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +- + include/drm/drm_gpuvm.h | 16 ++++++++++++++++ + 3 files changed, 20 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -714,6 +714,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_ + * drm_gpuvm_init() - initialize a &drm_gpuvm + * @gpuvm: pointer to the &drm_gpuvm to initialize + * @name: the name of the GPU VA space ++ * @flags: the &drm_gpuvm_flags for this GPUVM + * @drm: the &drm_device this VM resides in + * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv + * @start_offset: the start offset of the GPU VA space +@@ -729,6 +730,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_ + */ + void + drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ enum drm_gpuvm_flags flags, + struct drm_device *drm, + struct drm_gem_object *r_obj, + u64 start_offset, u64 range, +@@ -739,6 +741,7 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + INIT_LIST_HEAD(&gpuvm->rb.list); + + gpuvm->name = name ? name : "unknown"; ++ gpuvm->flags = flags; + gpuvm->ops = ops; + gpuvm->drm = drm; + gpuvm->r_obj = r_obj; +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1828,7 +1828,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + goto out_unlock; + } + +- drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj, ++ drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, + kernel_managed_addr, kernel_managed_size, +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -185,6 +185,16 @@ static inline bool drm_gpuva_invalidated + } + + /** ++ * enum drm_gpuvm_flags - flags for struct drm_gpuvm ++ */ ++enum drm_gpuvm_flags { ++ /** ++ * @DRM_GPUVM_USERBITS: user defined bits ++ */ ++ DRM_GPUVM_USERBITS = BIT(0), ++}; ++ ++/** + * struct drm_gpuvm - DRM GPU VA Manager + * + * The DRM GPU VA Manager keeps track of a GPU's virtual address space by using +@@ -203,6 +213,11 @@ struct drm_gpuvm { + const char *name; + + /** ++ * @flags: the &drm_gpuvm_flags of this GPUVM ++ */ ++ enum drm_gpuvm_flags flags; ++ ++ /** + * @drm: the &drm_device this VM lives in + */ + struct drm_device *drm; +@@ -252,6 +267,7 @@ struct drm_gpuvm { + }; + + void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ enum drm_gpuvm_flags flags, + struct drm_device *drm, + struct drm_gem_object *r_obj, + u64 start_offset, u64 range, diff --git a/patches-6.6/034-11-v6.8-drm-nouveau-separately-allocate-struct-nouveau_uvmm.patch b/patches-6.6/034-11-v6.8-drm-nouveau-separately-allocate-struct-nouveau_uvmm.patch new file mode 100644 index 0000000..7876d5d --- /dev/null +++ b/patches-6.6/034-11-v6.8-drm-nouveau-separately-allocate-struct-nouveau_uvmm.patch @@ -0,0 +1,219 @@ +From 266f7618e761c8a6aa89dbfe43cda1b69cdbbf14 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:38 +0100 +Subject: [PATCH] drm/nouveau: separately allocate struct nouveau_uvmm + +Allocate struct nouveau_uvmm separately in preparation for subsequent +commits introducing reference counting for struct drm_gpuvm. 
+ +While at it, get rid of nouveau_uvmm_init() as indirection of +nouveau_uvmm_ioctl_vm_init() and perform some minor cleanups. + +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-9-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_drm.c | 5 +- + drivers/gpu/drm/nouveau/nouveau_drv.h | 10 ++-- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 63 +++++++++++++------------- + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 4 -- + 4 files changed, 40 insertions(+), 42 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_drm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_drm.c +@@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cl + static void + nouveau_cli_fini(struct nouveau_cli *cli) + { ++ struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli); ++ + /* All our channels are dead now, which means all the fences they + * own are signalled, and all callback functions have been called. + * +@@ -199,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli + WARN_ON(!list_empty(&cli->worker)); + + usif_client_fini(cli); +- nouveau_uvmm_fini(&cli->uvmm); ++ if (uvmm) ++ nouveau_uvmm_fini(uvmm); + nouveau_sched_entity_fini(&cli->sched_entity); + nouveau_vmm_fini(&cli->svm); + nouveau_vmm_fini(&cli->vmm); +--- a/drivers/gpu/drm/nouveau/nouveau_drv.h ++++ b/drivers/gpu/drm/nouveau/nouveau_drv.h +@@ -93,7 +93,10 @@ struct nouveau_cli { + struct nvif_mmu mmu; + struct nouveau_vmm vmm; + struct nouveau_vmm svm; +- struct nouveau_uvmm uvmm; ++ struct { ++ struct nouveau_uvmm *ptr; ++ bool disabled; ++ } uvmm; + + struct nouveau_sched_entity sched_entity; + +@@ -121,10 +124,7 @@ struct nouveau_cli_work { + static inline struct nouveau_uvmm * + nouveau_cli_uvmm(struct nouveau_cli *cli) + { +- if (!cli || !cli->uvmm.vmm.cli) +- return NULL; +- +- return &cli->uvmm; ++ return cli ? 
cli->uvmm.ptr : NULL; + } + + static inline struct nouveau_uvmm * +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1636,18 +1636,6 @@ err_free: + return ret; + } + +-int +-nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, +- void *data, +- struct drm_file *file_priv) +-{ +- struct nouveau_cli *cli = nouveau_cli(file_priv); +- struct drm_nouveau_vm_init *init = data; +- +- return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr, +- init->kernel_managed_size); +-} +- + static int + nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args) + { +@@ -1793,17 +1781,25 @@ nouveau_uvmm_bo_unmap_all(struct nouveau + } + + int +-nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, +- u64 kernel_managed_addr, u64 kernel_managed_size) ++nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, ++ void *data, ++ struct drm_file *file_priv) + { ++ struct nouveau_uvmm *uvmm; ++ struct nouveau_cli *cli = nouveau_cli(file_priv); + struct drm_device *drm = cli->drm->dev; + struct drm_gem_object *r_obj; +- u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; ++ struct drm_nouveau_vm_init *init = data; ++ u64 kernel_managed_end; + int ret; + +- mutex_init(&uvmm->mutex); +- mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); +- mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); ++ if (check_add_overflow(init->kernel_managed_addr, ++ init->kernel_managed_size, ++ &kernel_managed_end)) ++ return -EINVAL; ++ ++ if (kernel_managed_end > NOUVEAU_VA_SPACE_END) ++ return -EINVAL; + + mutex_lock(&cli->mutex); + +@@ -1812,44 +1808,49 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + goto out_unlock; + } + +- if (kernel_managed_end <= kernel_managed_addr) { +- ret = -EINVAL; +- goto out_unlock; +- } +- +- if (kernel_managed_end > NOUVEAU_VA_SPACE_END) { +- ret = -EINVAL; ++ uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL); ++ if (!uvmm) { ++ ret = -ENOMEM; + goto out_unlock; + } + + r_obj = drm_gpuvm_resv_object_alloc(drm); + if (!r_obj) { ++ kfree(uvmm); + ret = -ENOMEM; + goto out_unlock; + } + ++ mutex_init(&uvmm->mutex); ++ mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); ++ mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); ++ + drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, +- kernel_managed_addr, kernel_managed_size, ++ init->kernel_managed_addr, ++ init->kernel_managed_size, + NULL); + /* GPUVM takes care from here on. 
*/ + drm_gem_object_put(r_obj); + + ret = nvif_vmm_ctor(&cli->mmu, "uvmm", + cli->vmm.vmm.object.oclass, RAW, +- kernel_managed_addr, kernel_managed_size, +- NULL, 0, &cli->uvmm.vmm.vmm); ++ init->kernel_managed_addr, ++ init->kernel_managed_size, ++ NULL, 0, &uvmm->vmm.vmm); + if (ret) + goto out_gpuvm_fini; + +- cli->uvmm.vmm.cli = cli; ++ uvmm->vmm.cli = cli; ++ cli->uvmm.ptr = uvmm; + mutex_unlock(&cli->mutex); + + return 0; + + out_gpuvm_fini: + drm_gpuvm_destroy(&uvmm->base); ++ kfree(uvmm); + out_unlock: + mutex_unlock(&cli->mutex); + return ret; +@@ -1864,9 +1865,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + struct nouveau_sched_entity *entity = &cli->sched_entity; + struct drm_gpuva *va, *next; + +- if (!cli) +- return; +- + rmb(); /* for list_empty to work without lock */ + wait_event(entity->job.wq, list_empty(&entity->job.list.head)); + +@@ -1905,5 +1903,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + mutex_lock(&cli->mutex); + nouveau_vmm_fini(&uvmm->vmm); + drm_gpuvm_destroy(&uvmm->base); ++ kfree(uvmm); + mutex_unlock(&cli->mutex); + } +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -12,8 +12,6 @@ struct nouveau_uvmm { + struct nouveau_vmm vmm; + struct maple_tree region_mt; + struct mutex mutex; +- +- bool disabled; + }; + + struct nouveau_uvma_region { +@@ -78,8 +76,6 @@ struct nouveau_uvmm_bind_job_args { + + #define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base) + +-int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, +- u64 kernel_managed_addr, u64 kernel_managed_size); + void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm); + + void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem); diff --git a/patches-6.6/034-12-v6.8-drm-gpuvm-reference-count-drm_gpuvm-structures.patch b/patches-6.6/034-12-v6.8-drm-gpuvm-reference-count-drm_gpuvm-structures.patch new file mode 100644 index 0000000..5c5d1d2 --- /dev/null +++ b/patches-6.6/034-12-v6.8-drm-gpuvm-reference-count-drm_gpuvm-structures.patch @@ -0,0 +1,221 @@ +From 8af72338dd81d1f8667e0240bd28f5fc98b3f20d Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:39 +0100 +Subject: [PATCH] drm/gpuvm: reference count drm_gpuvm structures +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Implement reference counting for struct drm_gpuvm. + +Acked-by: Christian König +Reviewed-by: Thomas Hellström +Reviewed-by: Boris Brezillon +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-10-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 56 +++++++++++++++++++++----- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 20 ++++++--- + include/drm/drm_gpuvm.h | 31 +++++++++++++- + 3 files changed, 90 insertions(+), 17 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -740,6 +740,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + gpuvm->rb.tree = RB_ROOT_CACHED; + INIT_LIST_HEAD(&gpuvm->rb.list); + ++ kref_init(&gpuvm->kref); ++ + gpuvm->name = name ? name : "unknown"; + gpuvm->flags = flags; + gpuvm->ops = ops; +@@ -764,15 +766,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + } + EXPORT_SYMBOL_GPL(drm_gpuvm_init); + +-/** +- * drm_gpuvm_destroy() - cleanup a &drm_gpuvm +- * @gpuvm: pointer to the &drm_gpuvm to clean up +- * +- * Note that it is a bug to call this function on a manager that still +- * holds GPU VA mappings. 
+- */ +-void +-drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) ++static void ++drm_gpuvm_fini(struct drm_gpuvm *gpuvm) + { + gpuvm->name = NULL; + +@@ -784,7 +779,35 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv + + drm_gem_object_put(gpuvm->r_obj); + } +-EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); ++ ++static void ++drm_gpuvm_free(struct kref *kref) ++{ ++ struct drm_gpuvm *gpuvm = container_of(kref, struct drm_gpuvm, kref); ++ ++ drm_gpuvm_fini(gpuvm); ++ ++ if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free)) ++ return; ++ ++ gpuvm->ops->vm_free(gpuvm); ++} ++ ++/** ++ * drm_gpuvm_put() - drop a struct drm_gpuvm reference ++ * @gpuvm: the &drm_gpuvm to release the reference of ++ * ++ * This releases a reference to @gpuvm. ++ * ++ * This function may be called from atomic context. ++ */ ++void ++drm_gpuvm_put(struct drm_gpuvm *gpuvm) ++{ ++ if (gpuvm) ++ kref_put(&gpuvm->kref, drm_gpuvm_free); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_put); + + static int + __drm_gpuva_insert(struct drm_gpuvm *gpuvm, +@@ -833,11 +856,21 @@ drm_gpuva_insert(struct drm_gpuvm *gpuvm + { + u64 addr = va->va.addr; + u64 range = va->va.range; ++ int ret; + + if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range))) + return -EINVAL; + +- return __drm_gpuva_insert(gpuvm, va); ++ ret = __drm_gpuva_insert(gpuvm, va); ++ if (likely(!ret)) ++ /* Take a reference of the GPUVM for the successfully inserted ++ * drm_gpuva. We can't take the reference in ++ * __drm_gpuva_insert() itself, since we don't want to increse ++ * the reference count for the GPUVM's kernel_alloc_node. ++ */ ++ drm_gpuvm_get(gpuvm); ++ ++ return ret; + } + EXPORT_SYMBOL_GPL(drm_gpuva_insert); + +@@ -870,6 +903,7 @@ drm_gpuva_remove(struct drm_gpuva *va) + } + + __drm_gpuva_remove(va); ++ drm_gpuvm_put(va->vm); + } + EXPORT_SYMBOL_GPL(drm_gpuva_remove); + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1780,6 +1780,18 @@ nouveau_uvmm_bo_unmap_all(struct nouveau + } + } + ++static void ++nouveau_uvmm_free(struct drm_gpuvm *gpuvm) ++{ ++ struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm); ++ ++ kfree(uvmm); ++} ++ ++static const struct drm_gpuvm_ops gpuvm_ops = { ++ .vm_free = nouveau_uvmm_free, ++}; ++ + int + nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, + void *data, +@@ -1830,7 +1842,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_de + NOUVEAU_VA_SPACE_END, + init->kernel_managed_addr, + init->kernel_managed_size, +- NULL); ++ &gpuvm_ops); + /* GPUVM takes care from here on. 
*/ + drm_gem_object_put(r_obj); + +@@ -1849,8 +1861,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_de + return 0; + + out_gpuvm_fini: +- drm_gpuvm_destroy(&uvmm->base); +- kfree(uvmm); ++ drm_gpuvm_put(&uvmm->base); + out_unlock: + mutex_unlock(&cli->mutex); + return ret; +@@ -1902,7 +1913,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + + mutex_lock(&cli->mutex); + nouveau_vmm_fini(&uvmm->vmm); +- drm_gpuvm_destroy(&uvmm->base); +- kfree(uvmm); ++ drm_gpuvm_put(&uvmm->base); + mutex_unlock(&cli->mutex); + } +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -248,6 +248,11 @@ struct drm_gpuvm { + } rb; + + /** ++ * @kref: reference count of this object ++ */ ++ struct kref kref; ++ ++ /** + * @kernel_alloc_node: + * + * &drm_gpuva representing the address space cutout reserved for +@@ -273,7 +278,23 @@ void drm_gpuvm_init(struct drm_gpuvm *gp + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops); +-void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); ++ ++/** ++ * drm_gpuvm_get() - acquire a struct drm_gpuvm reference ++ * @gpuvm: the &drm_gpuvm to acquire the reference of ++ * ++ * This function acquires an additional reference to @gpuvm. It is illegal to ++ * call this without already holding a reference. No locks required. ++ */ ++static inline struct drm_gpuvm * ++drm_gpuvm_get(struct drm_gpuvm *gpuvm) ++{ ++ kref_get(&gpuvm->kref); ++ ++ return gpuvm; ++} ++ ++void drm_gpuvm_put(struct drm_gpuvm *gpuvm); + + bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); +@@ -674,6 +695,14 @@ static inline void drm_gpuva_init_from_o + */ + struct drm_gpuvm_ops { + /** ++ * @vm_free: called when the last reference of a struct drm_gpuvm is ++ * dropped ++ * ++ * This callback is mandatory. ++ */ ++ void (*vm_free)(struct drm_gpuvm *gpuvm); ++ ++ /** + * @op_alloc: called when the &drm_gpuvm allocates + * a struct drm_gpuva_op + * diff --git a/patches-6.6/034-13-v6.8-drm-gpuvm-add-an-abstraction-for-a-VM-BO-combination.patch b/patches-6.6/034-13-v6.8-drm-gpuvm-add-an-abstraction-for-a-VM-BO-combination.patch new file mode 100644 index 0000000..37aa2f6 --- /dev/null +++ b/patches-6.6/034-13-v6.8-drm-gpuvm-add-an-abstraction-for-a-VM-BO-combination.patch @@ -0,0 +1,1036 @@ +From 94bc2249f08e141fb4aa120bfdc392c7a5e78211 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:40 +0100 +Subject: [PATCH] drm/gpuvm: add an abstraction for a VM / BO combination +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add an abstraction layer between the drm_gpuva mappings of a particular +drm_gem_object and this GEM object itself. The abstraction represents a +combination of a drm_gem_object and drm_gpuvm. The drm_gem_object holds +a list of drm_gpuvm_bo structures (the structure representing this +abstraction), while each drm_gpuvm_bo contains list of mappings of this +GEM object. + +This has multiple advantages: + +1) We can use the drm_gpuvm_bo structure to attach it to various lists + of the drm_gpuvm. This is useful for tracking external and evicted + objects per VM, which is introduced in subsequent patches. + +2) Finding mappings of a certain drm_gem_object mapped in a certain + drm_gpuvm becomes much cheaper. + +3) Drivers can derive and extend the structure to easily represent + driver specific states of a BO for a certain GPUVM. 
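+
+A minimal sketch of points 1) and 3) above (the "my_" names are illustrative
+only): a driver derives its own VM/BO state from &drm_gpuvm_bo and plugs the
+allocation into the vm_bo_alloc / vm_bo_free callbacks that
+drm_gpuvm_bo_create() and drm_gpuvm_bo_destroy() use when provided.
+
+  #include <linux/slab.h>
+  #include <drm/drm_gpuvm.h>
+
+  struct my_vm_bo {
+          struct drm_gpuvm_bo base;       /* the VM / BO combination itself */
+          struct list_head evict_link;    /* driver specific per-VM BO state */
+  };
+
+  static struct drm_gpuvm_bo *my_vm_bo_alloc(void)
+  {
+          struct my_vm_bo *vm_bo = kzalloc(sizeof(*vm_bo), GFP_KERNEL);
+
+          return vm_bo ? &vm_bo->base : NULL;
+  }
+
+  static void my_vm_bo_free(struct drm_gpuvm_bo *vm_bo)
+  {
+          kfree(container_of(vm_bo, struct my_vm_bo, base));
+  }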
+ +The idea of this abstraction was taken from amdgpu, hence the credit for +this idea goes to the developers of amdgpu. + +Cc: Christian König +Acked-by: Christian König +Reviewed-by: Thomas Hellström +Reviewed-by: Boris Brezillon +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-11-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 340 +++++++++++++++++++++---- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 63 +++-- + include/drm/drm_gem.h | 32 +-- + include/drm/drm_gpuvm.h | 185 +++++++++++++- + 4 files changed, 534 insertions(+), 86 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -70,6 +70,18 @@ + * &drm_gem_object, such as the &drm_gem_object containing the root page table, + * but it can also be a 'dummy' object, which can be allocated with + * drm_gpuvm_resv_object_alloc(). ++ * ++ * In order to connect a struct drm_gpuva its backing &drm_gem_object each ++ * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each ++ * &drm_gpuvm_bo contains a list of &drm_gpuva structures. ++ * ++ * A &drm_gpuvm_bo is an abstraction that represents a combination of a ++ * &drm_gpuvm and a &drm_gem_object. Every such combination should be unique. ++ * This is ensured by the API through drm_gpuvm_bo_obtain() and ++ * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding ++ * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this ++ * particular combination. If not existent a new instance is created and linked ++ * to the &drm_gem_object. + */ + + /** +@@ -395,21 +407,28 @@ + /** + * DOC: Locking + * +- * Generally, the GPU VA manager does not take care of locking itself, it is +- * the drivers responsibility to take care about locking. Drivers might want to +- * protect the following operations: inserting, removing and iterating +- * &drm_gpuva objects as well as generating all kinds of operations, such as +- * split / merge or prefetch. +- * +- * The GPU VA manager also does not take care of the locking of the backing +- * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to +- * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively +- * a driver specific external lock. For the latter see also +- * drm_gem_gpuva_set_lock(). +- * +- * However, the GPU VA manager contains lockdep checks to ensure callers of its +- * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is +- * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink(). ++ * In terms of managing &drm_gpuva entries DRM GPUVM does not take care of ++ * locking itself, it is the drivers responsibility to take care about locking. ++ * Drivers might want to protect the following operations: inserting, removing ++ * and iterating &drm_gpuva objects as well as generating all kinds of ++ * operations, such as split / merge or prefetch. ++ * ++ * DRM GPUVM also does not take care of the locking of the backing ++ * &drm_gem_object buffers GPU VA lists and &drm_gpuvm_bo abstractions by ++ * itself; drivers are responsible to enforce mutual exclusion using either the ++ * GEMs dma_resv lock or alternatively a driver specific external lock. For the ++ * latter see also drm_gem_gpuva_set_lock(). 
++ * ++ * However, DRM GPUVM contains lockdep checks to ensure callers of its API hold ++ * the corresponding lock whenever the &drm_gem_objects GPU VA list is accessed ++ * by functions such as drm_gpuva_link() or drm_gpuva_unlink(), but also ++ * drm_gpuvm_bo_obtain() and drm_gpuvm_bo_put(). ++ * ++ * The latter is required since on creation and destruction of a &drm_gpuvm_bo ++ * the &drm_gpuvm_bo is attached / removed from the &drm_gem_objects gpuva list. ++ * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and ++ * &drm_gem_object must be able to observe previous creations and destructions ++ * of &drm_gpuvm_bos in order to keep instances unique. + */ + + /** +@@ -439,6 +458,7 @@ + * { + * struct drm_gpuva_ops *ops; + * struct drm_gpuva_op *op ++ * struct drm_gpuvm_bo *vm_bo; + * + * driver_lock_va_space(); + * ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range, +@@ -446,6 +466,10 @@ + * if (IS_ERR(ops)) + * return PTR_ERR(ops); + * ++ * vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj); ++ * if (IS_ERR(vm_bo)) ++ * return PTR_ERR(vm_bo); ++ * + * drm_gpuva_for_each_op(op, ops) { + * struct drm_gpuva *va; + * +@@ -458,7 +482,7 @@ + * + * driver_vm_map(); + * drm_gpuva_map(gpuvm, va, &op->map); +- * drm_gpuva_link(va); ++ * drm_gpuva_link(va, vm_bo); + * + * break; + * case DRM_GPUVA_OP_REMAP: { +@@ -485,11 +509,11 @@ + * driver_vm_remap(); + * drm_gpuva_remap(prev, next, &op->remap); + * +- * drm_gpuva_unlink(va); + * if (prev) +- * drm_gpuva_link(prev); ++ * drm_gpuva_link(prev, va->vm_bo); + * if (next) +- * drm_gpuva_link(next); ++ * drm_gpuva_link(next, va->vm_bo); ++ * drm_gpuva_unlink(va); + * + * break; + * } +@@ -505,6 +529,7 @@ + * break; + * } + * } ++ * drm_gpuvm_bo_put(vm_bo); + * driver_unlock_va_space(); + * + * return 0; +@@ -514,6 +539,7 @@ + * + * struct driver_context { + * struct drm_gpuvm *gpuvm; ++ * struct drm_gpuvm_bo *vm_bo; + * struct drm_gpuva *new_va; + * struct drm_gpuva *prev_va; + * struct drm_gpuva *next_va; +@@ -534,6 +560,7 @@ + * struct drm_gem_object *obj, u64 offset) + * { + * struct driver_context ctx; ++ * struct drm_gpuvm_bo *vm_bo; + * struct drm_gpuva_ops *ops; + * struct drm_gpuva_op *op; + * int ret = 0; +@@ -543,16 +570,23 @@ + * ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL); + * ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL); + * ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL); +- * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) { ++ * ctx.vm_bo = drm_gpuvm_bo_create(gpuvm, obj); ++ * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va || !vm_bo) { + * ret = -ENOMEM; + * goto out; + * } + * ++ * // Typically protected with a driver specific GEM gpuva lock ++ * // used in the fence signaling path for drm_gpuva_link() and ++ * // drm_gpuva_unlink(), hence pre-allocate. 
++ * ctx.vm_bo = drm_gpuvm_bo_obtain_prealloc(ctx.vm_bo); ++ * + * driver_lock_va_space(); + * ret = drm_gpuvm_sm_map(gpuvm, &ctx, addr, range, obj, offset); + * driver_unlock_va_space(); + * + * out: ++ * drm_gpuvm_bo_put(ctx.vm_bo); + * kfree(ctx.new_va); + * kfree(ctx.prev_va); + * kfree(ctx.next_va); +@@ -565,7 +599,7 @@ + * + * drm_gpuva_map(ctx->vm, ctx->new_va, &op->map); + * +- * drm_gpuva_link(ctx->new_va); ++ * drm_gpuva_link(ctx->new_va, ctx->vm_bo); + * + * // prevent the new GPUVA from being freed in + * // driver_mapping_create() +@@ -577,22 +611,23 @@ + * int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx) + * { + * struct driver_context *ctx = __ctx; ++ * struct drm_gpuva *va = op->remap.unmap->va; + * + * drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap); + * +- * drm_gpuva_unlink(op->remap.unmap->va); +- * kfree(op->remap.unmap->va); +- * + * if (op->remap.prev) { +- * drm_gpuva_link(ctx->prev_va); ++ * drm_gpuva_link(ctx->prev_va, va->vm_bo); + * ctx->prev_va = NULL; + * } + * + * if (op->remap.next) { +- * drm_gpuva_link(ctx->next_va); ++ * drm_gpuva_link(ctx->next_va, va->vm_bo); + * ctx->next_va = NULL; + * } + * ++ * drm_gpuva_unlink(va); ++ * kfree(va); ++ * + * return 0; + * } + * +@@ -809,6 +844,199 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) + } + EXPORT_SYMBOL_GPL(drm_gpuvm_put); + ++/** ++ * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo ++ * @gpuvm: The &drm_gpuvm the @obj is mapped in. ++ * @obj: The &drm_gem_object being mapped in the @gpuvm. ++ * ++ * If provided by the driver, this function uses the &drm_gpuvm_ops ++ * vm_bo_alloc() callback to allocate. ++ * ++ * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure ++ */ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ struct drm_gpuvm_bo *vm_bo; ++ ++ if (ops && ops->vm_bo_alloc) ++ vm_bo = ops->vm_bo_alloc(); ++ else ++ vm_bo = kzalloc(sizeof(*vm_bo), GFP_KERNEL); ++ ++ if (unlikely(!vm_bo)) ++ return NULL; ++ ++ vm_bo->vm = drm_gpuvm_get(gpuvm); ++ vm_bo->obj = obj; ++ drm_gem_object_get(obj); ++ ++ kref_init(&vm_bo->kref); ++ INIT_LIST_HEAD(&vm_bo->list.gpuva); ++ INIT_LIST_HEAD(&vm_bo->list.entry.gem); ++ ++ return vm_bo; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); ++ ++static void ++drm_gpuvm_bo_destroy(struct kref *kref) ++{ ++ struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, ++ kref); ++ struct drm_gpuvm *gpuvm = vm_bo->vm; ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ struct drm_gem_object *obj = vm_bo->obj; ++ bool lock = !drm_gpuvm_resv_protected(gpuvm); ++ ++ if (!lock) ++ drm_gpuvm_resv_assert_held(gpuvm); ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ list_del(&vm_bo->list.entry.gem); ++ ++ if (ops && ops->vm_bo_free) ++ ops->vm_bo_free(vm_bo); ++ else ++ kfree(vm_bo); ++ ++ drm_gpuvm_put(gpuvm); ++ drm_gem_object_put(obj); ++} ++ ++/** ++ * drm_gpuvm_bo_put() - drop a struct drm_gpuvm_bo reference ++ * @vm_bo: the &drm_gpuvm_bo to release the reference of ++ * ++ * This releases a reference to @vm_bo. ++ * ++ * If the reference count drops to zero, the &gpuvm_bo is destroyed, which ++ * includes removing it from the GEMs gpuva list. Hence, if a call to this ++ * function can potentially let the reference count drop to zero the caller must ++ * hold the dma-resv or driver specific GEM gpuva lock. ++ * ++ * This function may only be called from non-atomic context. 
++ */ ++void ++drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo) ++{ ++ might_sleep(); ++ ++ if (vm_bo) ++ kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put); ++ ++static struct drm_gpuvm_bo * ++__drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ drm_gem_for_each_gpuvm_bo(vm_bo, obj) ++ if (vm_bo->vm == gpuvm) ++ return vm_bo; ++ ++ return NULL; ++} ++ ++/** ++ * drm_gpuvm_bo_find() - find the &drm_gpuvm_bo for the given ++ * &drm_gpuvm and &drm_gem_object ++ * @gpuvm: The &drm_gpuvm the @obj is mapped in. ++ * @obj: The &drm_gem_object being mapped in the @gpuvm. ++ * ++ * Find the &drm_gpuvm_bo representing the combination of the given ++ * &drm_gpuvm and &drm_gem_object. If found, increases the reference ++ * count of the &drm_gpuvm_bo accordingly. ++ * ++ * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure ++ */ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ struct drm_gpuvm_bo *vm_bo = __drm_gpuvm_bo_find(gpuvm, obj); ++ ++ return vm_bo ? drm_gpuvm_bo_get(vm_bo) : NULL; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find); ++ ++/** ++ * drm_gpuvm_bo_obtain() - obtains and instance of the &drm_gpuvm_bo for the ++ * given &drm_gpuvm and &drm_gem_object ++ * @gpuvm: The &drm_gpuvm the @obj is mapped in. ++ * @obj: The &drm_gem_object being mapped in the @gpuvm. ++ * ++ * Find the &drm_gpuvm_bo representing the combination of the given ++ * &drm_gpuvm and &drm_gem_object. If found, increases the reference ++ * count of the &drm_gpuvm_bo accordingly. If not found, allocates a new ++ * &drm_gpuvm_bo. ++ * ++ * A new &drm_gpuvm_bo is added to the GEMs gpuva list. ++ * ++ * Returns: a pointer to the &drm_gpuvm_bo on success, an ERR_PTR on failure ++ */ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ vm_bo = drm_gpuvm_bo_find(gpuvm, obj); ++ if (vm_bo) ++ return vm_bo; ++ ++ vm_bo = drm_gpuvm_bo_create(gpuvm, obj); ++ if (!vm_bo) ++ return ERR_PTR(-ENOMEM); ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ list_add_tail(&vm_bo->list.entry.gem, &obj->gpuva.list); ++ ++ return vm_bo; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); ++ ++/** ++ * drm_gpuvm_bo_obtain_prealloc() - obtains and instance of the &drm_gpuvm_bo ++ * for the given &drm_gpuvm and &drm_gem_object ++ * @__vm_bo: A pre-allocated struct drm_gpuvm_bo. ++ * ++ * Find the &drm_gpuvm_bo representing the combination of the given ++ * &drm_gpuvm and &drm_gem_object. If found, increases the reference ++ * count of the found &drm_gpuvm_bo accordingly, while the @__vm_bo reference ++ * count is decreased. If not found @__vm_bo is returned without further ++ * increase of the reference count. ++ * ++ * A new &drm_gpuvm_bo is added to the GEMs gpuva list. 
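A minimal usage sketch, not taken from the patches themselves: assuming the usual <drm/drm_gpuvm.h> and <linux/dma-resv.h> headers and a hypothetical driver_map_one() helper, a driver's map path pairs drm_gpuvm_bo_obtain() with drm_gpuva_link() and then drops the obtain reference again via drm_gpuvm_bo_put().

static int driver_map_one(struct drm_gpuvm *gpuvm, struct drm_gpuva *va,
                          struct drm_gem_object *obj)
{
        struct drm_gpuvm_bo *vm_bo;

        /* The GEM's gpuva lock (its dma-resv by default) must be held. */
        dma_resv_lock(obj->resv, NULL);

        /* Returns the existing vm_bo for this (VM, BO) pair or creates one. */
        vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj);
        if (IS_ERR(vm_bo)) {
                dma_resv_unlock(obj->resv);
                return PTR_ERR(vm_bo);
        }

        /* Linking takes its own vm_bo reference ... */
        drm_gpuva_link(va, vm_bo);

        /* ... so the reference returned by drm_gpuvm_bo_obtain() is dropped. */
        drm_gpuvm_bo_put(vm_bo);
        dma_resv_unlock(obj->resv);

        return 0;
}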
++ * ++ * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing ++ * &drm_gpuvm_bo was found ++ */ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo) ++{ ++ struct drm_gpuvm *gpuvm = __vm_bo->vm; ++ struct drm_gem_object *obj = __vm_bo->obj; ++ struct drm_gpuvm_bo *vm_bo; ++ ++ vm_bo = drm_gpuvm_bo_find(gpuvm, obj); ++ if (vm_bo) { ++ drm_gpuvm_bo_put(__vm_bo); ++ return vm_bo; ++ } ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); ++ ++ return __vm_bo; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc); ++ + static int + __drm_gpuva_insert(struct drm_gpuvm *gpuvm, + struct drm_gpuva *va) +@@ -910,24 +1138,33 @@ EXPORT_SYMBOL_GPL(drm_gpuva_remove); + /** + * drm_gpuva_link() - link a &drm_gpuva + * @va: the &drm_gpuva to link ++ * @vm_bo: the &drm_gpuvm_bo to add the &drm_gpuva to + * +- * This adds the given &va to the GPU VA list of the &drm_gem_object it is +- * associated with. ++ * This adds the given &va to the GPU VA list of the &drm_gpuvm_bo and the ++ * &drm_gpuvm_bo to the &drm_gem_object it is associated with. ++ * ++ * For every &drm_gpuva entry added to the &drm_gpuvm_bo an additional ++ * reference of the latter is taken. + * + * This function expects the caller to protect the GEM's GPUVA list against +- * concurrent access using the GEMs dma_resv lock. ++ * concurrent access using either the GEMs dma_resv lock or a driver specific ++ * lock set through drm_gem_gpuva_set_lock(). + */ + void +-drm_gpuva_link(struct drm_gpuva *va) ++drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo) + { + struct drm_gem_object *obj = va->gem.obj; ++ struct drm_gpuvm *gpuvm = va->vm; + + if (unlikely(!obj)) + return; + +- drm_gem_gpuva_assert_lock_held(obj); ++ drm_WARN_ON(gpuvm->drm, obj != vm_bo->obj); + +- list_add_tail(&va->gem.entry, &obj->gpuva.list); ++ va->vm_bo = drm_gpuvm_bo_get(vm_bo); ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ list_add_tail(&va->gem.entry, &vm_bo->list.gpuva); + } + EXPORT_SYMBOL_GPL(drm_gpuva_link); + +@@ -938,20 +1175,31 @@ EXPORT_SYMBOL_GPL(drm_gpuva_link); + * This removes the given &va from the GPU VA list of the &drm_gem_object it is + * associated with. + * ++ * This removes the given &va from the GPU VA list of the &drm_gpuvm_bo and ++ * the &drm_gpuvm_bo from the &drm_gem_object it is associated with in case ++ * this call unlinks the last &drm_gpuva from the &drm_gpuvm_bo. ++ * ++ * For every &drm_gpuva entry removed from the &drm_gpuvm_bo a reference of ++ * the latter is dropped. ++ * + * This function expects the caller to protect the GEM's GPUVA list against +- * concurrent access using the GEMs dma_resv lock. ++ * concurrent access using either the GEMs dma_resv lock or a driver specific ++ * lock set through drm_gem_gpuva_set_lock(). 
+ */ + void + drm_gpuva_unlink(struct drm_gpuva *va) + { + struct drm_gem_object *obj = va->gem.obj; ++ struct drm_gpuvm_bo *vm_bo = va->vm_bo; + + if (unlikely(!obj)) + return; + + drm_gem_gpuva_assert_lock_held(obj); +- + list_del_init(&va->gem.entry); ++ ++ va->vm_bo = NULL; ++ drm_gpuvm_bo_put(vm_bo); + } + EXPORT_SYMBOL_GPL(drm_gpuva_unlink); + +@@ -1096,10 +1344,10 @@ drm_gpuva_remap(struct drm_gpuva *prev, + struct drm_gpuva *next, + struct drm_gpuva_op_remap *op) + { +- struct drm_gpuva *curr = op->unmap->va; +- struct drm_gpuvm *gpuvm = curr->vm; ++ struct drm_gpuva *va = op->unmap->va; ++ struct drm_gpuvm *gpuvm = va->vm; + +- drm_gpuva_remove(curr); ++ drm_gpuva_remove(va); + + if (op->prev) { + drm_gpuva_init_from_op(prev, op->prev); +@@ -1741,9 +1989,8 @@ err_free_ops: + EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); + + /** +- * drm_gpuvm_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM +- * @gpuvm: the &drm_gpuvm representing the GPU VA space +- * @obj: the &drm_gem_object to unmap ++ * drm_gpuvm_bo_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM ++ * @vm_bo: the &drm_gpuvm_bo abstraction + * + * This function creates a list of operations to perform unmapping for every + * GPUVA attached to a GEM. +@@ -1760,15 +2007,14 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops + * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure + */ + struct drm_gpuva_ops * +-drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, +- struct drm_gem_object *obj) ++drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo) + { + struct drm_gpuva_ops *ops; + struct drm_gpuva_op *op; + struct drm_gpuva *va; + int ret; + +- drm_gem_gpuva_assert_lock_held(obj); ++ drm_gem_gpuva_assert_lock_held(vm_bo->obj); + + ops = kzalloc(sizeof(*ops), GFP_KERNEL); + if (!ops) +@@ -1776,8 +2022,8 @@ drm_gpuvm_gem_unmap_ops_create(struct dr + + INIT_LIST_HEAD(&ops->list); + +- drm_gem_for_each_gpuva(va, obj) { +- op = gpuva_op_alloc(gpuvm); ++ drm_gpuvm_bo_for_each_va(va, vm_bo) { ++ op = gpuva_op_alloc(vm_bo->vm); + if (!op) { + ret = -ENOMEM; + goto err_free_ops; +@@ -1791,10 +2037,10 @@ drm_gpuvm_gem_unmap_ops_create(struct dr + return ops; + + err_free_ops: +- drm_gpuva_ops_free(gpuvm, ops); ++ drm_gpuva_ops_free(vm_bo->vm, ops); + return ERR_PTR(ret); + } +-EXPORT_SYMBOL_GPL(drm_gpuvm_gem_unmap_ops_create); ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_unmap_ops_create); + + /** + * drm_gpuva_ops_free() - free the given &drm_gpuva_ops +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -62,6 +62,8 @@ struct bind_job_op { + enum vm_bind_op op; + u32 flags; + ++ struct drm_gpuvm_bo *vm_bo; ++ + struct { + u64 addr; + u64 range; +@@ -1101,22 +1103,28 @@ bind_validate_region(struct nouveau_job + } + + static void +-bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) ++bind_link_gpuvas(struct bind_job_op *bop) + { ++ struct nouveau_uvma_prealloc *new = &bop->new; ++ struct drm_gpuvm_bo *vm_bo = bop->vm_bo; ++ struct drm_gpuva_ops *ops = bop->ops; + struct drm_gpuva_op *op; + + drm_gpuva_for_each_op(op, ops) { + switch (op->op) { + case DRM_GPUVA_OP_MAP: +- drm_gpuva_link(&new->map->va); ++ drm_gpuva_link(&new->map->va, vm_bo); + break; +- case DRM_GPUVA_OP_REMAP: ++ case DRM_GPUVA_OP_REMAP: { ++ struct drm_gpuva *va = op->remap.unmap->va; ++ + if (op->remap.prev) +- drm_gpuva_link(&new->prev->va); ++ drm_gpuva_link(&new->prev->va, va->vm_bo); + if (op->remap.next) +- drm_gpuva_link(&new->next->va); +- 
drm_gpuva_unlink(op->remap.unmap->va); ++ drm_gpuva_link(&new->next->va, va->vm_bo); ++ drm_gpuva_unlink(va); + break; ++ } + case DRM_GPUVA_OP_UNMAP: + drm_gpuva_unlink(op->unmap.va); + break; +@@ -1138,10 +1146,17 @@ nouveau_uvmm_bind_job_submit(struct nouv + + list_for_each_op(op, &bind_job->ops) { + if (op->op == OP_MAP) { +- op->gem.obj = drm_gem_object_lookup(job->file_priv, +- op->gem.handle); +- if (!op->gem.obj) ++ struct drm_gem_object *obj = op->gem.obj = ++ drm_gem_object_lookup(job->file_priv, ++ op->gem.handle); ++ if (!obj) + return -ENOENT; ++ ++ dma_resv_lock(obj->resv, NULL); ++ op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj); ++ dma_resv_unlock(obj->resv); ++ if (IS_ERR(op->vm_bo)) ++ return PTR_ERR(op->vm_bo); + } + + ret = bind_validate_op(job, op); +@@ -1352,7 +1367,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + case OP_UNMAP_SPARSE: + case OP_MAP: + case OP_UNMAP: +- bind_link_gpuvas(op->ops, &op->new); ++ bind_link_gpuvas(op); + break; + default: + break; +@@ -1499,6 +1514,12 @@ nouveau_uvmm_bind_job_free_work_fn(struc + if (!IS_ERR_OR_NULL(op->ops)) + drm_gpuva_ops_free(&uvmm->base, op->ops); + ++ if (!IS_ERR_OR_NULL(op->vm_bo)) { ++ dma_resv_lock(obj->resv, NULL); ++ drm_gpuvm_bo_put(op->vm_bo); ++ dma_resv_unlock(obj->resv); ++ } ++ + if (obj) + drm_gem_object_put(obj); + } +@@ -1752,15 +1773,18 @@ void + nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem) + { + struct drm_gem_object *obj = &nvbo->bo.base; ++ struct drm_gpuvm_bo *vm_bo; + struct drm_gpuva *va; + + dma_resv_assert_held(obj->resv); + +- drm_gem_for_each_gpuva(va, obj) { +- struct nouveau_uvma *uvma = uvma_from_va(va); ++ drm_gem_for_each_gpuvm_bo(vm_bo, obj) { ++ drm_gpuvm_bo_for_each_va(va, vm_bo) { ++ struct nouveau_uvma *uvma = uvma_from_va(va); + +- nouveau_uvma_map(uvma, mem); +- drm_gpuva_invalidate(va, false); ++ nouveau_uvma_map(uvma, mem); ++ drm_gpuva_invalidate(va, false); ++ } + } + } + +@@ -1768,15 +1792,18 @@ void + nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo) + { + struct drm_gem_object *obj = &nvbo->bo.base; ++ struct drm_gpuvm_bo *vm_bo; + struct drm_gpuva *va; + + dma_resv_assert_held(obj->resv); + +- drm_gem_for_each_gpuva(va, obj) { +- struct nouveau_uvma *uvma = uvma_from_va(va); ++ drm_gem_for_each_gpuvm_bo(vm_bo, obj) { ++ drm_gpuvm_bo_for_each_va(va, vm_bo) { ++ struct nouveau_uvma *uvma = uvma_from_va(va); + +- nouveau_uvma_unmap(uvma); +- drm_gpuva_invalidate(va, true); ++ nouveau_uvma_unmap(uvma); ++ drm_gpuva_invalidate(va, true); ++ } + } + } + +--- a/include/drm/drm_gem.h ++++ b/include/drm/drm_gem.h +@@ -584,7 +584,7 @@ static inline bool drm_gem_object_is_sha + * drm_gem_gpuva_init() - initialize the gpuva list of a GEM object + * @obj: the &drm_gem_object + * +- * This initializes the &drm_gem_object's &drm_gpuva list. ++ * This initializes the &drm_gem_object's &drm_gpuvm_bo list. + * + * Calling this function is only necessary for drivers intending to support the + * &drm_driver_feature DRIVER_GEM_GPUVA. 
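The same pattern in generic form, as a hedged sketch rather than driver code from the patch: with the vm_bo layer in place, walking every mapping of a GEM object across all VMs nests the two iterators, much like the nouveau_uvmm_bo_map_all() rework above (driver_invalidate_mappings() is a made-up name).

static void driver_invalidate_mappings(struct drm_gem_object *obj)
{
        struct drm_gpuvm_bo *vm_bo;
        struct drm_gpuva *va;

        dma_resv_assert_held(obj->resv);

        drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
                drm_gpuvm_bo_for_each_va(va, vm_bo) {
                        /* act on every mapping of @obj, in every VM */
                        drm_gpuva_invalidate(va, true);
                }
        }
}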
+@@ -597,28 +597,28 @@ static inline void drm_gem_gpuva_init(st + } + + /** +- * drm_gem_for_each_gpuva() - iternator to walk over a list of gpuvas +- * @entry__: &drm_gpuva structure to assign to in each iteration step +- * @obj__: the &drm_gem_object the &drm_gpuvas to walk are associated with ++ * drm_gem_for_each_gpuvm_bo() - iterator to walk over a list of &drm_gpuvm_bo ++ * @entry__: &drm_gpuvm_bo structure to assign to in each iteration step ++ * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with + * +- * This iterator walks over all &drm_gpuva structures associated with the +- * &drm_gpuva_manager. ++ * This iterator walks over all &drm_gpuvm_bo structures associated with the ++ * &drm_gem_object. + */ +-#define drm_gem_for_each_gpuva(entry__, obj__) \ +- list_for_each_entry(entry__, &(obj__)->gpuva.list, gem.entry) ++#define drm_gem_for_each_gpuvm_bo(entry__, obj__) \ ++ list_for_each_entry(entry__, &(obj__)->gpuva.list, list.entry.gem) + + /** +- * drm_gem_for_each_gpuva_safe() - iternator to safely walk over a list of +- * gpuvas +- * @entry__: &drm_gpuva structure to assign to in each iteration step +- * @next__: &next &drm_gpuva to store the next step +- * @obj__: the &drm_gem_object the &drm_gpuvas to walk are associated with ++ * drm_gem_for_each_gpuvm_bo_safe() - iterator to safely walk over a list of ++ * &drm_gpuvm_bo ++ * @entry__: &drm_gpuvm_bostructure to assign to in each iteration step ++ * @next__: &next &drm_gpuvm_bo to store the next step ++ * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with + * +- * This iterator walks over all &drm_gpuva structures associated with the ++ * This iterator walks over all &drm_gpuvm_bo structures associated with the + * &drm_gem_object. It is implemented with list_for_each_entry_safe(), hence + * it is save against removal of elements. + */ +-#define drm_gem_for_each_gpuva_safe(entry__, next__, obj__) \ +- list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, gem.entry) ++#define drm_gem_for_each_gpuvm_bo_safe(entry__, next__, obj__) \ ++ list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, list.entry.gem) + + #endif /* __DRM_GEM_H__ */ +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -25,6 +25,7 @@ + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + ++#include + #include + #include + #include +@@ -33,6 +34,7 @@ + #include + + struct drm_gpuvm; ++struct drm_gpuvm_bo; + struct drm_gpuvm_ops; + + /** +@@ -74,6 +76,12 @@ struct drm_gpuva { + struct drm_gpuvm *vm; + + /** ++ * @vm_bo: the &drm_gpuvm_bo abstraction for the mapped ++ * &drm_gem_object ++ */ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ /** + * @flags: the &drm_gpuva_flags for this mapping + */ + enum drm_gpuva_flags flags; +@@ -108,7 +116,7 @@ struct drm_gpuva { + struct drm_gem_object *obj; + + /** +- * @entry: the &list_head to attach this object to a &drm_gem_object ++ * @entry: the &list_head to attach this object to a &drm_gpuvm_bo + */ + struct list_head entry; + } gem; +@@ -141,7 +149,7 @@ struct drm_gpuva { + int drm_gpuva_insert(struct drm_gpuvm *gpuvm, struct drm_gpuva *va); + void drm_gpuva_remove(struct drm_gpuva *va); + +-void drm_gpuva_link(struct drm_gpuva *va); ++void drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo); + void drm_gpuva_unlink(struct drm_gpuva *va); + + struct drm_gpuva *drm_gpuva_find(struct drm_gpuvm *gpuvm, +@@ -189,9 +197,15 @@ static inline bool drm_gpuva_invalidated + */ + enum drm_gpuvm_flags { + /** ++ * @DRM_GPUVM_RESV_PROTECTED: GPUVM is protected externally by the ++ * GPUVM's &dma_resv lock ++ */ ++ DRM_GPUVM_RESV_PROTECTED = BIT(0), ++ ++ /** + * @DRM_GPUVM_USERBITS: user defined bits + */ +- DRM_GPUVM_USERBITS = BIT(0), ++ DRM_GPUVM_USERBITS = BIT(1), + }; + + /** +@@ -303,6 +317,19 @@ struct drm_gem_object * + drm_gpuvm_resv_object_alloc(struct drm_device *drm); + + /** ++ * drm_gpuvm_resv_protected() - indicates whether &DRM_GPUVM_RESV_PROTECTED is ++ * set ++ * @gpuvm: the &drm_gpuvm ++ * ++ * Returns: true if &DRM_GPUVM_RESV_PROTECTED is set, false otherwise. ++ */ ++static inline bool ++drm_gpuvm_resv_protected(struct drm_gpuvm *gpuvm) ++{ ++ return gpuvm->flags & DRM_GPUVM_RESV_PROTECTED; ++} ++ ++/** + * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv + * @gpuvm__: the &drm_gpuvm + * +@@ -326,6 +353,12 @@ drm_gpuvm_resv_object_alloc(struct drm_d + #define drm_gpuvm_resv_assert_held(gpuvm__) \ + dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) + ++#define drm_gpuvm_resv_held(gpuvm__) \ ++ dma_resv_held(drm_gpuvm_resv(gpuvm__)) ++ ++#define drm_gpuvm_resv_assert_held(gpuvm__) \ ++ dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) ++ + static inline struct drm_gpuva * + __drm_gpuva_next(struct drm_gpuva *va) + { +@@ -405,6 +438,125 @@ __drm_gpuva_next(struct drm_gpuva *va) + list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry) + + /** ++ * struct drm_gpuvm_bo - structure representing a &drm_gpuvm and ++ * &drm_gem_object combination ++ * ++ * This structure is an abstraction representing a &drm_gpuvm and ++ * &drm_gem_object combination. It serves as an indirection to accelerate ++ * iterating all &drm_gpuvas within a &drm_gpuvm backed by the same ++ * &drm_gem_object. ++ * ++ * Furthermore it is used cache evicted GEM objects for a certain GPU-VM to ++ * accelerate validation. ++ * ++ * Typically, drivers want to create an instance of a struct drm_gpuvm_bo once ++ * a GEM object is mapped first in a GPU-VM and release the instance once the ++ * last mapping of the GEM object in this GPU-VM is unmapped. ++ */ ++struct drm_gpuvm_bo { ++ /** ++ * @vm: The &drm_gpuvm the @obj is mapped in. This is a reference ++ * counted pointer. ++ */ ++ struct drm_gpuvm *vm; ++ ++ /** ++ * @obj: The &drm_gem_object being mapped in @vm. This is a reference ++ * counted pointer. 
++ */ ++ struct drm_gem_object *obj; ++ ++ /** ++ * @kref: The reference count for this &drm_gpuvm_bo. ++ */ ++ struct kref kref; ++ ++ /** ++ * @list: Structure containing all &list_heads. ++ */ ++ struct { ++ /** ++ * @gpuva: The list of linked &drm_gpuvas. ++ * ++ * It is safe to access entries from this list as long as the ++ * GEM's gpuva lock is held. See also struct drm_gem_object. ++ */ ++ struct list_head gpuva; ++ ++ /** ++ * @entry: Structure containing all &list_heads serving as ++ * entry. ++ */ ++ struct { ++ /** ++ * @gem: List entry to attach to the &drm_gem_objects ++ * gpuva list. ++ */ ++ struct list_head gem; ++ } entry; ++ } list; ++}; ++ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj); ++ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj); ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *vm_bo); ++ ++/** ++ * drm_gpuvm_bo_get() - acquire a struct drm_gpuvm_bo reference ++ * @vm_bo: the &drm_gpuvm_bo to acquire the reference of ++ * ++ * This function acquires an additional reference to @vm_bo. It is illegal to ++ * call this without already holding a reference. No locks required. ++ */ ++static inline struct drm_gpuvm_bo * ++drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo) ++{ ++ kref_get(&vm_bo->kref); ++ return vm_bo; ++} ++ ++void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); ++ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj); ++ ++/** ++ * drm_gpuvm_bo_for_each_va() - iterator to walk over a list of &drm_gpuva ++ * @va__: &drm_gpuva structure to assign to in each iteration step ++ * @vm_bo__: the &drm_gpuvm_bo the &drm_gpuva to walk are associated with ++ * ++ * This iterator walks over all &drm_gpuva structures associated with the ++ * &drm_gpuvm_bo. ++ * ++ * The caller must hold the GEM's gpuva lock. ++ */ ++#define drm_gpuvm_bo_for_each_va(va__, vm_bo__) \ ++ list_for_each_entry(va__, &(vm_bo)->list.gpuva, gem.entry) ++ ++/** ++ * drm_gpuvm_bo_for_each_va_safe() - iterator to safely walk over a list of ++ * &drm_gpuva ++ * @va__: &drm_gpuva structure to assign to in each iteration step ++ * @next__: &next &drm_gpuva to store the next step ++ * @vm_bo__: the &drm_gpuvm_bo the &drm_gpuva to walk are associated with ++ * ++ * This iterator walks over all &drm_gpuva structures associated with the ++ * &drm_gpuvm_bo. It is implemented with list_for_each_entry_safe(), hence ++ * it is save against removal of elements. ++ * ++ * The caller must hold the GEM's gpuva lock. ++ */ ++#define drm_gpuvm_bo_for_each_va_safe(va__, next__, vm_bo__) \ ++ list_for_each_entry_safe(va__, next__, &(vm_bo)->list.gpuva, gem.entry) ++ ++/** + * enum drm_gpuva_op_type - GPU VA operation type + * + * Operations to alter the GPU VA mappings tracked by the &drm_gpuvm. +@@ -673,8 +825,7 @@ drm_gpuvm_prefetch_ops_create(struct drm + u64 addr, u64 range); + + struct drm_gpuva_ops * +-drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, +- struct drm_gem_object *obj); ++drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo); + + void drm_gpuva_ops_free(struct drm_gpuvm *gpuvm, + struct drm_gpuva_ops *ops); +@@ -727,6 +878,30 @@ struct drm_gpuvm_ops { + void (*op_free)(struct drm_gpuva_op *op); + + /** ++ * @vm_bo_alloc: called when the &drm_gpuvm allocates ++ * a struct drm_gpuvm_bo ++ * ++ * Some drivers may want to embed struct drm_gpuvm_bo into driver ++ * specific structures. 
By implementing this callback drivers can ++ * allocate memory accordingly. ++ * ++ * This callback is optional. ++ */ ++ struct drm_gpuvm_bo *(*vm_bo_alloc)(void); ++ ++ /** ++ * @vm_bo_free: called when the &drm_gpuvm frees a ++ * struct drm_gpuvm_bo ++ * ++ * Some drivers may want to embed struct drm_gpuvm_bo into driver ++ * specific structures. By implementing this callback drivers can ++ * free the previously allocated memory accordingly. ++ * ++ * This callback is optional. ++ */ ++ void (*vm_bo_free)(struct drm_gpuvm_bo *vm_bo); ++ ++ /** + * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the + * mapping once all previous steps were completed + * diff --git a/patches-6.6/034-14-v6.8-drm-gpuvm-track-lock-validate-external-evicted-objects.patch b/patches-6.6/034-14-v6.8-drm-gpuvm-track-lock-validate-external-evicted-objects.patch new file mode 100644 index 0000000..71f0a7e --- /dev/null +++ b/patches-6.6/034-14-v6.8-drm-gpuvm-track-lock-validate-external-evicted-objects.patch @@ -0,0 +1,1052 @@ +From 50c1a36f594bb3dd33f3f9386c5d960cd12327d8 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:41 +0100 +Subject: [PATCH] drm/gpuvm: track/lock/validate external/evicted objects +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently the DRM GPUVM offers common infrastructure to track GPU VA +allocations and mappings, generically connect GPU VA mappings to their +backing buffers and perform more complex mapping operations on the GPU VA +space. + +However, there are more design patterns commonly used by drivers, which +can potentially be generalized in order to make the DRM GPUVM represent +a basis for GPU-VM implementations. In this context, this patch aims +at generalizing the following elements. + +1) Provide a common dma-resv for GEM objects not being used outside of + this GPU-VM. + +2) Provide tracking of external GEM objects (GEM objects which are + shared with other GPU-VMs). + +3) Provide functions to efficiently lock all GEM objects dma-resv the + GPU-VM contains mappings of. + +4) Provide tracking of evicted GEM objects the GPU-VM contains mappings + of, such that validation of evicted GEM objects is accelerated. + +5) Provide some convinience functions for common patterns. + +Big thanks to Boris Brezillon for his help to figure out locking for +drivers updating the GPU VA space within the fence signalling path. + +Acked-by: Christian König +Reviewed-by: Boris Brezillon +Reviewed-by: Thomas Hellström +Suggested-by: Matthew Brost +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-12-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 633 ++++++++++++++++++++++++++++++++++++ + include/drm/drm_gpuvm.h | 250 ++++++++++++++ + 2 files changed, 883 insertions(+) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -82,6 +82,21 @@ + * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this + * particular combination. If not existent a new instance is created and linked + * to the &drm_gem_object. ++ * ++ * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used ++ * as entry for the &drm_gpuvm's lists of external and evicted objects. Those ++ * lists are maintained in order to accelerate locking of dma-resv locks and ++ * validation of evicted objects bound in a &drm_gpuvm. 
For instance, all ++ * &drm_gem_object's &dma_resv of a given &drm_gpuvm can be locked by calling ++ * drm_gpuvm_exec_lock(). Once locked drivers can call drm_gpuvm_validate() in ++ * order to validate all evicted &drm_gem_objects. It is also possible to lock ++ * additional &drm_gem_objects by providing the corresponding parameters to ++ * drm_gpuvm_exec_lock() as well as open code the &drm_exec loop while making ++ * use of helper functions such as drm_gpuvm_prepare_range() or ++ * drm_gpuvm_prepare_objects(). ++ * ++ * Every bound &drm_gem_object is treated as external object when its &dma_resv ++ * structure is different than the &drm_gpuvm's common &dma_resv structure. + */ + + /** +@@ -429,6 +444,20 @@ + * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and + * &drm_gem_object must be able to observe previous creations and destructions + * of &drm_gpuvm_bos in order to keep instances unique. ++ * ++ * The &drm_gpuvm's lists for keeping track of external and evicted objects are ++ * protected against concurrent insertion / removal and iteration internally. ++ * ++ * However, drivers still need ensure to protect concurrent calls to functions ++ * iterating those lists, namely drm_gpuvm_prepare_objects() and ++ * drm_gpuvm_validate(). ++ * ++ * Alternatively, drivers can set the &DRM_GPUVM_RESV_PROTECTED flag to indicate ++ * that the corresponding &dma_resv locks are held in order to protect the ++ * lists. If &DRM_GPUVM_RESV_PROTECTED is set, internal locking is disabled and ++ * the corresponding lockdep checks are enabled. This is an optimization for ++ * drivers which are capable of taking the corresponding &dma_resv locks and ++ * hence do not require internal locking. + */ + + /** +@@ -641,6 +670,201 @@ + * } + */ + ++/** ++ * get_next_vm_bo_from_list() - get the next vm_bo element ++ * @__gpuvm: the &drm_gpuvm ++ * @__list_name: the name of the list we're iterating on ++ * @__local_list: a pointer to the local list used to store already iterated items ++ * @__prev_vm_bo: the previous element we got from get_next_vm_bo_from_list() ++ * ++ * This helper is here to provide lockless list iteration. Lockless as in, the ++ * iterator releases the lock immediately after picking the first element from ++ * the list, so list insertion deletion can happen concurrently. ++ * ++ * Elements popped from the original list are kept in a local list, so removal ++ * and is_empty checks can still happen while we're iterating the list. 
++ */ ++#define get_next_vm_bo_from_list(__gpuvm, __list_name, __local_list, __prev_vm_bo) \ ++ ({ \ ++ struct drm_gpuvm_bo *__vm_bo = NULL; \ ++ \ ++ drm_gpuvm_bo_put(__prev_vm_bo); \ ++ \ ++ spin_lock(&(__gpuvm)->__list_name.lock); \ ++ if (!(__gpuvm)->__list_name.local_list) \ ++ (__gpuvm)->__list_name.local_list = __local_list; \ ++ else \ ++ drm_WARN_ON((__gpuvm)->drm, \ ++ (__gpuvm)->__list_name.local_list != __local_list); \ ++ \ ++ while (!list_empty(&(__gpuvm)->__list_name.list)) { \ ++ __vm_bo = list_first_entry(&(__gpuvm)->__list_name.list, \ ++ struct drm_gpuvm_bo, \ ++ list.entry.__list_name); \ ++ if (kref_get_unless_zero(&__vm_bo->kref)) { \ ++ list_move_tail(&(__vm_bo)->list.entry.__list_name, \ ++ __local_list); \ ++ break; \ ++ } else { \ ++ list_del_init(&(__vm_bo)->list.entry.__list_name); \ ++ __vm_bo = NULL; \ ++ } \ ++ } \ ++ spin_unlock(&(__gpuvm)->__list_name.lock); \ ++ \ ++ __vm_bo; \ ++ }) ++ ++/** ++ * for_each_vm_bo_in_list() - internal vm_bo list iterator ++ * @__gpuvm: the &drm_gpuvm ++ * @__list_name: the name of the list we're iterating on ++ * @__local_list: a pointer to the local list used to store already iterated items ++ * @__vm_bo: the struct drm_gpuvm_bo to assign in each iteration step ++ * ++ * This helper is here to provide lockless list iteration. Lockless as in, the ++ * iterator releases the lock immediately after picking the first element from the ++ * list, hence list insertion and deletion can happen concurrently. ++ * ++ * It is not allowed to re-assign the vm_bo pointer from inside this loop. ++ * ++ * Typical use: ++ * ++ * struct drm_gpuvm_bo *vm_bo; ++ * LIST_HEAD(my_local_list); ++ * ++ * ret = 0; ++ * for_each_vm_bo_in_list(gpuvm, , &my_local_list, vm_bo) { ++ * ret = do_something_with_vm_bo(..., vm_bo); ++ * if (ret) ++ * break; ++ * } ++ * // Drop ref in case we break out of the loop. ++ * drm_gpuvm_bo_put(vm_bo); ++ * restore_vm_bo_list(gpuvm, , &my_local_list); ++ * ++ * ++ * Only used for internal list iterations, not meant to be exposed to the outside ++ * world. ++ */ ++#define for_each_vm_bo_in_list(__gpuvm, __list_name, __local_list, __vm_bo) \ ++ for (__vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name, \ ++ __local_list, NULL); \ ++ __vm_bo; \ ++ __vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name, \ ++ __local_list, __vm_bo)) ++ ++static void ++__restore_vm_bo_list(struct drm_gpuvm *gpuvm, spinlock_t *lock, ++ struct list_head *list, struct list_head **local_list) ++{ ++ /* Merge back the two lists, moving local list elements to the ++ * head to preserve previous ordering, in case it matters. ++ */ ++ spin_lock(lock); ++ if (*local_list) { ++ list_splice(*local_list, list); ++ *local_list = NULL; ++ } ++ spin_unlock(lock); ++} ++ ++/** ++ * restore_vm_bo_list() - move vm_bo elements back to their original list ++ * @__gpuvm: the &drm_gpuvm ++ * @__list_name: the name of the list we're iterating on ++ * ++ * When we're done iterating a vm_bo list, we should call restore_vm_bo_list() ++ * to restore the original state and let new iterations take place. 
++ */ ++#define restore_vm_bo_list(__gpuvm, __list_name) \ ++ __restore_vm_bo_list((__gpuvm), &(__gpuvm)->__list_name.lock, \ ++ &(__gpuvm)->__list_name.list, \ ++ &(__gpuvm)->__list_name.local_list) ++ ++static void ++cond_spin_lock(spinlock_t *lock, bool cond) ++{ ++ if (cond) ++ spin_lock(lock); ++} ++ ++static void ++cond_spin_unlock(spinlock_t *lock, bool cond) ++{ ++ if (cond) ++ spin_unlock(lock); ++} ++ ++static void ++__drm_gpuvm_bo_list_add(struct drm_gpuvm *gpuvm, spinlock_t *lock, ++ struct list_head *entry, struct list_head *list) ++{ ++ cond_spin_lock(lock, !!lock); ++ if (list_empty(entry)) ++ list_add_tail(entry, list); ++ cond_spin_unlock(lock, !!lock); ++} ++ ++/** ++ * drm_gpuvm_bo_list_add() - insert a vm_bo into the given list ++ * @__vm_bo: the &drm_gpuvm_bo ++ * @__list_name: the name of the list to insert into ++ * @__lock: whether to lock with the internal spinlock ++ * ++ * Inserts the given @__vm_bo into the list specified by @__list_name. ++ */ ++#define drm_gpuvm_bo_list_add(__vm_bo, __list_name, __lock) \ ++ __drm_gpuvm_bo_list_add((__vm_bo)->vm, \ ++ __lock ? &(__vm_bo)->vm->__list_name.lock : \ ++ NULL, \ ++ &(__vm_bo)->list.entry.__list_name, \ ++ &(__vm_bo)->vm->__list_name.list) ++ ++static void ++__drm_gpuvm_bo_list_del(struct drm_gpuvm *gpuvm, spinlock_t *lock, ++ struct list_head *entry, bool init) ++{ ++ cond_spin_lock(lock, !!lock); ++ if (init) { ++ if (!list_empty(entry)) ++ list_del_init(entry); ++ } else { ++ list_del(entry); ++ } ++ cond_spin_unlock(lock, !!lock); ++} ++ ++/** ++ * drm_gpuvm_bo_list_del_init() - remove a vm_bo from the given list ++ * @__vm_bo: the &drm_gpuvm_bo ++ * @__list_name: the name of the list to insert into ++ * @__lock: whether to lock with the internal spinlock ++ * ++ * Removes the given @__vm_bo from the list specified by @__list_name. ++ */ ++#define drm_gpuvm_bo_list_del_init(__vm_bo, __list_name, __lock) \ ++ __drm_gpuvm_bo_list_del((__vm_bo)->vm, \ ++ __lock ? &(__vm_bo)->vm->__list_name.lock : \ ++ NULL, \ ++ &(__vm_bo)->list.entry.__list_name, \ ++ true) ++ ++/** ++ * drm_gpuvm_bo_list_del() - remove a vm_bo from the given list ++ * @__vm_bo: the &drm_gpuvm_bo ++ * @__list_name: the name of the list to insert into ++ * @__lock: whether to lock with the internal spinlock ++ * ++ * Removes the given @__vm_bo from the list specified by @__list_name. ++ */ ++#define drm_gpuvm_bo_list_del(__vm_bo, __list_name, __lock) \ ++ __drm_gpuvm_bo_list_del((__vm_bo)->vm, \ ++ __lock ? &(__vm_bo)->vm->__list_name.lock : \ ++ NULL, \ ++ &(__vm_bo)->list.entry.__list_name, \ ++ false) ++ + #define to_drm_gpuva(__node) container_of((__node), struct drm_gpuva, rb.node) + + #define GPUVA_START(node) ((node)->va.addr) +@@ -775,6 +999,12 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + gpuvm->rb.tree = RB_ROOT_CACHED; + INIT_LIST_HEAD(&gpuvm->rb.list); + ++ INIT_LIST_HEAD(&gpuvm->extobj.list); ++ spin_lock_init(&gpuvm->extobj.lock); ++ ++ INIT_LIST_HEAD(&gpuvm->evict.list); ++ spin_lock_init(&gpuvm->evict.lock); ++ + kref_init(&gpuvm->kref); + + gpuvm->name = name ? 
name : "unknown"; +@@ -812,6 +1042,11 @@ drm_gpuvm_fini(struct drm_gpuvm *gpuvm) + drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), + "GPUVA tree is not empty, potentially leaking memory.\n"); + ++ drm_WARN(gpuvm->drm, !list_empty(&gpuvm->extobj.list), ++ "Extobj list should be empty.\n"); ++ drm_WARN(gpuvm->drm, !list_empty(&gpuvm->evict.list), ++ "Evict list should be empty.\n"); ++ + drm_gem_object_put(gpuvm->r_obj); + } + +@@ -844,6 +1079,343 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) + } + EXPORT_SYMBOL_GPL(drm_gpuvm_put); + ++static int ++__drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ LIST_HEAD(extobjs); ++ int ret = 0; ++ ++ for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) { ++ ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); ++ if (ret) ++ break; ++ } ++ /* Drop ref in case we break out of the loop. */ ++ drm_gpuvm_bo_put(vm_bo); ++ restore_vm_bo_list(gpuvm, extobj); ++ ++ return ret; ++} ++ ++static int ++drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ int ret = 0; ++ ++ drm_gpuvm_resv_assert_held(gpuvm); ++ list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) { ++ ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); ++ if (ret) ++ break; ++ ++ if (vm_bo->evicted) ++ drm_gpuvm_bo_list_add(vm_bo, evict, false); ++ } ++ ++ return ret; ++} ++ ++/** ++ * drm_gpuvm_prepare_objects() - prepare all assoiciated BOs ++ * @gpuvm: the &drm_gpuvm ++ * @exec: the &drm_exec locking context ++ * @num_fences: the amount of &dma_fences to reserve ++ * ++ * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given ++ * &drm_gpuvm contains mappings of. ++ * ++ * Using this function directly, it is the drivers responsibility to call ++ * drm_exec_init() and drm_exec_fini() accordingly. ++ * ++ * Note: This function is safe against concurrent insertion and removal of ++ * external objects, however it is not safe against concurrent usage itself. ++ * ++ * Drivers need to make sure to protect this case with either an outer VM lock ++ * or by calling drm_gpuvm_prepare_vm() before this function within the ++ * drm_exec_until_all_locked() loop, such that the GPUVM's dma-resv lock ensures ++ * mutual exclusion. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ if (drm_gpuvm_resv_protected(gpuvm)) ++ return drm_gpuvm_prepare_objects_locked(gpuvm, exec, ++ num_fences); ++ else ++ return __drm_gpuvm_prepare_objects(gpuvm, exec, num_fences); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_objects); ++ ++/** ++ * drm_gpuvm_prepare_range() - prepare all BOs mapped within a given range ++ * @gpuvm: the &drm_gpuvm ++ * @exec: the &drm_exec locking context ++ * @addr: the start address within the VA space ++ * @range: the range to iterate within the VA space ++ * @num_fences: the amount of &dma_fences to reserve ++ * ++ * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr ++ * and @addr + @range. ++ * ++ * Returns: 0 on success, negative error code on failure. 
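To make the note on drm_gpuvm_prepare_vm() concrete, here is a hedged sketch of the open-coded drm_exec loop these helpers are designed for; driver_lock_vm() is a placeholder and error handling is reduced to the minimum.

static int driver_lock_vm(struct drm_gpuvm *gpuvm, struct drm_exec *exec,
                          unsigned int num_fences)
{
        int ret;

        drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT);

        drm_exec_until_all_locked(exec) {
                /* Lock the VM's common dma-resv first ... */
                ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences);
                drm_exec_retry_on_contention(exec);
                if (ret)
                        goto err;

                /* ... then the dma-resv of all external objects. */
                ret = drm_gpuvm_prepare_objects(gpuvm, exec, num_fences);
                drm_exec_retry_on_contention(exec);
                if (ret)
                        goto err;
        }

        /* Locks stay held; the caller releases them with drm_exec_fini(). */
        return 0;

err:
        drm_exec_fini(exec);
        return ret;
}

This is essentially what drm_gpuvm_exec_lock() below does internally.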
++ */ ++int ++drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, ++ u64 addr, u64 range, unsigned int num_fences) ++{ ++ struct drm_gpuva *va; ++ u64 end = addr + range; ++ int ret; ++ ++ drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { ++ struct drm_gem_object *obj = va->gem.obj; ++ ++ ret = drm_exec_prepare_obj(exec, obj, num_fences); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range); ++ ++/** ++ * drm_gpuvm_exec_lock() - lock all dma-resv of all assoiciated BOs ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * ++ * Acquires all dma-resv locks of all &drm_gem_objects the given ++ * &drm_gpuvm contains mappings of. ++ * ++ * Addionally, when calling this function with struct drm_gpuvm_exec::extra ++ * being set the driver receives the given @fn callback to lock additional ++ * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers ++ * would call drm_exec_prepare_obj() from within this callback. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec) ++{ ++ struct drm_gpuvm *gpuvm = vm_exec->vm; ++ struct drm_exec *exec = &vm_exec->exec; ++ unsigned int num_fences = vm_exec->num_fences; ++ int ret; ++ ++ drm_exec_init(exec, vm_exec->flags); ++ ++ drm_exec_until_all_locked(exec) { ++ ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences); ++ drm_exec_retry_on_contention(exec); ++ if (ret) ++ goto err; ++ ++ ret = drm_gpuvm_prepare_objects(gpuvm, exec, num_fences); ++ drm_exec_retry_on_contention(exec); ++ if (ret) ++ goto err; ++ ++ if (vm_exec->extra.fn) { ++ ret = vm_exec->extra.fn(vm_exec); ++ drm_exec_retry_on_contention(exec); ++ if (ret) ++ goto err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ drm_exec_fini(exec); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock); ++ ++static int ++fn_lock_array(struct drm_gpuvm_exec *vm_exec) ++{ ++ struct { ++ struct drm_gem_object **objs; ++ unsigned int num_objs; ++ } *args = vm_exec->extra.priv; ++ ++ return drm_exec_prepare_array(&vm_exec->exec, args->objs, ++ args->num_objs, vm_exec->num_fences); ++} ++ ++/** ++ * drm_gpuvm_exec_lock_array() - lock all dma-resv of all assoiciated BOs ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * @objs: additional &drm_gem_objects to lock ++ * @num_objs: the number of additional &drm_gem_objects to lock ++ * ++ * Acquires all dma-resv locks of all &drm_gem_objects the given &drm_gpuvm ++ * contains mappings of, plus the ones given through @objs. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec, ++ struct drm_gem_object **objs, ++ unsigned int num_objs) ++{ ++ struct { ++ struct drm_gem_object **objs; ++ unsigned int num_objs; ++ } args; ++ ++ args.objs = objs; ++ args.num_objs = num_objs; ++ ++ vm_exec->extra.fn = fn_lock_array; ++ vm_exec->extra.priv = &args; ++ ++ return drm_gpuvm_exec_lock(vm_exec); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_array); ++ ++/** ++ * drm_gpuvm_exec_lock_range() - prepare all BOs mapped within a given range ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * @addr: the start address within the VA space ++ * @range: the range to iterate within the VA space ++ * ++ * Acquires all dma-resv locks of all &drm_gem_objects mapped between @addr and ++ * @addr + @range. ++ * ++ * Returns: 0 on success, negative error code on failure. 
++ */ ++int ++drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, ++ u64 addr, u64 range) ++{ ++ struct drm_gpuvm *gpuvm = vm_exec->vm; ++ struct drm_exec *exec = &vm_exec->exec; ++ int ret; ++ ++ drm_exec_init(exec, vm_exec->flags); ++ ++ drm_exec_until_all_locked(exec) { ++ ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range, ++ vm_exec->num_fences); ++ drm_exec_retry_on_contention(exec); ++ if (ret) ++ goto err; ++ } ++ ++ return ret; ++ ++err: ++ drm_exec_fini(exec); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_range); ++ ++static int ++__drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ struct drm_gpuvm_bo *vm_bo; ++ LIST_HEAD(evict); ++ int ret = 0; ++ ++ for_each_vm_bo_in_list(gpuvm, evict, &evict, vm_bo) { ++ ret = ops->vm_bo_validate(vm_bo, exec); ++ if (ret) ++ break; ++ } ++ /* Drop ref in case we break out of the loop. */ ++ drm_gpuvm_bo_put(vm_bo); ++ restore_vm_bo_list(gpuvm, evict); ++ ++ return ret; ++} ++ ++static int ++drm_gpuvm_validate_locked(struct drm_gpuvm *gpuvm, struct drm_exec *exec) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ struct drm_gpuvm_bo *vm_bo, *next; ++ int ret = 0; ++ ++ drm_gpuvm_resv_assert_held(gpuvm); ++ ++ list_for_each_entry_safe(vm_bo, next, &gpuvm->evict.list, ++ list.entry.evict) { ++ ret = ops->vm_bo_validate(vm_bo, exec); ++ if (ret) ++ break; ++ ++ dma_resv_assert_held(vm_bo->obj->resv); ++ if (!vm_bo->evicted) ++ drm_gpuvm_bo_list_del_init(vm_bo, evict, false); ++ } ++ ++ return ret; ++} ++ ++/** ++ * drm_gpuvm_validate() - validate all BOs marked as evicted ++ * @gpuvm: the &drm_gpuvm to validate evicted BOs ++ * @exec: the &drm_exec instance used for locking the GPUVM ++ * ++ * Calls the &drm_gpuvm_ops::vm_bo_validate callback for all evicted buffer ++ * objects being mapped in the given &drm_gpuvm. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ ++ if (unlikely(!ops || !ops->vm_bo_validate)) ++ return -EOPNOTSUPP; ++ ++ if (drm_gpuvm_resv_protected(gpuvm)) ++ return drm_gpuvm_validate_locked(gpuvm, exec); ++ else ++ return __drm_gpuvm_validate(gpuvm, exec); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_validate); ++ ++/** ++ * drm_gpuvm_resv_add_fence - add fence to private and all extobj ++ * dma-resv ++ * @gpuvm: the &drm_gpuvm to add a fence to ++ * @exec: the &drm_exec locking context ++ * @fence: fence to add ++ * @private_usage: private dma-resv usage ++ * @extobj_usage: extobj dma-resv usage ++ */ ++void ++drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage) ++{ ++ struct drm_gem_object *obj; ++ unsigned long index; ++ ++ drm_exec_for_each_locked_object(exec, index, obj) { ++ dma_resv_assert_held(obj->resv); ++ dma_resv_add_fence(obj->resv, fence, ++ drm_gpuvm_is_extobj(gpuvm, obj) ? ++ extobj_usage : private_usage); ++ } ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_resv_add_fence); ++ + /** + * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo + * @gpuvm: The &drm_gpuvm the @obj is mapped in. 
+@@ -877,6 +1449,9 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gp + INIT_LIST_HEAD(&vm_bo->list.gpuva); + INIT_LIST_HEAD(&vm_bo->list.entry.gem); + ++ INIT_LIST_HEAD(&vm_bo->list.entry.extobj); ++ INIT_LIST_HEAD(&vm_bo->list.entry.evict); ++ + return vm_bo; + } + EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); +@@ -894,6 +1469,9 @@ drm_gpuvm_bo_destroy(struct kref *kref) + if (!lock) + drm_gpuvm_resv_assert_held(gpuvm); + ++ drm_gpuvm_bo_list_del(vm_bo, extobj, lock); ++ drm_gpuvm_bo_list_del(vm_bo, evict, lock); ++ + drm_gem_gpuva_assert_lock_held(obj); + list_del(&vm_bo->list.entry.gem); + +@@ -1037,6 +1615,61 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_ + } + EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc); + ++/** ++ * drm_gpuvm_bo_extobj_add() - adds the &drm_gpuvm_bo to its &drm_gpuvm's ++ * extobj list ++ * @vm_bo: The &drm_gpuvm_bo to add to its &drm_gpuvm's the extobj list. ++ * ++ * Adds the given @vm_bo to its &drm_gpuvm's extobj list if not on the list ++ * already and if the corresponding &drm_gem_object is an external object, ++ * actually. ++ */ ++void ++drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo) ++{ ++ struct drm_gpuvm *gpuvm = vm_bo->vm; ++ bool lock = !drm_gpuvm_resv_protected(gpuvm); ++ ++ if (!lock) ++ drm_gpuvm_resv_assert_held(gpuvm); ++ ++ if (drm_gpuvm_is_extobj(gpuvm, vm_bo->obj)) ++ drm_gpuvm_bo_list_add(vm_bo, extobj, lock); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add); ++ ++/** ++ * drm_gpuvm_bo_evict() - add / remove a &drm_gpuvm_bo to / from the &drm_gpuvms ++ * evicted list ++ * @vm_bo: the &drm_gpuvm_bo to add or remove ++ * @evict: indicates whether the object is evicted ++ * ++ * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvms evicted list. ++ */ ++void ++drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict) ++{ ++ struct drm_gpuvm *gpuvm = vm_bo->vm; ++ struct drm_gem_object *obj = vm_bo->obj; ++ bool lock = !drm_gpuvm_resv_protected(gpuvm); ++ ++ dma_resv_assert_held(obj->resv); ++ vm_bo->evicted = evict; ++ ++ /* Can't add external objects to the evicted list directly if not using ++ * internal spinlocks, since in this case the evicted list is protected ++ * with the VM's common dma-resv lock. ++ */ ++ if (drm_gpuvm_is_extobj(gpuvm, obj) && !lock) ++ return; ++ ++ if (evict) ++ drm_gpuvm_bo_list_add(vm_bo, evict, lock); ++ else ++ drm_gpuvm_bo_list_del_init(vm_bo, evict, lock); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_evict); ++ + static int + __drm_gpuva_insert(struct drm_gpuvm *gpuvm, + struct drm_gpuva *va) +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -32,6 +32,7 @@ + + #include + #include ++#include + + struct drm_gpuvm; + struct drm_gpuvm_bo; +@@ -283,6 +284,50 @@ struct drm_gpuvm { + * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv. 
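As a hedged continuation of the earlier map-path sketch (again not taken from the patch, and assuming the default internal-spinlock mode rather than DRM_GPUVM_RESV_PROTECTED), a driver might advertise shared BOs via drm_gpuvm_bo_extobj_add() right after obtaining the vm_bo.

static int driver_map_prepare_bo(struct drm_gpuvm *gpuvm,
                                 struct drm_gem_object *obj,
                                 struct drm_gpuvm_bo **p_vm_bo)
{
        struct drm_gpuvm_bo *vm_bo;

        dma_resv_lock(obj->resv, NULL);
        vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj);
        if (!IS_ERR(vm_bo)) {
                /*
                 * No-op for BOs sharing the VM's common dma-resv; external
                 * BOs are put on the VM's extobj list (only once).
                 */
                drm_gpuvm_bo_extobj_add(vm_bo);
        }
        dma_resv_unlock(obj->resv);

        if (IS_ERR(vm_bo))
                return PTR_ERR(vm_bo);

        *p_vm_bo = vm_bo;
        return 0;
}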
+ */ + struct drm_gem_object *r_obj; ++ ++ /** ++ * @extobj: structure holding the extobj list ++ */ ++ struct { ++ /** ++ * @list: &list_head storing &drm_gpuvm_bos serving as ++ * external object ++ */ ++ struct list_head list; ++ ++ /** ++ * @local_list: pointer to the local list temporarily storing ++ * entries from the external object list ++ */ ++ struct list_head *local_list; ++ ++ /** ++ * @lock: spinlock to protect the extobj list ++ */ ++ spinlock_t lock; ++ } extobj; ++ ++ /** ++ * @evict: structure holding the evict list and evict list lock ++ */ ++ struct { ++ /** ++ * @list: &list_head storing &drm_gpuvm_bos currently being ++ * evicted ++ */ ++ struct list_head list; ++ ++ /** ++ * @local_list: pointer to the local list temporarily storing ++ * entries from the evicted object list ++ */ ++ struct list_head *local_list; ++ ++ /** ++ * @lock: spinlock to protect the evict list ++ */ ++ spinlock_t lock; ++ } evict; + }; + + void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, +@@ -359,6 +404,22 @@ drm_gpuvm_resv_protected(struct drm_gpuv + #define drm_gpuvm_resv_assert_held(gpuvm__) \ + dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) + ++/** ++ * drm_gpuvm_is_extobj() - indicates whether the given &drm_gem_object is an ++ * external object ++ * @gpuvm: the &drm_gpuvm to check ++ * @obj: the &drm_gem_object to check ++ * ++ * Returns: true if the &drm_gem_object &dma_resv differs from the ++ * &drm_gpuvms &dma_resv, false otherwise ++ */ ++static inline bool ++drm_gpuvm_is_extobj(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ return obj && obj->resv != drm_gpuvm_resv(gpuvm); ++} ++ + static inline struct drm_gpuva * + __drm_gpuva_next(struct drm_gpuva *va) + { +@@ -438,6 +499,144 @@ __drm_gpuva_next(struct drm_gpuva *va) + list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry) + + /** ++ * struct drm_gpuvm_exec - &drm_gpuvm abstraction of &drm_exec ++ * ++ * This structure should be created on the stack as &drm_exec should be. ++ * ++ * Optionally, @extra can be set in order to lock additional &drm_gem_objects. ++ */ ++struct drm_gpuvm_exec { ++ /** ++ * @exec: the &drm_exec structure ++ */ ++ struct drm_exec exec; ++ ++ /** ++ * @flags: the flags for the struct drm_exec ++ */ ++ uint32_t flags; ++ ++ /** ++ * @vm: the &drm_gpuvm to lock its DMA reservations ++ */ ++ struct drm_gpuvm *vm; ++ ++ /** ++ * @num_fences: the number of fences to reserve for the &dma_resv of the ++ * locked &drm_gem_objects ++ */ ++ unsigned int num_fences; ++ ++ /** ++ * @extra: Callback and corresponding private data for the driver to ++ * lock arbitrary additional &drm_gem_objects. ++ */ ++ struct { ++ /** ++ * @fn: The driver callback to lock additional &drm_gem_objects. ++ */ ++ int (*fn)(struct drm_gpuvm_exec *vm_exec); ++ ++ /** ++ * @priv: driver private data for the @fn callback ++ */ ++ void *priv; ++ } extra; ++}; ++ ++/** ++ * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv ++ * @gpuvm: the &drm_gpuvm ++ * @exec: the &drm_exec context ++ * @num_fences: the amount of &dma_fences to reserve ++ * ++ * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object. ++ * ++ * Using this function directly, it is the drivers responsibility to call ++ * drm_exec_init() and drm_exec_fini() accordingly. ++ * ++ * Returns: 0 on success, negative error code on failure. 
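Putting the pieces together, a hedged sketch of a submit path built on struct drm_gpuvm_exec; driver_job_submit(), the fence argument and the chosen dma_resv usages are assumptions, and drm_gpuvm_exec_validate(), drm_gpuvm_exec_resv_add_fence() and drm_gpuvm_exec_unlock() are the wrappers declared further below.

static int driver_job_submit(struct drm_gpuvm *gpuvm, struct dma_fence *fence)
{
        struct drm_gpuvm_exec vm_exec = {
                .vm = gpuvm,
                .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
                .num_fences = 1,
        };
        int ret;

        /* Lock the VM resv and the dma-resv of all external objects. */
        ret = drm_gpuvm_exec_lock(&vm_exec);
        if (ret)
                return ret;

        /* Re-validate everything currently on the evicted list. */
        ret = drm_gpuvm_exec_validate(&vm_exec);
        if (ret)
                goto out_unlock;

        /* ... push the job to the hardware and obtain @fence ... */

        drm_gpuvm_exec_resv_add_fence(&vm_exec, fence,
                                      DMA_RESV_USAGE_BOOKKEEP,
                                      DMA_RESV_USAGE_BOOKKEEP);

out_unlock:
        drm_gpuvm_exec_unlock(&vm_exec);
        return ret;
}

Keeping drm_gpuvm_exec on the stack mirrors how drm_exec itself is meant to be used.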
++ */ ++static inline int ++drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences); ++} ++ ++int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences); ++ ++int drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ u64 addr, u64 range, ++ unsigned int num_fences); ++ ++int drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec); ++ ++int drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec, ++ struct drm_gem_object **objs, ++ unsigned int num_objs); ++ ++int drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, ++ u64 addr, u64 range); ++ ++/** ++ * drm_gpuvm_exec_unlock() - lock all dma-resv of all assoiciated BOs ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * ++ * Releases all dma-resv locks of all &drm_gem_objects previously acquired ++ * through drm_gpuvm_exec_lock() or its variants. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++static inline void ++drm_gpuvm_exec_unlock(struct drm_gpuvm_exec *vm_exec) ++{ ++ drm_exec_fini(&vm_exec->exec); ++} ++ ++int drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec); ++void drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage); ++ ++/** ++ * drm_gpuvm_exec_resv_add_fence() ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * @fence: fence to add ++ * @private_usage: private dma-resv usage ++ * @extobj_usage: extobj dma-resv usage ++ * ++ * See drm_gpuvm_resv_add_fence(). ++ */ ++static inline void ++drm_gpuvm_exec_resv_add_fence(struct drm_gpuvm_exec *vm_exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage) ++{ ++ drm_gpuvm_resv_add_fence(vm_exec->vm, &vm_exec->exec, fence, ++ private_usage, extobj_usage); ++} ++ ++/** ++ * drm_gpuvm_exec_validate() ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * ++ * See drm_gpuvm_validate(). ++ */ ++static inline int ++drm_gpuvm_exec_validate(struct drm_gpuvm_exec *vm_exec) ++{ ++ return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); ++} ++ ++/** + * struct drm_gpuvm_bo - structure representing a &drm_gpuvm and + * &drm_gem_object combination + * +@@ -467,6 +666,12 @@ struct drm_gpuvm_bo { + struct drm_gem_object *obj; + + /** ++ * @evicted: Indicates whether the &drm_gem_object is evicted; field ++ * protected by the &drm_gem_object's dma-resv lock. ++ */ ++ bool evicted; ++ ++ /** + * @kref: The reference count for this &drm_gpuvm_bo. + */ + struct kref kref; +@@ -493,6 +698,18 @@ struct drm_gpuvm_bo { + * gpuva list. + */ + struct list_head gem; ++ ++ /** ++ * @evict: List entry to attach to the &drm_gpuvms ++ * extobj list. ++ */ ++ struct list_head extobj; ++ ++ /** ++ * @evict: List entry to attach to the &drm_gpuvms evict ++ * list. ++ */ ++ struct list_head evict; + } entry; + } list; + }; +@@ -527,6 +744,27 @@ struct drm_gpuvm_bo * + drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj); + ++void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict); ++ ++/** ++ * drm_gpuvm_bo_gem_evict() ++ * @obj: the &drm_gem_object ++ * @evict: indicates whether @obj is evicted ++ * ++ * See drm_gpuvm_bo_evict(). 
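For a picture of how the evicted list gets populated in the first place, a hedged sketch of a TTM-based driver's move path; this mirrors the nouveau_bo_move() hunk in the follow-up patch, and driver_bo_evicted() is a stand-in name.

static void driver_bo_evicted(struct ttm_buffer_object *bo, bool evicted)
{
        struct drm_gem_object *obj = &bo->base;

        /* The BO's dma-resv is held across a TTM move, as required. */
        drm_gpuvm_bo_gem_evict(obj, evicted);
}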
++ */ ++static inline void ++drm_gpuvm_bo_gem_evict(struct drm_gem_object *obj, bool evict) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ drm_gem_for_each_gpuvm_bo(vm_bo, obj) ++ drm_gpuvm_bo_evict(vm_bo, evict); ++} ++ ++void drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo); ++ + /** + * drm_gpuvm_bo_for_each_va() - iterator to walk over a list of &drm_gpuva + * @va__: &drm_gpuva structure to assign to in each iteration step +@@ -902,6 +1140,18 @@ struct drm_gpuvm_ops { + void (*vm_bo_free)(struct drm_gpuvm_bo *vm_bo); + + /** ++ * @vm_bo_validate: called from drm_gpuvm_validate() ++ * ++ * Drivers receive this callback for every evicted &drm_gem_object being ++ * mapped in the corresponding &drm_gpuvm. ++ * ++ * Typically, drivers would call their driver specific variant of ++ * ttm_bo_validate() from within this callback. ++ */ ++ int (*vm_bo_validate)(struct drm_gpuvm_bo *vm_bo, ++ struct drm_exec *exec); ++ ++ /** + * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the + * mapping once all previous steps were completed + * diff --git a/patches-6.6/034-15-v6.8-drm-nouveau-use-GPUVM-common-infrastructure.patch b/patches-6.6/034-15-v6.8-drm-nouveau-use-GPUVM-common-infrastructure.patch new file mode 100644 index 0000000..f90fc12 --- /dev/null +++ b/patches-6.6/034-15-v6.8-drm-nouveau-use-GPUVM-common-infrastructure.patch @@ -0,0 +1,448 @@ +From 014f831abcb82738e57c0b00db66dfef0798ed67 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Mon, 13 Nov 2023 23:12:00 +0100 +Subject: [PATCH] drm/nouveau: use GPUVM common infrastructure + +GPUVM provides common infrastructure to track external and evicted GEM +objects as well as locking and validation helpers. + +Especially external and evicted object tracking is a huge improvement +compared to the current brute force approach of iterating all mappings +in order to lock and validate the GPUVM's GEM objects. Hence, make us of +it. 
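On the driver side this boils down to implementing the vm_bo_validate() hook introduced above; a hedged sketch with made-up driver names (struct driver_bo, to_driver_bo() and driver_bo_validate() are stand-ins, not the actual nouveau helpers).

static int driver_gpuvm_bo_validate(struct drm_gpuvm_bo *vm_bo,
                                    struct drm_exec *exec)
{
        /* The BO's dma-resv is already locked by drm_gpuvm_exec_lock(). */
        struct driver_bo *bo = to_driver_bo(vm_bo->obj);

        return driver_bo_validate(bo);
}

static const struct drm_gpuvm_ops driver_gpuvm_ops = {
        .vm_bo_validate = driver_gpuvm_bo_validate,
        /* ... plus the other callbacks the driver implements */
};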
+ +Signed-off-by: Danilo Krummrich +Reviewed-by: Dave Airlie +Link: https://patchwork.freedesktop.org/patch/msgid/20231113221202.7203-1-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +- + drivers/gpu/drm/nouveau/nouveau_exec.c | 57 +++------- + drivers/gpu/drm/nouveau/nouveau_exec.h | 4 - + drivers/gpu/drm/nouveau/nouveau_sched.c | 9 +- + drivers/gpu/drm/nouveau/nouveau_sched.h | 7 +- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 134 +++++++++++++----------- + 6 files changed, 100 insertions(+), 115 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -1056,17 +1056,18 @@ nouveau_bo_move(struct ttm_buffer_object + { + struct nouveau_drm *drm = nouveau_bdev(bo->bdev); + struct nouveau_bo *nvbo = nouveau_bo(bo); ++ struct drm_gem_object *obj = &bo->base; + struct ttm_resource *old_reg = bo->resource; + struct nouveau_drm_tile *new_tile = NULL; + int ret = 0; + +- + if (new_reg->mem_type == TTM_PL_TT) { + ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg); + if (ret) + return ret; + } + ++ drm_gpuvm_bo_gem_evict(obj, evict); + nouveau_bo_move_ntfy(bo, new_reg); + ret = ttm_bo_wait_ctx(bo, ctx); + if (ret) +@@ -1131,6 +1132,7 @@ out: + out_ntfy: + if (ret) { + nouveau_bo_move_ntfy(bo, bo->resource); ++ drm_gpuvm_bo_gem_evict(obj, !evict); + } + return ret; + } +--- a/drivers/gpu/drm/nouveau/nouveau_exec.c ++++ b/drivers/gpu/drm/nouveau/nouveau_exec.c +@@ -1,7 +1,5 @@ + // SPDX-License-Identifier: MIT + +-#include +- + #include "nouveau_drv.h" + #include "nouveau_gem.h" + #include "nouveau_mem.h" +@@ -86,14 +84,12 @@ + */ + + static int +-nouveau_exec_job_submit(struct nouveau_job *job) ++nouveau_exec_job_submit(struct nouveau_job *job, ++ struct drm_gpuvm_exec *vme) + { + struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); + struct nouveau_cli *cli = job->cli; + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli); +- struct drm_exec *exec = &job->exec; +- struct drm_gem_object *obj; +- unsigned long index; + int ret; + + /* Create a new fence, but do not emit yet. 
*/ +@@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_j + return ret; + + nouveau_uvmm_lock(uvmm); +- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | +- DRM_EXEC_IGNORE_DUPLICATES); +- drm_exec_until_all_locked(exec) { +- struct drm_gpuva *va; +- +- drm_gpuvm_for_each_va(va, &uvmm->base) { +- if (unlikely(va == &uvmm->base.kernel_alloc_node)) +- continue; +- +- ret = drm_exec_prepare_obj(exec, va->gem.obj, 1); +- drm_exec_retry_on_contention(exec); +- if (ret) +- goto err_uvmm_unlock; +- } ++ ret = drm_gpuvm_exec_lock(vme); ++ if (ret) { ++ nouveau_uvmm_unlock(uvmm); ++ return ret; + } + nouveau_uvmm_unlock(uvmm); + +- drm_exec_for_each_locked_object(exec, index, obj) { +- struct nouveau_bo *nvbo = nouveau_gem_object(obj); +- +- ret = nouveau_bo_validate(nvbo, true, false); +- if (ret) +- goto err_exec_fini; ++ ret = drm_gpuvm_exec_validate(vme); ++ if (ret) { ++ drm_gpuvm_exec_unlock(vme); ++ return ret; + } + + return 0; +- +-err_uvmm_unlock: +- nouveau_uvmm_unlock(uvmm); +-err_exec_fini: +- drm_exec_fini(exec); +- return ret; +- + } + + static void +-nouveau_exec_job_armed_submit(struct nouveau_job *job) ++nouveau_exec_job_armed_submit(struct nouveau_job *job, ++ struct drm_gpuvm_exec *vme) + { +- struct drm_exec *exec = &job->exec; +- struct drm_gem_object *obj; +- unsigned long index; +- +- drm_exec_for_each_locked_object(exec, index, obj) +- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); +- +- drm_exec_fini(exec); ++ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, ++ job->resv_usage, job->resv_usage); ++ drm_gpuvm_exec_unlock(vme); + } + + static struct dma_fence * +--- a/drivers/gpu/drm/nouveau/nouveau_exec.h ++++ b/drivers/gpu/drm/nouveau/nouveau_exec.h +@@ -3,16 +3,12 @@ + #ifndef __NOUVEAU_EXEC_H__ + #define __NOUVEAU_EXEC_H__ + +-#include +- + #include "nouveau_drv.h" + #include "nouveau_sched.h" + + struct nouveau_exec_job_args { + struct drm_file *file_priv; + struct nouveau_sched_entity *sched_entity; +- +- struct drm_exec exec; + struct nouveau_channel *chan; + + struct { +--- a/drivers/gpu/drm/nouveau/nouveau_sched.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.c +@@ -263,6 +263,11 @@ nouveau_job_submit(struct nouveau_job *j + { + struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity); + struct dma_fence *done_fence = NULL; ++ struct drm_gpuvm_exec vm_exec = { ++ .vm = &nouveau_cli_uvmm(job->cli)->base, ++ .flags = DRM_EXEC_IGNORE_DUPLICATES, ++ .num_fences = 1, ++ }; + int ret; + + ret = nouveau_job_add_deps(job); +@@ -282,7 +287,7 @@ nouveau_job_submit(struct nouveau_job *j + * successfully. + */ + if (job->ops->submit) { +- ret = job->ops->submit(job); ++ ret = job->ops->submit(job, &vm_exec); + if (ret) + goto err_cleanup; + } +@@ -315,7 +320,7 @@ nouveau_job_submit(struct nouveau_job *j + set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags); + + if (job->ops->armed_submit) +- job->ops->armed_submit(job); ++ job->ops->armed_submit(job, &vm_exec); + + nouveau_job_fence_attach(job); + +--- a/drivers/gpu/drm/nouveau/nouveau_sched.h ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.h +@@ -5,7 +5,7 @@ + + #include + +-#include ++#include + #include + + #include "nouveau_drv.h" +@@ -54,7 +54,6 @@ struct nouveau_job { + struct drm_file *file_priv; + struct nouveau_cli *cli; + +- struct drm_exec exec; + enum dma_resv_usage resv_usage; + struct dma_fence *done_fence; + +@@ -76,8 +75,8 @@ struct nouveau_job { + /* If .submit() returns without any error, it is guaranteed that + * armed_submit() is called. 
+ */ +- int (*submit)(struct nouveau_job *); +- void (*armed_submit)(struct nouveau_job *); ++ int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *); ++ void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *); + struct dma_fence *(*run)(struct nouveau_job *); + void (*free)(struct nouveau_job *); + enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *); +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -438,8 +438,9 @@ nouveau_uvma_region_complete(struct nouv + static void + op_map_prepare_unwind(struct nouveau_uvma *uvma) + { ++ struct drm_gpuva *va = &uvma->va; + nouveau_uvma_gem_put(uvma); +- drm_gpuva_remove(&uvma->va); ++ drm_gpuva_remove(va); + nouveau_uvma_free(uvma); + } + +@@ -468,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct no + break; + case DRM_GPUVA_OP_REMAP: { + struct drm_gpuva_op_remap *r = &op->remap; ++ struct drm_gpuva *va = r->unmap->va; + + if (r->next) + op_map_prepare_unwind(new->next); +@@ -475,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct no + if (r->prev) + op_map_prepare_unwind(new->prev); + +- op_unmap_prepare_unwind(r->unmap->va); ++ op_unmap_prepare_unwind(va); + break; + } + case DRM_GPUVA_OP_UNMAP: +@@ -634,6 +636,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_u + goto unwind; + } + } ++ + break; + } + case DRM_GPUVA_OP_REMAP: { +@@ -1135,12 +1138,53 @@ bind_link_gpuvas(struct bind_job_op *bop + } + + static int +-nouveau_uvmm_bind_job_submit(struct nouveau_job *job) ++bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); ++ struct bind_job_op *op; ++ int ret; ++ ++ list_for_each_op(op, &bind_job->ops) { ++ struct drm_gpuva_op *va_op; ++ ++ if (!op->ops) ++ continue; ++ ++ drm_gpuva_for_each_op(va_op, op->ops) { ++ struct drm_gem_object *obj = op_gem_obj(va_op); ++ ++ if (unlikely(!obj)) ++ continue; ++ ++ ret = drm_exec_prepare_obj(exec, obj, num_fences); ++ if (ret) ++ return ret; ++ ++ /* Don't validate GEMs backing mappings we're about to ++ * unmap, it's not worth the effort. ++ */ ++ if (va_op->op == DRM_GPUVA_OP_UNMAP) ++ continue; ++ ++ ret = nouveau_bo_validate(nouveau_gem_object(obj), ++ true, false); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int ++nouveau_uvmm_bind_job_submit(struct nouveau_job *job, ++ struct drm_gpuvm_exec *vme) + { + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); + struct nouveau_sched_entity *entity = job->entity; +- struct drm_exec *exec = &job->exec; ++ struct drm_exec *exec = &vme->exec; + struct bind_job_op *op; + int ret; + +@@ -1157,6 +1201,8 @@ nouveau_uvmm_bind_job_submit(struct nouv + dma_resv_unlock(obj->resv); + if (IS_ERR(op->vm_bo)) + return PTR_ERR(op->vm_bo); ++ ++ drm_gpuvm_bo_extobj_add(op->vm_bo); + } + + ret = bind_validate_op(job, op); +@@ -1179,6 +1225,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + * unwind all GPU VA space changes on failure. 
+ */ + nouveau_uvmm_lock(uvmm); ++ + list_for_each_op(op, &bind_job->ops) { + switch (op->op) { + case OP_MAP_SPARSE: +@@ -1290,55 +1337,13 @@ nouveau_uvmm_bind_job_submit(struct nouv + } + } + +- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | +- DRM_EXEC_IGNORE_DUPLICATES); ++ drm_exec_init(exec, vme->flags); + drm_exec_until_all_locked(exec) { +- list_for_each_op(op, &bind_job->ops) { +- struct drm_gpuva_op *va_op; +- +- if (IS_ERR_OR_NULL(op->ops)) +- continue; +- +- drm_gpuva_for_each_op(va_op, op->ops) { +- struct drm_gem_object *obj = op_gem_obj(va_op); +- +- if (unlikely(!obj)) +- continue; +- +- ret = drm_exec_prepare_obj(exec, obj, 1); +- drm_exec_retry_on_contention(exec); +- if (ret) { +- op = list_last_op(&bind_job->ops); +- goto unwind; +- } +- } +- } +- } +- +- list_for_each_op(op, &bind_job->ops) { +- struct drm_gpuva_op *va_op; +- +- if (IS_ERR_OR_NULL(op->ops)) +- continue; +- +- drm_gpuva_for_each_op(va_op, op->ops) { +- struct drm_gem_object *obj = op_gem_obj(va_op); +- +- if (unlikely(!obj)) +- continue; +- +- /* Don't validate GEMs backing mappings we're about to +- * unmap, it's not worth the effort. +- */ +- if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP)) +- continue; +- +- ret = nouveau_bo_validate(nouveau_gem_object(obj), +- true, false); +- if (ret) { +- op = list_last_op(&bind_job->ops); +- goto unwind; +- } ++ ret = bind_lock_validate(job, exec, vme->num_fences); ++ drm_exec_retry_on_contention(exec); ++ if (ret) { ++ op = list_last_op(&bind_job->ops); ++ goto unwind; + } + } + +@@ -1413,21 +1418,17 @@ unwind: + } + + nouveau_uvmm_unlock(uvmm); +- drm_exec_fini(exec); ++ drm_gpuvm_exec_unlock(vme); + return ret; + } + + static void +-nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job) ++nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job, ++ struct drm_gpuvm_exec *vme) + { +- struct drm_exec *exec = &job->exec; +- struct drm_gem_object *obj; +- unsigned long index; +- +- drm_exec_for_each_locked_object(exec, index, obj) +- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); +- +- drm_exec_fini(exec); ++ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, ++ job->resv_usage, job->resv_usage); ++ drm_gpuvm_exec_unlock(vme); + } + + static struct dma_fence * +@@ -1815,8 +1816,17 @@ nouveau_uvmm_free(struct drm_gpuvm *gpuv + kfree(uvmm); + } + ++static int ++nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) ++{ ++ struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); ++ ++ return nouveau_bo_validate(nvbo, true, false); ++} ++ + static const struct drm_gpuvm_ops gpuvm_ops = { + .vm_free = nouveau_uvmm_free, ++ .vm_bo_validate = nouveau_uvmm_bo_validate, + }; + + int diff --git a/patches-6.6/034-16-v6.8-drm-gpuvm-Helper-to-get-range-of-unmap-from-a-remap-op..patch b/patches-6.6/034-16-v6.8-drm-gpuvm-Helper-to-get-range-of-unmap-from-a-remap-op..patch new file mode 100644 index 0000000..10cbd78 --- /dev/null +++ b/patches-6.6/034-16-v6.8-drm-gpuvm-Helper-to-get-range-of-unmap-from-a-remap-op..patch @@ -0,0 +1,60 @@ +From a191f73d85484f804284674c14f2d9f572c18adb Mon Sep 17 00:00:00 2001 +From: Donald Robson +Date: Wed, 22 Nov 2023 16:34:23 +0000 +Subject: [PATCH] drm/gpuvm: Helper to get range of unmap from a remap op. + +Determining the start and range of the unmap stage of a remap op is a +common piece of code currently implemented by multiple drivers. Add a +helper for this. 
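+
+For illustration only (hypothetical driver code, not part of this patch),
+a typical caller ends up looking like this, where `r` is a
+&drm_gpuva_op_remap and the unmap function is a placeholder:
+
+  u64 unmap_start, unmap_range;
+
+  drm_gpuva_op_remap_to_unmap_range(r, &unmap_start, &unmap_range);
+  /* Tear down the now-empty range [unmap_start, unmap_start + unmap_range). */
+  my_mmu_unmap_pages(mmu, unmap_start, unmap_range);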
+ +Changes since v7: +- Renamed helper to drm_gpuva_op_remap_to_unmap_range() +- Improved documentation + +Changes since v6: +- Remove use of __always_inline + +Signed-off-by: Donald Robson +Signed-off-by: Sarah Walker +Reviewed-by: Danilo Krummrich +Link: https://lore.kernel.org/r/8a0a5b5eeec459d3c60fcdaa5a638ad14a18a59e.1700668843.git.donald.robson@imgtec.com +Signed-off-by: Maxime Ripard +--- + include/drm/drm_gpuvm.h | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -1213,4 +1213,32 @@ void drm_gpuva_remap(struct drm_gpuva *p + + void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op); + ++/** ++ * drm_gpuva_op_remap_to_unmap_range() - Helper to get the start and range of ++ * the unmap stage of a remap op. ++ * @op: Remap op. ++ * @start_addr: Output pointer for the start of the required unmap. ++ * @range: Output pointer for the length of the required unmap. ++ * ++ * The given start address and range will be set such that they represent the ++ * range of the address space that was previously covered by the mapping being ++ * re-mapped, but is now empty. ++ */ ++static inline void ++drm_gpuva_op_remap_to_unmap_range(const struct drm_gpuva_op_remap *op, ++ u64 *start_addr, u64 *range) ++{ ++ const u64 va_start = op->prev ? ++ op->prev->va.addr + op->prev->va.range : ++ op->unmap->va->va.addr; ++ const u64 va_end = op->next ? ++ op->next->va.addr : ++ op->unmap->va->va.addr + op->unmap->va->va.range; ++ ++ if (start_addr) ++ *start_addr = va_start; ++ if (range) ++ *range = va_end - va_start; ++} ++ + #endif /* __DRM_GPUVM_H__ */ diff --git a/patches-6.6/034-17-v6.8-drm-gpuvm-Fix-deprecated-license-identifier.patch b/patches-6.6/034-17-v6.8-drm-gpuvm-Fix-deprecated-license-identifier.patch new file mode 100644 index 0000000..f7686f8 --- /dev/null +++ b/patches-6.6/034-17-v6.8-drm-gpuvm-Fix-deprecated-license-identifier.patch @@ -0,0 +1,41 @@ +From b9c02e1052650af56d4487efa5fade3fb70e3653 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= +Date: Mon, 6 Nov 2023 12:48:27 +0100 +Subject: [PATCH] drm/gpuvm: Fix deprecated license identifier +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +"GPL-2.0-only" in the license header was incorrectly changed to the +now deprecated "GPL-2.0". Fix. + +Cc: Maxime Ripard +Cc: Danilo Krummrich +Reported-by: David Edelsohn +Closes: https://lore.kernel.org/dri-devel/5lfrhdpkwhpgzipgngojs3tyqfqbesifzu5nf4l5q3nhfdhcf2@25nmiq7tfrew/T/#m5c356d68815711eea30dd94cc6f7ea8cd4344fe3 +Fixes: f7749a549b4f ("drm/gpuvm: Dual-licence the drm_gpuvm code GPL-2.0 OR MIT") +Signed-off-by: Thomas Hellström +Acked-by: Maxime Ripard +Acked-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231106114827.62492-1-thomas.hellstrom@linux.intel.com +--- + drivers/gpu/drm/drm_gpuvm.c | 2 +- + include/drm/drm_gpuvm.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1,4 +1,4 @@ +-// SPDX-License-Identifier: GPL-2.0 OR MIT ++// SPDX-License-Identifier: GPL-2.0-only OR MIT + /* + * Copyright (c) 2022 Red Hat. 
+ * +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -1,4 +1,4 @@ +-/* SPDX-License-Identifier: GPL-2.0 OR MIT */ ++/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ + + #ifndef __DRM_GPUVM_H__ + #define __DRM_GPUVM_H__ diff --git a/patches-6.6/034-18-v6.8-drm-gpuvm-fall-back-to-drm_exec_lock_obj.patch b/patches-6.6/034-18-v6.8-drm-gpuvm-fall-back-to-drm_exec_lock_obj.patch new file mode 100644 index 0000000..c38b2aa --- /dev/null +++ b/patches-6.6/034-18-v6.8-drm-gpuvm-fall-back-to-drm_exec_lock_obj.patch @@ -0,0 +1,142 @@ +From e759f2ca29d918d3db57a61cdf838025beb03465 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 29 Nov 2023 23:08:00 +0100 +Subject: [PATCH] drm/gpuvm: fall back to drm_exec_lock_obj() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fall back to drm_exec_lock_obj() if num_fences is zero for the +drm_gpuvm_prepare_* function family. + +Otherwise dma_resv_reserve_fences() would actually allocate slots even +though num_fences is zero. + +Cc: Christian König +Acked-by: Donald Robson +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231129220835.297885-2-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 43 ++++++++++++++++++++++++++++++++----- + include/drm/drm_gpuvm.h | 23 +++----------------- + 2 files changed, 41 insertions(+), 25 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1080,6 +1080,37 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) + EXPORT_SYMBOL_GPL(drm_gpuvm_put); + + static int ++exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, ++ unsigned int num_fences) ++{ ++ return num_fences ? drm_exec_prepare_obj(exec, obj, num_fences) : ++ drm_exec_lock_obj(exec, obj); ++} ++ ++/** ++ * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv ++ * @gpuvm: the &drm_gpuvm ++ * @exec: the &drm_exec context ++ * @num_fences: the amount of &dma_fences to reserve ++ * ++ * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object; if ++ * @num_fences is zero drm_exec_lock_obj() is called instead. ++ * ++ * Using this function directly, it is the drivers responsibility to call ++ * drm_exec_init() and drm_exec_fini() accordingly. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ return exec_prepare_obj(exec, gpuvm->r_obj, num_fences); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_vm); ++ ++static int + __drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +@@ -1089,7 +1120,7 @@ __drm_gpuvm_prepare_objects(struct drm_g + int ret = 0; + + for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) { +- ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); ++ ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); + if (ret) + break; + } +@@ -1110,7 +1141,7 @@ drm_gpuvm_prepare_objects_locked(struct + + drm_gpuvm_resv_assert_held(gpuvm); + list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) { +- ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); ++ ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); + if (ret) + break; + +@@ -1128,7 +1159,8 @@ drm_gpuvm_prepare_objects_locked(struct + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given +- * &drm_gpuvm contains mappings of. 
++ * &drm_gpuvm contains mappings of; if @num_fences is zero drm_exec_lock_obj() ++ * is called instead. + * + * Using this function directly, it is the drivers responsibility to call + * drm_exec_init() and drm_exec_fini() accordingly. +@@ -1165,7 +1197,8 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_obje + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr +- * and @addr + @range. ++ * and @addr + @range; if @num_fences is zero drm_exec_lock_obj() is called ++ * instead. + * + * Returns: 0 on success, negative error code on failure. + */ +@@ -1180,7 +1213,7 @@ drm_gpuvm_prepare_range(struct drm_gpuvm + drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { + struct drm_gem_object *obj = va->gem.obj; + +- ret = drm_exec_prepare_obj(exec, obj, num_fences); ++ ret = exec_prepare_obj(exec, obj, num_fences); + if (ret) + return ret; + } +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -544,26 +544,9 @@ struct drm_gpuvm_exec { + } extra; + }; + +-/** +- * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv +- * @gpuvm: the &drm_gpuvm +- * @exec: the &drm_exec context +- * @num_fences: the amount of &dma_fences to reserve +- * +- * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object. +- * +- * Using this function directly, it is the drivers responsibility to call +- * drm_exec_init() and drm_exec_fini() accordingly. +- * +- * Returns: 0 on success, negative error code on failure. +- */ +-static inline int +-drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, +- struct drm_exec *exec, +- unsigned int num_fences) +-{ +- return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences); +-} ++int drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences); + + int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, diff --git a/patches-6.6/034-19-v6.8-drm-gpuvm-Let-drm_gpuvm_bo_put-report-when-the-vm_bo.patch b/patches-6.6/034-19-v6.8-drm-gpuvm-Let-drm_gpuvm_bo_put-report-when-the-vm_bo.patch new file mode 100644 index 0000000..b58d296 --- /dev/null +++ b/patches-6.6/034-19-v6.8-drm-gpuvm-Let-drm_gpuvm_bo_put-report-when-the-vm_bo.patch @@ -0,0 +1,59 @@ +From c50a291d621aa7abaa27b05f56d450a388b64948 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Mon, 4 Dec 2023 16:14:06 +0100 +Subject: [PATCH] drm/gpuvm: Let drm_gpuvm_bo_put() report when the vm_bo + object is destroyed + +Some users need to release resources attached to the vm_bo object when +it's destroyed. In Panthor's case, we need to release the pin ref so +BO pages can be returned to the system when all GPU mappings are gone. + +This could be done through a custom drm_gpuvm::vm_bo_free() hook, but +this has all sort of locking implications that would force us to expose +a drm_gem_shmem_unpin_locked() helper, not to mention the fact that +having a ::vm_bo_free() implementation without a ::vm_bo_alloc() one +seems odd. So let's keep things simple, and extend drm_gpuvm_bo_put() +to report when the object is destroyed. 
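+
+A minimal sketch of the intended use (hypothetical driver code; the unpin
+helper is a placeholder):
+
+  /* Drop our vm_bo reference; release the pin once the last ref is gone. */
+  if (drm_gpuvm_bo_put(vm_bo))
+          my_bo_unpin(bo);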
+ +Signed-off-by: Boris Brezillon +Reviewed-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231204151406.1977285-1-boris.brezillon@collabora.com +--- + drivers/gpu/drm/drm_gpuvm.c | 8 ++++++-- + include/drm/drm_gpuvm.h | 2 +- + 2 files changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1529,14 +1529,18 @@ drm_gpuvm_bo_destroy(struct kref *kref) + * hold the dma-resv or driver specific GEM gpuva lock. + * + * This function may only be called from non-atomic context. ++ * ++ * Returns: true if vm_bo was destroyed, false otherwise. + */ +-void ++bool + drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo) + { + might_sleep(); + + if (vm_bo) +- kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); ++ return !!kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); ++ ++ return false; + } + EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put); + +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -721,7 +721,7 @@ drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm + return vm_bo; + } + +-void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); ++bool drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); + + struct drm_gpuvm_bo * + drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, diff --git a/patches-6.6/034-20-v6.7-drm-sched-Convert-the-GPU-scheduler-to-variable-number-of.patch b/patches-6.6/034-20-v6.7-drm-sched-Convert-the-GPU-scheduler-to-variable-number-of.patch new file mode 100644 index 0000000..5539118 --- /dev/null +++ b/patches-6.6/034-20-v6.7-drm-sched-Convert-the-GPU-scheduler-to-variable-number-of.patch @@ -0,0 +1,405 @@ +From 56e449603f0ac580700621a356d35d5716a62ce5 Mon Sep 17 00:00:00 2001 +From: Luben Tuikov +Date: Sat, 14 Oct 2023 21:15:35 -0400 +Subject: [PATCH] drm/sched: Convert the GPU scheduler to variable number of + run-queues +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The GPU scheduler has now a variable number of run-queues, which are set up at +drm_sched_init() time. This way, each driver announces how many run-queues it +requires (supports) per each GPU scheduler it creates. Note, that run-queues +correspond to scheduler "priorities", thus if the number of run-queues is set +to 1 at drm_sched_init(), then that scheduler supports a single run-queue, +i.e. single "priority". If a driver further sets a single entity per +run-queue, then this creates a 1-to-1 correspondence between a scheduler and +a scheduled entity. 
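+
+As a hypothetical example (names and limits are placeholders), a driver
+that wants the 1-to-1 setup described above would now initialize its
+scheduler with a single run-queue:
+
+  ret = drm_sched_init(&sched, &my_sched_ops,
+                       1,               /* one run-queue, i.e. one priority */
+                       ring_size, 0, msecs_to_jiffies(500),
+                       NULL, NULL, "my-ring", dev);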
+ +Cc: Lucas Stach +Cc: Russell King +Cc: Qiang Yu +Cc: Rob Clark +Cc: Abhinav Kumar +Cc: Dmitry Baryshkov +Cc: Danilo Krummrich +Cc: Matthew Brost +Cc: Boris Brezillon +Cc: Alex Deucher +Cc: Christian König +Cc: Emma Anholt +Cc: etnaviv@lists.freedesktop.org +Cc: lima@lists.freedesktop.org +Cc: linux-arm-msm@vger.kernel.org +Cc: freedreno@lists.freedesktop.org +Cc: nouveau@lists.freedesktop.org +Cc: dri-devel@lists.freedesktop.org +Signed-off-by: Luben Tuikov +Acked-by: Christian König +Link: https://lore.kernel.org/r/20231023032251.164775-1-luben.tuikov@amd.com +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 +- + drivers/gpu/drm/etnaviv/etnaviv_sched.c | 1 + + drivers/gpu/drm/lima/lima_sched.c | 4 +- + drivers/gpu/drm/msm/msm_ringbuffer.c | 5 +- + drivers/gpu/drm/nouveau/nouveau_sched.c | 1 + + drivers/gpu/drm/panfrost/panfrost_job.c | 1 + + drivers/gpu/drm/scheduler/sched_entity.c | 18 +++++- + drivers/gpu/drm/scheduler/sched_main.c | 74 ++++++++++++++++++---- + drivers/gpu/drm/v3d/v3d_sched.c | 5 ++ + include/drm/gpu_scheduler.h | 9 ++- + 11 files changed, 98 insertions(+), 25 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2277,6 +2277,7 @@ static int amdgpu_device_init_schedulers + } + + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + ring->num_hw_submission, 0, + timeout, adev->reset_domain->wq, + ring->sched_score, ring->name, +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -328,8 +328,8 @@ void amdgpu_job_stop_all_jobs_on_sched(s + int i; + + /* Signal all jobs not yet scheduled */ +- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { +- struct drm_sched_rq *rq = &sched->sched_rq[i]; ++ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { ++ struct drm_sched_rq *rq = sched->sched_rq[i]; + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) { + while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { +--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c +@@ -135,6 +135,7 @@ int etnaviv_sched_init(struct etnaviv_gp + int ret; + + ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, + msecs_to_jiffies(500), NULL, NULL, + dev_name(gpu->dev), gpu->dev); +--- a/drivers/gpu/drm/lima/lima_sched.c ++++ b/drivers/gpu/drm/lima/lima_sched.c +@@ -495,7 +495,9 @@ int lima_sched_pipe_init(struct lima_sch + + INIT_WORK(&pipe->recover_work, lima_sched_recover_work); + +- return drm_sched_init(&pipe->base, &lima_sched_ops, 1, ++ return drm_sched_init(&pipe->base, &lima_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, ++ 1, + lima_job_hang_limit, + msecs_to_jiffies(timeout), NULL, + NULL, name, pipe->ldev->dev); +--- a/drivers/gpu/drm/msm/msm_ringbuffer.c ++++ b/drivers/gpu/drm/msm/msm_ringbuffer.c +@@ -98,8 +98,9 @@ struct msm_ringbuffer *msm_ringbuffer_ne + sched_timeout = MAX_SCHEDULE_TIMEOUT; + + ret = drm_sched_init(&ring->sched, &msm_sched_ops, +- num_hw_submissions, 0, sched_timeout, +- NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); ++ DRM_SCHED_PRIORITY_COUNT, ++ num_hw_submissions, 0, sched_timeout, ++ NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); + if (ret) { + goto fail; + } +--- a/drivers/gpu/drm/nouveau/nouveau_sched.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.c +@@ -441,6 
+441,7 @@ int nouveau_sched_init(struct nouveau_dr + return -ENOMEM; + + return drm_sched_init(sched, &nouveau_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, + NULL, NULL, "nouveau_sched", drm->dev->dev); + } +--- a/drivers/gpu/drm/panfrost/panfrost_job.c ++++ b/drivers/gpu/drm/panfrost/panfrost_job.c +@@ -832,6 +832,7 @@ int panfrost_job_init(struct panfrost_de + + ret = drm_sched_init(&js->queue[j].sched, + &panfrost_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + nentries, 0, + msecs_to_jiffies(JOB_TIMEOUT_MS), + pfdev->reset.wq, +--- a/drivers/gpu/drm/scheduler/sched_entity.c ++++ b/drivers/gpu/drm/scheduler/sched_entity.c +@@ -75,8 +75,20 @@ int drm_sched_entity_init(struct drm_sch + RCU_INIT_POINTER(entity->last_scheduled, NULL); + RB_CLEAR_NODE(&entity->rb_tree_node); + +- if(num_sched_list) +- entity->rq = &sched_list[0]->sched_rq[entity->priority]; ++ if (!sched_list[0]->sched_rq) { ++ /* Warn drivers not to do this and to fix their DRM ++ * calling order. ++ */ ++ pr_warn("%s: called with uninitialized scheduler\n", __func__); ++ } else if (num_sched_list) { ++ /* The "priority" of an entity cannot exceed the number ++ * of run-queues of a scheduler. ++ */ ++ if (entity->priority >= sched_list[0]->num_rqs) ++ entity->priority = max_t(u32, sched_list[0]->num_rqs, ++ DRM_SCHED_PRIORITY_MIN); ++ entity->rq = sched_list[0]->sched_rq[entity->priority]; ++ } + + init_completion(&entity->entity_idle); + +@@ -533,7 +545,7 @@ void drm_sched_entity_select_rq(struct d + + spin_lock(&entity->rq_lock); + sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list); +- rq = sched ? &sched->sched_rq[entity->priority] : NULL; ++ rq = sched ? sched->sched_rq[entity->priority] : NULL; + if (rq != entity->rq) { + drm_sched_rq_remove_entity(entity->rq, entity); + entity->rq = rq; +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -632,8 +632,14 @@ int drm_sched_job_init(struct drm_sched_ + struct drm_sched_entity *entity, + void *owner) + { +- if (!entity->rq) ++ if (!entity->rq) { ++ /* This will most likely be followed by missing frames ++ * or worse--a blank screen--leave a trail in the ++ * logs, so this can be debugged easier. ++ */ ++ drm_err(job->sched, "%s: entity has no rq!\n", __func__); + return -ENOENT; ++ } + + job->entity = entity; + job->s_fence = drm_sched_fence_alloc(entity, owner); +@@ -671,7 +677,7 @@ void drm_sched_job_arm(struct drm_sched_ + sched = entity->rq->sched; + + job->sched = sched; +- job->s_priority = entity->rq - sched->sched_rq; ++ job->s_priority = entity->priority; + job->id = atomic64_inc_return(&sched->job_id_count); + + drm_sched_fence_init(job->s_fence, job->entity); +@@ -888,10 +894,10 @@ drm_sched_select_entity(struct drm_gpu_s + return NULL; + + /* Kernel run queue has higher priority than normal run queue*/ +- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { ++ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? 
+- drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) : +- drm_sched_rq_select_entity_rr(&sched->sched_rq[i]); ++ drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) : ++ drm_sched_rq_select_entity_rr(sched->sched_rq[i]); + if (entity) + break; + } +@@ -1071,6 +1077,7 @@ static int drm_sched_main(void *param) + * + * @sched: scheduler instance + * @ops: backend operations for this scheduler ++ * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT + * @hw_submission: number of hw submissions that can be in flight + * @hang_limit: number of times to allow a job to hang before dropping it + * @timeout: timeout value in jiffies for the scheduler +@@ -1084,11 +1091,12 @@ static int drm_sched_main(void *param) + */ + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, +- unsigned hw_submission, unsigned hang_limit, ++ u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev) + { + int i, ret; ++ + sched->ops = ops; + sched->hw_submission_limit = hw_submission; + sched->name = name; +@@ -1097,8 +1105,36 @@ int drm_sched_init(struct drm_gpu_schedu + sched->hang_limit = hang_limit; + sched->score = score ? score : &sched->_score; + sched->dev = dev; +- for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++) +- drm_sched_rq_init(sched, &sched->sched_rq[i]); ++ ++ if (num_rqs > DRM_SCHED_PRIORITY_COUNT) { ++ /* This is a gross violation--tell drivers what the problem is. ++ */ ++ drm_err(sched, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n", ++ __func__); ++ return -EINVAL; ++ } else if (sched->sched_rq) { ++ /* Not an error, but warn anyway so drivers can ++ * fine-tune their DRM calling order, and return all ++ * is good. 
++ */ ++ drm_warn(sched, "%s: scheduler already initialized!\n", __func__); ++ return 0; ++ } ++ ++ sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq), ++ GFP_KERNEL | __GFP_ZERO); ++ if (!sched->sched_rq) { ++ drm_err(sched, "%s: out of memory for sched_rq\n", __func__); ++ return -ENOMEM; ++ } ++ sched->num_rqs = num_rqs; ++ ret = -ENOMEM; ++ for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) { ++ sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); ++ if (!sched->sched_rq[i]) ++ goto Out_unroll; ++ drm_sched_rq_init(sched, sched->sched_rq[i]); ++ } + + init_waitqueue_head(&sched->wake_up_worker); + init_waitqueue_head(&sched->job_scheduled); +@@ -1115,11 +1151,18 @@ int drm_sched_init(struct drm_gpu_schedu + ret = PTR_ERR(sched->thread); + sched->thread = NULL; + DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name); +- return ret; ++ goto Out_unroll; + } + + sched->ready = true; + return 0; ++Out_unroll: ++ for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--) ++ kfree(sched->sched_rq[i]); ++ kfree(sched->sched_rq); ++ sched->sched_rq = NULL; ++ drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); ++ return ret; + } + EXPORT_SYMBOL(drm_sched_init); + +@@ -1138,8 +1181,8 @@ void drm_sched_fini(struct drm_gpu_sched + if (sched->thread) + kthread_stop(sched->thread); + +- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { +- struct drm_sched_rq *rq = &sched->sched_rq[i]; ++ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { ++ struct drm_sched_rq *rq = sched->sched_rq[i]; + + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) +@@ -1150,7 +1193,7 @@ void drm_sched_fini(struct drm_gpu_sched + */ + s_entity->stopped = true; + spin_unlock(&rq->lock); +- ++ kfree(sched->sched_rq[i]); + } + + /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ +@@ -1160,6 +1203,8 @@ void drm_sched_fini(struct drm_gpu_sched + cancel_delayed_work_sync(&sched->work_tdr); + + sched->ready = false; ++ kfree(sched->sched_rq); ++ sched->sched_rq = NULL; + } + EXPORT_SYMBOL(drm_sched_fini); + +@@ -1186,9 +1231,10 @@ void drm_sched_increase_karma(struct drm + if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { + atomic_inc(&bad->karma); + +- for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; ++ for (i = DRM_SCHED_PRIORITY_MIN; ++ i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL); + i++) { +- struct drm_sched_rq *rq = &sched->sched_rq[i]; ++ struct drm_sched_rq *rq = sched->sched_rq[i]; + + spin_lock(&rq->lock); + list_for_each_entry_safe(entity, tmp, &rq->entities, list) { +--- a/drivers/gpu/drm/v3d/v3d_sched.c ++++ b/drivers/gpu/drm/v3d/v3d_sched.c +@@ -389,6 +389,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, + &v3d_bin_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_bin", v3d->drm.dev); +@@ -397,6 +398,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, + &v3d_render_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_render", v3d->drm.dev); +@@ -405,6 +407,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, + &v3d_tfu_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, 
"v3d_tfu", v3d->drm.dev); +@@ -414,6 +417,7 @@ v3d_sched_init(struct v3d_dev *v3d) + if (v3d_has_csd(v3d)) { + ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, + &v3d_csd_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_csd", v3d->drm.dev); +@@ -422,6 +426,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, + &v3d_cache_clean_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_cache_clean", v3d->drm.dev); +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -471,7 +471,9 @@ struct drm_sched_backend_ops { + * @hw_submission_limit: the max size of the hardware queue. + * @timeout: the time after which a job is removed from the scheduler. + * @name: name of the ring for which this scheduler is being used. +- * @sched_rq: priority wise array of run queues. ++ * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, ++ * as there's usually one run-queue per priority, but could be less. ++ * @sched_rq: An allocated array of run-queues of size @num_rqs; + * @wake_up_worker: the wait queue on which the scheduler sleeps until a job + * is ready to be scheduled. + * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler +@@ -500,7 +502,8 @@ struct drm_gpu_scheduler { + uint32_t hw_submission_limit; + long timeout; + const char *name; +- struct drm_sched_rq sched_rq[DRM_SCHED_PRIORITY_COUNT]; ++ u32 num_rqs; ++ struct drm_sched_rq **sched_rq; + wait_queue_head_t wake_up_worker; + wait_queue_head_t job_scheduled; + atomic_t hw_rq_count; +@@ -520,7 +523,7 @@ struct drm_gpu_scheduler { + + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, +- uint32_t hw_submission, unsigned hang_limit, ++ u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev); + diff --git a/patches-6.6/034-21-v6.8-drm-sched-Add-drm_sched_wqueue_-helpers.patch b/patches-6.6/034-21-v6.8-drm-sched-Add-drm_sched_wqueue_-helpers.patch new file mode 100644 index 0000000..2385d67 --- /dev/null +++ b/patches-6.6/034-21-v6.8-drm-sched-Add-drm_sched_wqueue_-helpers.patch @@ -0,0 +1,241 @@ +From 35963cf2cd25eeea8bdb4d02853dac1e66fb13a0 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Mon, 30 Oct 2023 20:24:35 -0700 +Subject: [PATCH] drm/sched: Add drm_sched_wqueue_* helpers + +Add scheduler wqueue ready, stop, and start helpers to hide the +implementation details of the scheduler from the drivers. 
+ +v2: + - s/sched_wqueue/sched_wqueue (Luben) + - Remove the extra white line after the return-statement (Luben) + - update drm_sched_wqueue_ready comment (Luben) + +Cc: Luben Tuikov +Signed-off-by: Matthew Brost +Reviewed-by: Luben Tuikov +Link: https://lore.kernel.org/r/20231031032439.1558703-2-matthew.brost@intel.com +Signed-off-by: Luben Tuikov +--- + .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 15 +++---- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++--- + drivers/gpu/drm/msm/adreno/adreno_device.c | 6 ++- + drivers/gpu/drm/scheduler/sched_main.c | 39 ++++++++++++++++++- + include/drm/gpu_scheduler.h | 3 ++ + 6 files changed, 59 insertions(+), 18 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +@@ -290,7 +290,7 @@ static int suspend_resume_compute_schedu + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + +- if (!(ring && ring->sched.thread)) ++ if (!(ring && drm_sched_wqueue_ready(&ring->sched))) + continue; + + /* stop secheduler and drain ring. */ +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +@@ -1671,9 +1671,9 @@ static int amdgpu_debugfs_test_ib_show(s + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; +- kthread_park(ring->sched.thread); ++ drm_sched_wqueue_stop(&ring->sched); + } + + seq_puts(m, "run ib test:\n"); +@@ -1687,9 +1687,9 @@ static int amdgpu_debugfs_test_ib_show(s + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; +- kthread_unpark(ring->sched.thread); ++ drm_sched_wqueue_start(&ring->sched); + } + + up_write(&adev->reset_domain->sem); +@@ -1909,7 +1909,8 @@ static int amdgpu_debugfs_ib_preempt(voi + + ring = adev->rings[val]; + +- if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread) ++ if (!ring || !ring->funcs->preempt_ib || ++ !drm_sched_wqueue_ready(&ring->sched)) + return -EINVAL; + + /* the last preemption failed */ +@@ -1927,7 +1928,7 @@ static int amdgpu_debugfs_ib_preempt(voi + goto pro_end; + + /* stop the scheduler */ +- kthread_park(ring->sched.thread); ++ drm_sched_wqueue_stop(&ring->sched); + + /* preempt the IB */ + r = amdgpu_ring_preempt_ib(ring); +@@ -1961,7 +1962,7 @@ static int amdgpu_debugfs_ib_preempt(voi + + failure: + /* restart the scheduler */ +- kthread_unpark(ring->sched.thread); ++ drm_sched_wqueue_start(&ring->sched); + + up_read(&adev->reset_domain->sem); + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4613,7 +4613,7 @@ bool amdgpu_device_has_job_running(struc + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + spin_lock(&ring->sched.job_list_lock); +@@ -4755,7 +4755,7 @@ int amdgpu_device_pre_asic_reset(struct + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + /* Clear job fence from fence drv to avoid force_completion +@@ -5295,7 +5295,7 
@@ int amdgpu_device_gpu_recover(struct amd + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = tmp_adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + drm_sched_stop(&ring->sched, job ? &job->base : NULL); +@@ -5370,7 +5370,7 @@ skip_hw_reset: + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = tmp_adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + drm_sched_start(&ring->sched, true); +@@ -5696,7 +5696,7 @@ pci_ers_result_t amdgpu_pci_error_detect + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + drm_sched_stop(&ring->sched, NULL); +@@ -5824,7 +5824,7 @@ void amdgpu_pci_resume(struct pci_dev *p + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + drm_sched_start(&ring->sched, true); +--- a/drivers/gpu/drm/msm/adreno/adreno_device.c ++++ b/drivers/gpu/drm/msm/adreno/adreno_device.c +@@ -810,7 +810,8 @@ static void suspend_scheduler(struct msm + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; +- kthread_park(sched->thread); ++ ++ drm_sched_wqueue_stop(sched); + } + } + +@@ -820,7 +821,8 @@ static void resume_scheduler(struct msm_ + + for (i = 0; i < gpu->nr_rings; i++) { + struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; +- kthread_unpark(sched->thread); ++ ++ drm_sched_wqueue_start(sched); + } + } + +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -439,7 +439,7 @@ void drm_sched_stop(struct drm_gpu_sched + { + struct drm_sched_job *s_job, *tmp; + +- kthread_park(sched->thread); ++ drm_sched_wqueue_stop(sched); + + /* + * Reinsert back the bad job here - now it's safe as +@@ -552,7 +552,7 @@ void drm_sched_start(struct drm_gpu_sche + spin_unlock(&sched->job_list_lock); + } + +- kthread_unpark(sched->thread); ++ drm_sched_wqueue_start(sched); + } + EXPORT_SYMBOL(drm_sched_start); + +@@ -1252,3 +1252,38 @@ void drm_sched_increase_karma(struct drm + } + } + EXPORT_SYMBOL(drm_sched_increase_karma); ++ ++/** ++ * drm_sched_wqueue_ready - Is the scheduler ready for submission ++ * ++ * @sched: scheduler instance ++ * ++ * Returns true if submission is ready ++ */ ++bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched) ++{ ++ return !!sched->thread; ++} ++EXPORT_SYMBOL(drm_sched_wqueue_ready); ++ ++/** ++ * drm_sched_wqueue_stop - stop scheduler submission ++ * ++ * @sched: scheduler instance ++ */ ++void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) ++{ ++ kthread_park(sched->thread); ++} ++EXPORT_SYMBOL(drm_sched_wqueue_stop); ++ ++/** ++ * drm_sched_wqueue_start - start scheduler submission ++ * ++ * @sched: scheduler instance ++ */ ++void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) ++{ ++ kthread_unpark(sched->thread); ++} ++EXPORT_SYMBOL(drm_sched_wqueue_start); +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -552,6 +552,9 @@ void drm_sched_entity_modify_sched(struc + + void drm_sched_job_cleanup(struct drm_sched_job *job); + void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); ++bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); ++void 
drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
++void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);
+ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
+ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
+ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
diff --git a/patches-6.6/034-22-v6.8-drm-sched-Convert-drm-scheduler-to-use-a-work-queue-rathe.patch b/patches-6.6/034-22-v6.8-drm-sched-Convert-drm-scheduler-to-use-a-work-queue-rathe.patch
new file mode 100644
index 0000000..4d231f3
--- /dev/null
+++ b/patches-6.6/034-22-v6.8-drm-sched-Convert-drm-scheduler-to-use-a-work-queue-rathe.patch
@@ -0,0 +1,507 @@
+From a6149f0393699308fb00149be913044977bceb56 Mon Sep 17 00:00:00 2001
+From: Matthew Brost
+Date: Mon, 30 Oct 2023 20:24:36 -0700
+Subject: [PATCH] drm/sched: Convert drm scheduler to use a work queue rather
+ than kthread
+
+In Xe, the new Intel GPU driver, a choice has been made to have a 1 to 1
+mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
+seems a bit odd but let us explain the reasoning below.
+
+1. In Xe the submission order from multiple drm_sched_entity is not
+guaranteed to match the completion order even if targeting the same hardware
+engine. This is because in Xe we have a firmware scheduler, the GuC,
+which is allowed to reorder, timeslice, and preempt submissions. If using a
+shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
+apart as the TDR expects submission order == completion order. Using a
+dedicated drm_gpu_scheduler per drm_sched_entity solves this problem.
+
+2. In Xe submissions are done via programming a ring buffer (circular
+buffer), and a drm_gpu_scheduler provides a limit on the number of jobs; if the
+limit is set to RING_SIZE / MAX_SIZE_PER_JOB, we get flow
+control on the ring for free.
+
+A problem with this design is that currently a drm_gpu_scheduler uses a
+kthread for submission / job cleanup. This doesn't scale if a large
+number of drm_gpu_scheduler are used. To work around the scaling issue,
+use a worker rather than a kthread for submission / job cleanup.
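+
+A hypothetical example of the new call signature (all names are
+placeholders); passing NULL for the submission workqueue lets the
+scheduler allocate its own ordered workqueue, while a driver with many
+1:1 schedulers can hand in one shared workqueue instead:
+
+  ret = drm_sched_init(&q->sched, &my_sched_ops, shared_submit_wq,
+                       1, ring_size, 0, msecs_to_jiffies(500),
+                       NULL, NULL, q->name, dev);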
+ +v2: + - (Rob Clark) Fix msm build + - Pass in run work queue +v3: + - (Boris) don't have loop in worker +v4: + - (Tvrtko) break out submit ready, stop, start helpers into own patch +v5: + - (Boris) default to ordered work queue +v6: + - (Luben / checkpatch) fix alignment in msm_ringbuffer.c + - (Luben) s/drm_sched_submit_queue/drm_sched_wqueue_enqueue + - (Luben) Update comment for drm_sched_wqueue_enqueue + - (Luben) Positive check for submit_wq in drm_sched_init + - (Luben) s/alloc_submit_wq/own_submit_wq +v7: + - (Luben) s/drm_sched_wqueue_enqueue/drm_sched_run_job_queue +v8: + - (Luben) Adjust var names / comments + +Signed-off-by: Matthew Brost +Reviewed-by: Luben Tuikov +Link: https://lore.kernel.org/r/20231031032439.1558703-3-matthew.brost@intel.com +Signed-off-by: Luben Tuikov +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- + drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +- + drivers/gpu/drm/lima/lima_sched.c | 2 +- + drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- + drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- + drivers/gpu/drm/scheduler/sched_main.c | 131 +++++++++++---------- + drivers/gpu/drm/v3d/v3d_sched.c | 10 +- + include/drm/gpu_scheduler.h | 14 ++- + 9 files changed, 86 insertions(+), 81 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2276,7 +2276,7 @@ static int amdgpu_device_init_schedulers + break; + } + +- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, ++ r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + ring->num_hw_submission, 0, + timeout, adev->reset_domain->wq, +--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c +@@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gp + { + int ret; + +- ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, ++ ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, + msecs_to_jiffies(500), NULL, NULL, +--- a/drivers/gpu/drm/lima/lima_sched.c ++++ b/drivers/gpu/drm/lima/lima_sched.c +@@ -495,7 +495,7 @@ int lima_sched_pipe_init(struct lima_sch + + INIT_WORK(&pipe->recover_work, lima_sched_recover_work); + +- return drm_sched_init(&pipe->base, &lima_sched_ops, ++ return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + 1, + lima_job_hang_limit, +--- a/drivers/gpu/drm/msm/msm_ringbuffer.c ++++ b/drivers/gpu/drm/msm/msm_ringbuffer.c +@@ -97,7 +97,7 @@ struct msm_ringbuffer *msm_ringbuffer_ne + /* currently managing hangcheck ourselves: */ + sched_timeout = MAX_SCHEDULE_TIMEOUT; + +- ret = drm_sched_init(&ring->sched, &msm_sched_ops, ++ ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + num_hw_submissions, 0, sched_timeout, + NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); +--- a/drivers/gpu/drm/nouveau/nouveau_sched.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.c +@@ -440,7 +440,7 @@ int nouveau_sched_init(struct nouveau_dr + if (!drm->sched_wq) + return -ENOMEM; + +- return drm_sched_init(sched, &nouveau_sched_ops, ++ return drm_sched_init(sched, &nouveau_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, + NULL, NULL, "nouveau_sched", drm->dev->dev); +--- a/drivers/gpu/drm/panfrost/panfrost_job.c ++++ b/drivers/gpu/drm/panfrost/panfrost_job.c +@@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_de + 
js->queue[j].fence_context = dma_fence_context_alloc(1); + + ret = drm_sched_init(&js->queue[j].sched, +- &panfrost_sched_ops, ++ &panfrost_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + nentries, 0, + msecs_to_jiffies(JOB_TIMEOUT_MS), +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -48,7 +48,6 @@ + * through the jobs entity pointer. + */ + +-#include + #include + #include + #include +@@ -257,6 +256,16 @@ drm_sched_rq_select_entity_fifo(struct d + } + + /** ++ * drm_sched_run_job_queue - enqueue run-job work ++ * @sched: scheduler instance ++ */ ++static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) ++{ ++ if (!READ_ONCE(sched->pause_submit)) ++ queue_work(sched->submit_wq, &sched->work_run_job); ++} ++ ++/** + * drm_sched_job_done - complete a job + * @s_job: pointer to the job which is done + * +@@ -275,7 +284,7 @@ static void drm_sched_job_done(struct dr + dma_fence_get(&s_fence->finished); + drm_sched_fence_finished(s_fence, result); + dma_fence_put(&s_fence->finished); +- wake_up_interruptible(&sched->wake_up_worker); ++ drm_sched_run_job_queue(sched); + } + + /** +@@ -874,7 +883,7 @@ static bool drm_sched_can_queue(struct d + void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) + { + if (drm_sched_can_queue(sched)) +- wake_up_interruptible(&sched->wake_up_worker); ++ drm_sched_run_job_queue(sched); + } + + /** +@@ -985,60 +994,41 @@ drm_sched_pick_best(struct drm_gpu_sched + EXPORT_SYMBOL(drm_sched_pick_best); + + /** +- * drm_sched_blocked - check if the scheduler is blocked ++ * drm_sched_run_job_work - main scheduler thread + * +- * @sched: scheduler instance +- * +- * Returns true if blocked, otherwise false. ++ * @w: run job work + */ +-static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) ++static void drm_sched_run_job_work(struct work_struct *w) + { +- if (kthread_should_park()) { +- kthread_parkme(); +- return true; +- } +- +- return false; +-} +- +-/** +- * drm_sched_main - main scheduler thread +- * +- * @param: scheduler instance +- * +- * Returns 0. 
+- */ +-static int drm_sched_main(void *param) +-{ +- struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param; ++ struct drm_gpu_scheduler *sched = ++ container_of(w, struct drm_gpu_scheduler, work_run_job); ++ struct drm_sched_entity *entity; ++ struct drm_sched_job *cleanup_job; + int r; + +- sched_set_fifo_low(current); ++ if (READ_ONCE(sched->pause_submit)) ++ return; + +- while (!kthread_should_stop()) { +- struct drm_sched_entity *entity = NULL; +- struct drm_sched_fence *s_fence; +- struct drm_sched_job *sched_job; +- struct dma_fence *fence; +- struct drm_sched_job *cleanup_job = NULL; ++ cleanup_job = drm_sched_get_cleanup_job(sched); ++ entity = drm_sched_select_entity(sched); + +- wait_event_interruptible(sched->wake_up_worker, +- (cleanup_job = drm_sched_get_cleanup_job(sched)) || +- (!drm_sched_blocked(sched) && +- (entity = drm_sched_select_entity(sched))) || +- kthread_should_stop()); ++ if (!entity && !cleanup_job) ++ return; /* No more work */ + +- if (cleanup_job) +- sched->ops->free_job(cleanup_job); ++ if (cleanup_job) ++ sched->ops->free_job(cleanup_job); + +- if (!entity) +- continue; ++ if (entity) { ++ struct dma_fence *fence; ++ struct drm_sched_fence *s_fence; ++ struct drm_sched_job *sched_job; + + sched_job = drm_sched_entity_pop_job(entity); +- + if (!sched_job) { + complete_all(&entity->entity_idle); +- continue; ++ if (!cleanup_job) ++ return; /* No more work */ ++ goto again; + } + + s_fence = sched_job->s_fence; +@@ -1069,7 +1059,9 @@ static int drm_sched_main(void *param) + + wake_up(&sched->job_scheduled); + } +- return 0; ++ ++again: ++ drm_sched_run_job_queue(sched); + } + + /** +@@ -1077,6 +1069,8 @@ static int drm_sched_main(void *param) + * + * @sched: scheduler instance + * @ops: backend operations for this scheduler ++ * @submit_wq: workqueue to use for submission. 
If NULL, an ordered wq is ++ * allocated and used + * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT + * @hw_submission: number of hw submissions that can be in flight + * @hang_limit: number of times to allow a job to hang before dropping it +@@ -1091,6 +1085,7 @@ static int drm_sched_main(void *param) + */ + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, ++ struct workqueue_struct *submit_wq, + u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev) +@@ -1121,14 +1116,22 @@ int drm_sched_init(struct drm_gpu_schedu + return 0; + } + ++ if (submit_wq) { ++ sched->submit_wq = submit_wq; ++ sched->own_submit_wq = false; ++ } else { ++ sched->submit_wq = alloc_ordered_workqueue(name, 0); ++ if (!sched->submit_wq) ++ return -ENOMEM; ++ ++ sched->own_submit_wq = true; ++ } ++ ret = -ENOMEM; + sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq), + GFP_KERNEL | __GFP_ZERO); +- if (!sched->sched_rq) { +- drm_err(sched, "%s: out of memory for sched_rq\n", __func__); +- return -ENOMEM; +- } ++ if (!sched->sched_rq) ++ goto Out_free; + sched->num_rqs = num_rqs; +- ret = -ENOMEM; + for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) { + sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); + if (!sched->sched_rq[i]) +@@ -1136,31 +1139,26 @@ int drm_sched_init(struct drm_gpu_schedu + drm_sched_rq_init(sched, sched->sched_rq[i]); + } + +- init_waitqueue_head(&sched->wake_up_worker); + init_waitqueue_head(&sched->job_scheduled); + INIT_LIST_HEAD(&sched->pending_list); + spin_lock_init(&sched->job_list_lock); + atomic_set(&sched->hw_rq_count, 0); + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); ++ INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); + atomic_set(&sched->_score, 0); + atomic64_set(&sched->job_id_count, 0); +- +- /* Each scheduler will run on a seperate kernel thread */ +- sched->thread = kthread_run(drm_sched_main, sched, sched->name); +- if (IS_ERR(sched->thread)) { +- ret = PTR_ERR(sched->thread); +- sched->thread = NULL; +- DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name); +- goto Out_unroll; +- } ++ sched->pause_submit = false; + + sched->ready = true; + return 0; + Out_unroll: + for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--) + kfree(sched->sched_rq[i]); ++Out_free: + kfree(sched->sched_rq); + sched->sched_rq = NULL; ++ if (sched->own_submit_wq) ++ destroy_workqueue(sched->submit_wq); + drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); + return ret; + } +@@ -1178,8 +1176,7 @@ void drm_sched_fini(struct drm_gpu_sched + struct drm_sched_entity *s_entity; + int i; + +- if (sched->thread) +- kthread_stop(sched->thread); ++ drm_sched_wqueue_stop(sched); + + for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + struct drm_sched_rq *rq = sched->sched_rq[i]; +@@ -1202,6 +1199,8 @@ void drm_sched_fini(struct drm_gpu_sched + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&sched->work_tdr); + ++ if (sched->own_submit_wq) ++ destroy_workqueue(sched->submit_wq); + sched->ready = false; + kfree(sched->sched_rq); + sched->sched_rq = NULL; +@@ -1262,7 +1261,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma); + */ + bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched) + { +- return !!sched->thread; ++ return sched->ready; + } + 
EXPORT_SYMBOL(drm_sched_wqueue_ready); + +@@ -1273,7 +1272,8 @@ EXPORT_SYMBOL(drm_sched_wqueue_ready); + */ + void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) + { +- kthread_park(sched->thread); ++ WRITE_ONCE(sched->pause_submit, true); ++ cancel_work_sync(&sched->work_run_job); + } + EXPORT_SYMBOL(drm_sched_wqueue_stop); + +@@ -1284,6 +1284,7 @@ EXPORT_SYMBOL(drm_sched_wqueue_stop); + */ + void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) + { +- kthread_unpark(sched->thread); ++ WRITE_ONCE(sched->pause_submit, false); ++ queue_work(sched->submit_wq, &sched->work_run_job); + } + EXPORT_SYMBOL(drm_sched_wqueue_start); +--- a/drivers/gpu/drm/v3d/v3d_sched.c ++++ b/drivers/gpu/drm/v3d/v3d_sched.c +@@ -388,7 +388,7 @@ v3d_sched_init(struct v3d_dev *v3d) + int ret; + + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, +- &v3d_bin_sched_ops, ++ &v3d_bin_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +@@ -397,7 +397,7 @@ v3d_sched_init(struct v3d_dev *v3d) + return ret; + + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, +- &v3d_render_sched_ops, ++ &v3d_render_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +@@ -406,7 +406,7 @@ v3d_sched_init(struct v3d_dev *v3d) + goto fail; + + ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, +- &v3d_tfu_sched_ops, ++ &v3d_tfu_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +@@ -416,7 +416,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + if (v3d_has_csd(v3d)) { + ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, +- &v3d_csd_sched_ops, ++ &v3d_csd_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +@@ -425,7 +425,7 @@ v3d_sched_init(struct v3d_dev *v3d) + goto fail; + + ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, +- &v3d_cache_clean_sched_ops, ++ &v3d_cache_clean_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -474,17 +474,16 @@ struct drm_sched_backend_ops { + * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, + * as there's usually one run-queue per priority, but could be less. + * @sched_rq: An allocated array of run-queues of size @num_rqs; +- * @wake_up_worker: the wait queue on which the scheduler sleeps until a job +- * is ready to be scheduled. + * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler + * waits on this wait queue until all the scheduled jobs are + * finished. + * @hw_rq_count: the number of jobs currently in the hardware queue. + * @job_id_count: used to assign unique id to the each job. ++ * @submit_wq: workqueue used to queue @work_run_job + * @timeout_wq: workqueue used to queue @work_tdr ++ * @work_run_job: work which calls run_job op of each scheduler. + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the + * timeout interval is over. +- * @thread: the kthread on which the scheduler which run. + * @pending_list: the list of jobs which are currently in the job queue. + * @job_list_lock: lock to protect the pending_list. 
+ * @hang_limit: once the hangs by a job crosses this limit then it is marked +@@ -493,6 +492,8 @@ struct drm_sched_backend_ops { + * @_score: score used when the driver doesn't provide one + * @ready: marks if the underlying HW is ready to work + * @free_guilty: A hit to time out handler to free the guilty job. ++ * @pause_submit: pause queuing of @work_run_job on @submit_wq ++ * @own_submit_wq: scheduler owns allocation of @submit_wq + * @dev: system &struct device + * + * One scheduler is implemented for each hardware ring. +@@ -504,13 +505,13 @@ struct drm_gpu_scheduler { + const char *name; + u32 num_rqs; + struct drm_sched_rq **sched_rq; +- wait_queue_head_t wake_up_worker; + wait_queue_head_t job_scheduled; + atomic_t hw_rq_count; + atomic64_t job_id_count; ++ struct workqueue_struct *submit_wq; + struct workqueue_struct *timeout_wq; ++ struct work_struct work_run_job; + struct delayed_work work_tdr; +- struct task_struct *thread; + struct list_head pending_list; + spinlock_t job_list_lock; + int hang_limit; +@@ -518,11 +519,14 @@ struct drm_gpu_scheduler { + atomic_t _score; + bool ready; + bool free_guilty; ++ bool pause_submit; ++ bool own_submit_wq; + struct device *dev; + }; + + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, ++ struct workqueue_struct *submit_wq, + u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev); diff --git a/patches-6.6/034-23-v6.8-drm-sched-Split-free_job-into-own-work-item.patch b/patches-6.6/034-23-v6.8-drm-sched-Split-free_job-into-own-work-item.patch new file mode 100644 index 0000000..a2efa0f --- /dev/null +++ b/patches-6.6/034-23-v6.8-drm-sched-Split-free_job-into-own-work-item.patch @@ -0,0 +1,275 @@ +From f7fe64ad0f22ff034f8ebcfbd7299ee9cc9b57d7 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Mon, 30 Oct 2023 20:24:37 -0700 +Subject: [PATCH] drm/sched: Split free_job into own work item + +Rather than call free_job and run_job in same work item have a dedicated +work item for each. This aligns with the design and intended use of work +queues. 
+ +v2: + - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting + timestamp in free_job() work item (Danilo) +v3: + - Drop forward dec of drm_sched_select_entity (Boris) + - Return in drm_sched_run_job_work if entity NULL (Boris) +v4: + - Replace dequeue with peek and invert logic (Luben) + - Wrap to 100 lines (Luben) + - Update comments for *_queue / *_queue_if_ready functions (Luben) +v5: + - Drop peek argument, blindly reinit idle (Luben) + - s/drm_sched_free_job_queue_if_ready/drm_sched_free_job_queue_if_done (Luben) + - Update work_run_job & work_free_job kernel doc (Luben) +v6: + - Do not move drm_sched_select_entity in file (Luben) + +Signed-off-by: Matthew Brost +Link: https://lore.kernel.org/r/20231031032439.1558703-4-matthew.brost@intel.com +Reviewed-by: Luben Tuikov +Signed-off-by: Luben Tuikov +--- + drivers/gpu/drm/scheduler/sched_main.c | 146 +++++++++++++++++-------- + include/drm/gpu_scheduler.h | 4 +- + 2 files changed, 101 insertions(+), 49 deletions(-) + +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -266,6 +266,32 @@ static void drm_sched_run_job_queue(stru + } + + /** ++ * drm_sched_free_job_queue - enqueue free-job work ++ * @sched: scheduler instance ++ */ ++static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched) ++{ ++ if (!READ_ONCE(sched->pause_submit)) ++ queue_work(sched->submit_wq, &sched->work_free_job); ++} ++ ++/** ++ * drm_sched_free_job_queue_if_done - enqueue free-job work if ready ++ * @sched: scheduler instance ++ */ ++static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched) ++{ ++ struct drm_sched_job *job; ++ ++ spin_lock(&sched->job_list_lock); ++ job = list_first_entry_or_null(&sched->pending_list, ++ struct drm_sched_job, list); ++ if (job && dma_fence_is_signaled(&job->s_fence->finished)) ++ drm_sched_free_job_queue(sched); ++ spin_unlock(&sched->job_list_lock); ++} ++ ++/** + * drm_sched_job_done - complete a job + * @s_job: pointer to the job which is done + * +@@ -284,7 +310,7 @@ static void drm_sched_job_done(struct dr + dma_fence_get(&s_fence->finished); + drm_sched_fence_finished(s_fence, result); + dma_fence_put(&s_fence->finished); +- drm_sched_run_job_queue(sched); ++ drm_sched_free_job_queue(sched); + } + + /** +@@ -943,8 +969,10 @@ drm_sched_get_cleanup_job(struct drm_gpu + typeof(*next), list); + + if (next) { +- next->s_fence->scheduled.timestamp = +- dma_fence_timestamp(&job->s_fence->finished); ++ if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, ++ &next->s_fence->scheduled.flags)) ++ next->s_fence->scheduled.timestamp = ++ dma_fence_timestamp(&job->s_fence->finished); + /* start TO timer for next job */ + drm_sched_start_timeout(sched); + } +@@ -994,7 +1022,40 @@ drm_sched_pick_best(struct drm_gpu_sched + EXPORT_SYMBOL(drm_sched_pick_best); + + /** +- * drm_sched_run_job_work - main scheduler thread ++ * drm_sched_run_job_queue_if_ready - enqueue run-job work if ready ++ * @sched: scheduler instance ++ */ ++static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched) ++{ ++ if (drm_sched_select_entity(sched)) ++ drm_sched_run_job_queue(sched); ++} ++ ++/** ++ * drm_sched_free_job_work - worker to call free_job ++ * ++ * @w: free job work ++ */ ++static void drm_sched_free_job_work(struct work_struct *w) ++{ ++ struct drm_gpu_scheduler *sched = ++ container_of(w, struct drm_gpu_scheduler, work_free_job); ++ struct drm_sched_job *cleanup_job; ++ ++ if (READ_ONCE(sched->pause_submit)) ++ return; ++ ++ cleanup_job = 
drm_sched_get_cleanup_job(sched); ++ if (cleanup_job) { ++ sched->ops->free_job(cleanup_job); ++ ++ drm_sched_free_job_queue_if_done(sched); ++ drm_sched_run_job_queue_if_ready(sched); ++ } ++} ++ ++/** ++ * drm_sched_run_job_work - worker to call run_job + * + * @w: run job work + */ +@@ -1003,65 +1064,51 @@ static void drm_sched_run_job_work(struc + struct drm_gpu_scheduler *sched = + container_of(w, struct drm_gpu_scheduler, work_run_job); + struct drm_sched_entity *entity; +- struct drm_sched_job *cleanup_job; ++ struct dma_fence *fence; ++ struct drm_sched_fence *s_fence; ++ struct drm_sched_job *sched_job; + int r; + + if (READ_ONCE(sched->pause_submit)) + return; + +- cleanup_job = drm_sched_get_cleanup_job(sched); + entity = drm_sched_select_entity(sched); ++ if (!entity) ++ return; + +- if (!entity && !cleanup_job) ++ sched_job = drm_sched_entity_pop_job(entity); ++ if (!sched_job) { ++ complete_all(&entity->entity_idle); + return; /* No more work */ ++ } + +- if (cleanup_job) +- sched->ops->free_job(cleanup_job); +- +- if (entity) { +- struct dma_fence *fence; +- struct drm_sched_fence *s_fence; +- struct drm_sched_job *sched_job; +- +- sched_job = drm_sched_entity_pop_job(entity); +- if (!sched_job) { +- complete_all(&entity->entity_idle); +- if (!cleanup_job) +- return; /* No more work */ +- goto again; +- } +- +- s_fence = sched_job->s_fence; +- +- atomic_inc(&sched->hw_rq_count); +- drm_sched_job_begin(sched_job); ++ s_fence = sched_job->s_fence; + +- trace_drm_run_job(sched_job, entity); +- fence = sched->ops->run_job(sched_job); +- complete_all(&entity->entity_idle); +- drm_sched_fence_scheduled(s_fence, fence); ++ atomic_inc(&sched->hw_rq_count); ++ drm_sched_job_begin(sched_job); + +- if (!IS_ERR_OR_NULL(fence)) { +- /* Drop for original kref_init of the fence */ +- dma_fence_put(fence); ++ trace_drm_run_job(sched_job, entity); ++ fence = sched->ops->run_job(sched_job); ++ complete_all(&entity->entity_idle); ++ drm_sched_fence_scheduled(s_fence, fence); + +- r = dma_fence_add_callback(fence, &sched_job->cb, +- drm_sched_job_done_cb); +- if (r == -ENOENT) +- drm_sched_job_done(sched_job, fence->error); +- else if (r) +- DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", +- r); +- } else { +- drm_sched_job_done(sched_job, IS_ERR(fence) ? +- PTR_ERR(fence) : 0); +- } ++ if (!IS_ERR_OR_NULL(fence)) { ++ /* Drop for original kref_init of the fence */ ++ dma_fence_put(fence); + +- wake_up(&sched->job_scheduled); ++ r = dma_fence_add_callback(fence, &sched_job->cb, ++ drm_sched_job_done_cb); ++ if (r == -ENOENT) ++ drm_sched_job_done(sched_job, fence->error); ++ else if (r) ++ DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); ++ } else { ++ drm_sched_job_done(sched_job, IS_ERR(fence) ? 
++ PTR_ERR(fence) : 0); + } + +-again: +- drm_sched_run_job_queue(sched); ++ wake_up(&sched->job_scheduled); ++ drm_sched_run_job_queue_if_ready(sched); + } + + /** +@@ -1145,6 +1192,7 @@ int drm_sched_init(struct drm_gpu_schedu + atomic_set(&sched->hw_rq_count, 0); + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); + INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); ++ INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); + atomic_set(&sched->_score, 0); + atomic64_set(&sched->job_id_count, 0); + sched->pause_submit = false; +@@ -1274,6 +1322,7 @@ void drm_sched_wqueue_stop(struct drm_gp + { + WRITE_ONCE(sched->pause_submit, true); + cancel_work_sync(&sched->work_run_job); ++ cancel_work_sync(&sched->work_free_job); + } + EXPORT_SYMBOL(drm_sched_wqueue_stop); + +@@ -1286,5 +1335,6 @@ void drm_sched_wqueue_start(struct drm_g + { + WRITE_ONCE(sched->pause_submit, false); + queue_work(sched->submit_wq, &sched->work_run_job); ++ queue_work(sched->submit_wq, &sched->work_free_job); + } + EXPORT_SYMBOL(drm_sched_wqueue_start); +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -479,9 +479,10 @@ struct drm_sched_backend_ops { + * finished. + * @hw_rq_count: the number of jobs currently in the hardware queue. + * @job_id_count: used to assign unique id to the each job. +- * @submit_wq: workqueue used to queue @work_run_job ++ * @submit_wq: workqueue used to queue @work_run_job and @work_free_job + * @timeout_wq: workqueue used to queue @work_tdr + * @work_run_job: work which calls run_job op of each scheduler. ++ * @work_free_job: work which calls free_job op of each scheduler. + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the + * timeout interval is over. + * @pending_list: the list of jobs which are currently in the job queue. +@@ -511,6 +512,7 @@ struct drm_gpu_scheduler { + struct workqueue_struct *submit_wq; + struct workqueue_struct *timeout_wq; + struct work_struct work_run_job; ++ struct work_struct work_free_job; + struct delayed_work work_tdr; + struct list_head pending_list; + spinlock_t job_list_lock; diff --git a/patches-6.6/034-24-v6.8-drm-sched-Add-a-helper-to-queue-TDR-immediately.patch b/patches-6.6/034-24-v6.8-drm-sched-Add-a-helper-to-queue-TDR-immediately.patch new file mode 100644 index 0000000..8ed2d99 --- /dev/null +++ b/patches-6.6/034-24-v6.8-drm-sched-Add-a-helper-to-queue-TDR-immediately.patch @@ -0,0 +1,70 @@ +From 3c6c7ca4508b6cb1a033ac954c50a1b2c97af883 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Mon, 30 Oct 2023 20:24:39 -0700 +Subject: [PATCH] drm/sched: Add a helper to queue TDR immediately + +Add a helper whereby a driver can invoke TDR immediately. 
+ +v2: + - Drop timeout args, rename function, use mod delayed work (Luben) +v3: + - s/XE/Xe (Luben) + - present tense in commit message (Luben) + - Adjust comment for drm_sched_tdr_queue_imm (Luben) +v4: + - Adjust commit message (Luben) + +Cc: Luben Tuikov +Signed-off-by: Matthew Brost +Reviewed-by: Luben Tuikov +Link: https://lore.kernel.org/r/20231031032439.1558703-6-matthew.brost@intel.com +Signed-off-by: Luben Tuikov +--- + drivers/gpu/drm/scheduler/sched_main.c | 18 +++++++++++++++++- + include/drm/gpu_scheduler.h | 1 + + 2 files changed, 18 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -336,7 +336,7 @@ static void drm_sched_start_timeout(stru + { + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && + !list_empty(&sched->pending_list)) +- queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); ++ mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); + } + + /** +@@ -354,6 +354,22 @@ void drm_sched_fault(struct drm_gpu_sche + EXPORT_SYMBOL(drm_sched_fault); + + /** ++ * drm_sched_tdr_queue_imm: - immediately start job timeout handler ++ * ++ * @sched: scheduler for which the timeout handling should be started. ++ * ++ * Start timeout handling immediately for the named scheduler. ++ */ ++void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched) ++{ ++ spin_lock(&sched->job_list_lock); ++ sched->timeout = 0; ++ drm_sched_start_timeout(sched); ++ spin_unlock(&sched->job_list_lock); ++} ++EXPORT_SYMBOL(drm_sched_tdr_queue_imm); ++ ++/** + * drm_sched_suspend_timeout - Suspend scheduler job timeout + * + * @sched: scheduler instance for which to suspend the timeout +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -556,6 +556,7 @@ void drm_sched_entity_modify_sched(struc + struct drm_gpu_scheduler **sched_list, + unsigned int num_sched_list); + ++void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); + void drm_sched_job_cleanup(struct drm_sched_job *job); + void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); + bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); diff --git a/patches-6.6/034-25-v6.8-drm-sched-Drop-suffix-from-drm_sched_wakeup_if_can_queue.patch b/patches-6.6/034-25-v6.8-drm-sched-Drop-suffix-from-drm_sched_wakeup_if_can_queue.patch new file mode 100644 index 0000000..2472275 --- /dev/null +++ b/patches-6.6/034-25-v6.8-drm-sched-Drop-suffix-from-drm_sched_wakeup_if_can_queue.patch @@ -0,0 +1,70 @@ +From f12af4c461fb6cd5ed7b48f8b4d09b22eb19fcc5 Mon Sep 17 00:00:00 2001 +From: Tvrtko Ursulin +Date: Thu, 2 Nov 2023 10:55:38 +0000 +Subject: [PATCH] drm/sched: Drop suffix from drm_sched_wakeup_if_can_queue + +Because a) helper is exported to other parts of the scheduler and +b) there isn't a plain drm_sched_wakeup to begin with, I think we can +drop the suffix and by doing so separate the intimiate knowledge +between the scheduler components a bit better. 
+ +Signed-off-by: Tvrtko Ursulin +Cc: Luben Tuikov +Cc: Matthew Brost +Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-6-tvrtko.ursulin@linux.intel.com +Reviewed-by: Luben Tuikov +Signed-off-by: Luben Tuikov +--- + drivers/gpu/drm/scheduler/sched_entity.c | 4 ++-- + drivers/gpu/drm/scheduler/sched_main.c | 4 ++-- + include/drm/gpu_scheduler.h | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/scheduler/sched_entity.c ++++ b/drivers/gpu/drm/scheduler/sched_entity.c +@@ -370,7 +370,7 @@ static void drm_sched_entity_wakeup(stru + container_of(cb, struct drm_sched_entity, cb); + + drm_sched_entity_clear_dep(f, cb); +- drm_sched_wakeup_if_can_queue(entity->rq->sched); ++ drm_sched_wakeup(entity->rq->sched); + } + + /** +@@ -602,7 +602,7 @@ void drm_sched_entity_push_job(struct dr + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + drm_sched_rq_update_fifo(entity, submit_ts); + +- drm_sched_wakeup_if_can_queue(entity->rq->sched); ++ drm_sched_wakeup(entity->rq->sched); + } + } + EXPORT_SYMBOL(drm_sched_entity_push_job); +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -917,12 +917,12 @@ static bool drm_sched_can_queue(struct d + } + + /** +- * drm_sched_wakeup_if_can_queue - Wake up the scheduler ++ * drm_sched_wakeup - Wake up the scheduler if it is ready to queue + * @sched: scheduler instance + * + * Wake up the scheduler if we can queue jobs. + */ +-void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) ++void drm_sched_wakeup(struct drm_gpu_scheduler *sched) + { + if (drm_sched_can_queue(sched)) + drm_sched_run_job_queue(sched); +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -558,7 +558,7 @@ void drm_sched_entity_modify_sched(struc + + void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); + void drm_sched_job_cleanup(struct drm_sched_job *job); +-void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); ++void drm_sched_wakeup(struct drm_gpu_scheduler *sched); + bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); + void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); + void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); diff --git a/patches-6.6/034-26-v6.8-drm-sched-Qualify-drm_sched_wakeup-by.patch b/patches-6.6/034-26-v6.8-drm-sched-Qualify-drm_sched_wakeup-by.patch new file mode 100644 index 0000000..820c82f --- /dev/null +++ b/patches-6.6/034-26-v6.8-drm-sched-Qualify-drm_sched_wakeup-by.patch @@ -0,0 +1,69 @@ +From f3123c2590005c5ff631653d31428e40cd10c618 Mon Sep 17 00:00:00 2001 +From: Luben Tuikov +Date: Thu, 9 Nov 2023 18:53:26 -0500 +Subject: [PATCH] drm/sched: Qualify drm_sched_wakeup() by + drm_sched_entity_is_ready() + +Don't "wake up" the GPU scheduler unless the entity is ready, as well as we +can queue to the scheduler, i.e. there is no point in waking up the scheduler +for the entity unless the entity is ready. 
+ +Signed-off-by: Luben Tuikov +Fixes: bc8d6a9df99038 ("drm/sched: Don't disturb the entity when in RR-mode scheduling") +Reviewed-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231110000123.72565-2-ltuikov89@gmail.com +--- + drivers/gpu/drm/scheduler/sched_entity.c | 4 ++-- + drivers/gpu/drm/scheduler/sched_main.c | 8 +++++--- + include/drm/gpu_scheduler.h | 2 +- + 3 files changed, 8 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/scheduler/sched_entity.c ++++ b/drivers/gpu/drm/scheduler/sched_entity.c +@@ -370,7 +370,7 @@ static void drm_sched_entity_wakeup(stru + container_of(cb, struct drm_sched_entity, cb); + + drm_sched_entity_clear_dep(f, cb); +- drm_sched_wakeup(entity->rq->sched); ++ drm_sched_wakeup(entity->rq->sched, entity); + } + + /** +@@ -602,7 +602,7 @@ void drm_sched_entity_push_job(struct dr + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + drm_sched_rq_update_fifo(entity, submit_ts); + +- drm_sched_wakeup(entity->rq->sched); ++ drm_sched_wakeup(entity->rq->sched, entity); + } + } + EXPORT_SYMBOL(drm_sched_entity_push_job); +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -922,10 +922,12 @@ static bool drm_sched_can_queue(struct d + * + * Wake up the scheduler if we can queue jobs. + */ +-void drm_sched_wakeup(struct drm_gpu_scheduler *sched) ++void drm_sched_wakeup(struct drm_gpu_scheduler *sched, ++ struct drm_sched_entity *entity) + { +- if (drm_sched_can_queue(sched)) +- drm_sched_run_job_queue(sched); ++ if (drm_sched_entity_is_ready(entity)) ++ if (drm_sched_can_queue(sched)) ++ drm_sched_run_job_queue(sched); + } + + /** +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -558,7 +558,7 @@ void drm_sched_entity_modify_sched(struc + + void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); + void drm_sched_job_cleanup(struct drm_sched_job *job); +-void drm_sched_wakeup(struct drm_gpu_scheduler *sched); ++void drm_sched_wakeup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); + bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); + void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); + void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); diff --git a/patches-6.6/034-27-v6.8-drm-sched-implement-dynamic-job-flow-control.patch b/patches-6.6/034-27-v6.8-drm-sched-implement-dynamic-job-flow-control.patch new file mode 100644 index 0000000..926cc5f --- /dev/null +++ b/patches-6.6/034-27-v6.8-drm-sched-implement-dynamic-job-flow-control.patch @@ -0,0 +1,612 @@ +From a78422e9dff366b3a46ae44caf6ec8ded9c9fc2f Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Fri, 10 Nov 2023 01:16:33 +0100 +Subject: [PATCH] drm/sched: implement dynamic job-flow control + +Currently, job flow control is implemented simply by limiting the number +of jobs in flight. Therefore, a scheduler is initialized with a credit +limit that corresponds to the number of jobs which can be sent to the +hardware. + +This implies that for each job, drivers need to account for the maximum +job size possible in order to not overflow the ring buffer. + +However, there are drivers, such as Nouveau, where the job size has a +rather large range. For such drivers it can easily happen that job +submissions not even filling the ring by 1% can block subsequent +submissions, which, in the worst case, can lead to the ring run dry. + +In order to overcome this issue, allow for tracking the actual job size +instead of the number of jobs. 
Therefore, add a field to track a job's +credit count, which represents the number of credits a job contributes +to the scheduler's credit limit. + +Signed-off-by: Danilo Krummrich +Reviewed-by: Luben Tuikov +Link: https://patchwork.freedesktop.org/patch/msgid/20231110001638.71750-1-dakr@redhat.com +--- + Documentation/gpu/drm-mm.rst | 6 + + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- + drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +- + drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 2 +- + drivers/gpu/drm/lima/lima_device.c | 2 +- + drivers/gpu/drm/lima/lima_sched.c | 2 +- + drivers/gpu/drm/msm/msm_gem_submit.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- + drivers/gpu/drm/panfrost/panfrost_drv.c | 2 +- + drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- + .../gpu/drm/scheduler/gpu_scheduler_trace.h | 2 +- + drivers/gpu/drm/scheduler/sched_main.c | 170 ++++++++++++++---- + drivers/gpu/drm/v3d/v3d_gem.c | 2 +- + include/drm/gpu_scheduler.h | 28 ++- + 14 files changed, 175 insertions(+), 51 deletions(-) + +--- a/Documentation/gpu/drm-mm.rst ++++ b/Documentation/gpu/drm-mm.rst +@@ -552,6 +552,12 @@ Overview + .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c + :doc: Overview + ++Flow Control ++------------ ++ ++.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c ++ :doc: Flow Control ++ + Scheduler Function References + ----------------------------- + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -115,7 +115,7 @@ int amdgpu_job_alloc(struct amdgpu_devic + if (!entity) + return 0; + +- return drm_sched_job_init(&(*job)->base, entity, owner); ++ return drm_sched_job_init(&(*job)->base, entity, 1, owner); + } + + int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, +--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +@@ -535,7 +535,7 @@ int etnaviv_ioctl_gem_submit(struct drm_ + + ret = drm_sched_job_init(&submit->sched_job, + &ctx->sched_entity[args->pipe], +- submit->ctx); ++ 1, submit->ctx); + if (ret) + goto err_submit_put; + +--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +@@ -1917,7 +1917,7 @@ static int etnaviv_gpu_rpm_suspend(struc + u32 idle, mask; + + /* If there are any jobs in the HW queue, we're not idle */ +- if (atomic_read(&gpu->sched.hw_rq_count)) ++ if (atomic_read(&gpu->sched.credit_count)) + return -EBUSY; + + /* Check whether the hardware (except FE and MC) is idle */ +--- a/drivers/gpu/drm/lima/lima_device.c ++++ b/drivers/gpu/drm/lima/lima_device.c +@@ -514,7 +514,7 @@ int lima_device_suspend(struct device *d + + /* check any task running */ + for (i = 0; i < lima_pipe_num; i++) { +- if (atomic_read(&ldev->pipe[i].base.hw_rq_count)) ++ if (atomic_read(&ldev->pipe[i].base.credit_count)) + return -EBUSY; + } + +--- a/drivers/gpu/drm/lima/lima_sched.c ++++ b/drivers/gpu/drm/lima/lima_sched.c +@@ -123,7 +123,7 @@ int lima_sched_task_init(struct lima_sch + for (i = 0; i < num_bos; i++) + drm_gem_object_get(&bos[i]->base.base); + +- err = drm_sched_job_init(&task->base, &context->base, vm); ++ err = drm_sched_job_init(&task->base, &context->base, 1, vm); + if (err) { + kfree(task->bos); + return err; +--- a/drivers/gpu/drm/msm/msm_gem_submit.c ++++ b/drivers/gpu/drm/msm/msm_gem_submit.c +@@ -48,7 +48,7 @@ static struct msm_gem_submit *submit_cre + return ERR_PTR(ret); + } + +- ret = drm_sched_job_init(&submit->base, queue->entity, queue); ++ ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue); + if (ret) 
{ + kfree(submit->hw_fence); + kfree(submit); +--- a/drivers/gpu/drm/nouveau/nouveau_sched.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.c +@@ -89,7 +89,7 @@ nouveau_job_init(struct nouveau_job *job + + } + +- ret = drm_sched_job_init(&job->base, &entity->base, NULL); ++ ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL); + if (ret) + goto err_free_chains; + +--- a/drivers/gpu/drm/panfrost/panfrost_drv.c ++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c +@@ -272,7 +272,7 @@ static int panfrost_ioctl_submit(struct + + ret = drm_sched_job_init(&job->base, + &file_priv->sched_entity[slot], +- NULL); ++ 1, NULL); + if (ret) + goto out_put_job; + +--- a/drivers/gpu/drm/panfrost/panfrost_job.c ++++ b/drivers/gpu/drm/panfrost/panfrost_job.c +@@ -939,7 +939,7 @@ int panfrost_job_is_idle(struct panfrost + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + /* If there are any jobs in the HW queue, we're not idle */ +- if (atomic_read(&js->queue[i].sched.hw_rq_count)) ++ if (atomic_read(&js->queue[i].sched.credit_count)) + return false; + } + +--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h ++++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +@@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(drm_sched_job, + __assign_str(name, sched_job->sched->name); + __entry->job_count = spsc_queue_count(&entity->job_queue); + __entry->hw_job_count = atomic_read( +- &sched_job->sched->hw_rq_count); ++ &sched_job->sched->credit_count); + ), + TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", + __entry->entity, __entry->id, +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -48,6 +48,30 @@ + * through the jobs entity pointer. + */ + ++/** ++ * DOC: Flow Control ++ * ++ * The DRM GPU scheduler provides a flow control mechanism to regulate the rate ++ * in which the jobs fetched from scheduler entities are executed. ++ * ++ * In this context the &drm_gpu_scheduler keeps track of a driver specified ++ * credit limit representing the capacity of this scheduler and a credit count; ++ * every &drm_sched_job carries a driver specified number of credits. ++ * ++ * Once a job is executed (but not yet finished), the job's credits contribute ++ * to the scheduler's credit count until the job is finished. If by executing ++ * one more job the scheduler's credit count would exceed the scheduler's ++ * credit limit, the job won't be executed. Instead, the scheduler will wait ++ * until the credit count has decreased enough to not overflow its credit limit. ++ * This implies waiting for previously executed jobs. ++ * ++ * Optionally, drivers may register a callback (update_job_credits) provided by ++ * struct drm_sched_backend_ops to update the job's credits dynamically. The ++ * scheduler executes this callback every time the scheduler considers a job for ++ * execution and subsequently checks whether the job fits the scheduler's credit ++ * limit. 
++ */ ++ + #include + #include + #include +@@ -75,6 +99,51 @@ int drm_sched_policy = DRM_SCHED_POLICY_ + MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); + module_param_named(sched_policy, drm_sched_policy, int, 0444); + ++static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) ++{ ++ u32 credits; ++ ++ drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit, ++ atomic_read(&sched->credit_count), ++ &credits)); ++ ++ return credits; ++} ++ ++/** ++ * drm_sched_can_queue -- Can we queue more to the hardware? ++ * @sched: scheduler instance ++ * @entity: the scheduler entity ++ * ++ * Return true if we can push at least one more job from @entity, false ++ * otherwise. ++ */ ++static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, ++ struct drm_sched_entity *entity) ++{ ++ struct drm_sched_job *s_job; ++ ++ s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); ++ if (!s_job) ++ return false; ++ ++ if (sched->ops->update_job_credits) { ++ s_job->credits = sched->ops->update_job_credits(s_job); ++ ++ drm_WARN(sched, !s_job->credits, ++ "Jobs with zero credits bypass job-flow control.\n"); ++ } ++ ++ /* If a job exceeds the credit limit, truncate it to the credit limit ++ * itself to guarantee forward progress. ++ */ ++ if (drm_WARN(sched, s_job->credits > sched->credit_limit, ++ "Jobs may not exceed the credit limit, truncate.\n")) ++ s_job->credits = sched->credit_limit; ++ ++ return drm_sched_available_credits(sched) >= s_job->credits; ++} ++ + static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, + const struct rb_node *b) + { +@@ -186,12 +255,18 @@ void drm_sched_rq_remove_entity(struct d + /** + * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run + * ++ * @sched: the gpu scheduler + * @rq: scheduler run queue to check. + * +- * Try to find a ready entity, returns NULL if none found. ++ * Try to find the next ready entity. ++ * ++ * Return an entity if one is found; return an error-pointer (!NULL) if an ++ * entity was ready, but the scheduler had insufficient credits to accommodate ++ * its job; return NULL, if no ready entity was found. + */ + static struct drm_sched_entity * +-drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) ++drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, ++ struct drm_sched_rq *rq) + { + struct drm_sched_entity *entity; + +@@ -201,6 +276,14 @@ drm_sched_rq_select_entity_rr(struct drm + if (entity) { + list_for_each_entry_continue(entity, &rq->entities, list) { + if (drm_sched_entity_is_ready(entity)) { ++ /* If we can't queue yet, preserve the current ++ * entity in terms of fairness. ++ */ ++ if (!drm_sched_can_queue(sched, entity)) { ++ spin_unlock(&rq->lock); ++ return ERR_PTR(-ENOSPC); ++ } ++ + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + spin_unlock(&rq->lock); +@@ -210,8 +293,15 @@ drm_sched_rq_select_entity_rr(struct drm + } + + list_for_each_entry(entity, &rq->entities, list) { +- + if (drm_sched_entity_is_ready(entity)) { ++ /* If we can't queue yet, preserve the current entity in ++ * terms of fairness. 
++ */ ++ if (!drm_sched_can_queue(sched, entity)) { ++ spin_unlock(&rq->lock); ++ return ERR_PTR(-ENOSPC); ++ } ++ + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + spin_unlock(&rq->lock); +@@ -230,12 +320,18 @@ drm_sched_rq_select_entity_rr(struct drm + /** + * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run + * ++ * @sched: the gpu scheduler + * @rq: scheduler run queue to check. + * +- * Find oldest waiting ready entity, returns NULL if none found. ++ * Find oldest waiting ready entity. ++ * ++ * Return an entity if one is found; return an error-pointer (!NULL) if an ++ * entity was ready, but the scheduler had insufficient credits to accommodate ++ * its job; return NULL, if no ready entity was found. + */ + static struct drm_sched_entity * +-drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) ++drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, ++ struct drm_sched_rq *rq) + { + struct rb_node *rb; + +@@ -245,6 +341,14 @@ drm_sched_rq_select_entity_fifo(struct d + + entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); + if (drm_sched_entity_is_ready(entity)) { ++ /* If we can't queue yet, preserve the current entity in ++ * terms of fairness. ++ */ ++ if (!drm_sched_can_queue(sched, entity)) { ++ spin_unlock(&rq->lock); ++ return ERR_PTR(-ENOSPC); ++ } ++ + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + break; +@@ -302,7 +406,7 @@ static void drm_sched_job_done(struct dr + struct drm_sched_fence *s_fence = s_job->s_fence; + struct drm_gpu_scheduler *sched = s_fence->sched; + +- atomic_dec(&sched->hw_rq_count); ++ atomic_sub(s_job->credits, &sched->credit_count); + atomic_dec(sched->score); + + trace_drm_sched_process_job(s_fence); +@@ -519,7 +623,7 @@ void drm_sched_stop(struct drm_gpu_sched + &s_job->cb)) { + dma_fence_put(s_job->s_fence->parent); + s_job->s_fence->parent = NULL; +- atomic_dec(&sched->hw_rq_count); ++ atomic_sub(s_job->credits, &sched->credit_count); + } else { + /* + * remove job from pending_list. +@@ -580,7 +684,7 @@ void drm_sched_start(struct drm_gpu_sche + list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { + struct dma_fence *fence = s_job->s_fence->parent; + +- atomic_inc(&sched->hw_rq_count); ++ atomic_add(s_job->credits, &sched->credit_count); + + if (!full_recovery) + continue; +@@ -664,6 +768,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); + * drm_sched_job_init - init a scheduler job + * @job: scheduler job to init + * @entity: scheduler entity to use ++ * @credits: the number of credits this job contributes to the schedulers ++ * credit limit + * @owner: job owner for debugging + * + * Refer to drm_sched_entity_push_job() documentation +@@ -681,7 +787,7 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); + */ + int drm_sched_job_init(struct drm_sched_job *job, + struct drm_sched_entity *entity, +- void *owner) ++ u32 credits, void *owner) + { + if (!entity->rq) { + /* This will most likely be followed by missing frames +@@ -692,7 +798,13 @@ int drm_sched_job_init(struct drm_sched_ + return -ENOENT; + } + ++ if (unlikely(!credits)) { ++ pr_err("*ERROR* %s: credits cannot be 0!\n", __func__); ++ return -EINVAL; ++ } ++ + job->entity = entity; ++ job->credits = credits; + job->s_fence = drm_sched_fence_alloc(entity, owner); + if (!job->s_fence) + return -ENOMEM; +@@ -905,20 +1017,9 @@ void drm_sched_job_cleanup(struct drm_sc + EXPORT_SYMBOL(drm_sched_job_cleanup); + + /** +- * drm_sched_can_queue -- Can we queue more to the hardware? 
+- * @sched: scheduler instance +- * +- * Return true if we can push more jobs to the hw, otherwise false. +- */ +-static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) +-{ +- return atomic_read(&sched->hw_rq_count) < +- sched->hw_submission_limit; +-} +- +-/** + * drm_sched_wakeup - Wake up the scheduler if it is ready to queue + * @sched: scheduler instance ++ * @entity: the scheduler entity + * + * Wake up the scheduler if we can queue jobs. + */ +@@ -926,7 +1027,7 @@ void drm_sched_wakeup(struct drm_gpu_sch + struct drm_sched_entity *entity) + { + if (drm_sched_entity_is_ready(entity)) +- if (drm_sched_can_queue(sched)) ++ if (drm_sched_can_queue(sched, entity)) + drm_sched_run_job_queue(sched); + } + +@@ -935,7 +1036,11 @@ void drm_sched_wakeup(struct drm_gpu_sch + * + * @sched: scheduler instance + * +- * Returns the entity to process or NULL if none are found. ++ * Return an entity to process or NULL if none are found. ++ * ++ * Note, that we break out of the for-loop when "entity" is non-null, which can ++ * also be an error-pointer--this assures we don't process lower priority ++ * run-queues. See comments in the respectively called functions. + */ + static struct drm_sched_entity * + drm_sched_select_entity(struct drm_gpu_scheduler *sched) +@@ -943,19 +1048,16 @@ drm_sched_select_entity(struct drm_gpu_s + struct drm_sched_entity *entity; + int i; + +- if (!drm_sched_can_queue(sched)) +- return NULL; +- + /* Kernel run queue has higher priority than normal run queue*/ + for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? +- drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) : +- drm_sched_rq_select_entity_rr(sched->sched_rq[i]); ++ drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : ++ drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); + if (entity) + break; + } + +- return entity; ++ return IS_ERR(entity) ? NULL : entity; + } + + /** +@@ -1102,7 +1204,7 @@ static void drm_sched_run_job_work(struc + + s_fence = sched_job->s_fence; + +- atomic_inc(&sched->hw_rq_count); ++ atomic_add(sched_job->credits, &sched->credit_count); + drm_sched_job_begin(sched_job); + + trace_drm_run_job(sched_job, entity); +@@ -1137,7 +1239,7 @@ static void drm_sched_run_job_work(struc + * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is + * allocated and used + * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT +- * @hw_submission: number of hw submissions that can be in flight ++ * @credit_limit: the number of credits this scheduler can hold from all jobs + * @hang_limit: number of times to allow a job to hang before dropping it + * @timeout: timeout value in jiffies for the scheduler + * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is +@@ -1151,14 +1253,14 @@ static void drm_sched_run_job_work(struc + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + struct workqueue_struct *submit_wq, +- u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, ++ u32 num_rqs, u32 credit_limit, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev) + { + int i, ret; + + sched->ops = ops; +- sched->hw_submission_limit = hw_submission; ++ sched->credit_limit = credit_limit; + sched->name = name; + sched->timeout = timeout; + sched->timeout_wq = timeout_wq ? 
: system_wq; +@@ -1207,7 +1309,7 @@ int drm_sched_init(struct drm_gpu_schedu + init_waitqueue_head(&sched->job_scheduled); + INIT_LIST_HEAD(&sched->pending_list); + spin_lock_init(&sched->job_list_lock); +- atomic_set(&sched->hw_rq_count, 0); ++ atomic_set(&sched->credit_count, 0); + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); + INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); + INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); +--- a/drivers/gpu/drm/v3d/v3d_gem.c ++++ b/drivers/gpu/drm/v3d/v3d_gem.c +@@ -417,7 +417,7 @@ v3d_job_init(struct v3d_dev *v3d, struct + job->free = free; + + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], +- v3d_priv); ++ 1, v3d_priv); + if (ret) + goto fail; + +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -320,6 +320,7 @@ struct drm_sched_fence *to_drm_sched_fen + * @sched: the scheduler instance on which this job is scheduled. + * @s_fence: contains the fences for the scheduling of job. + * @finish_cb: the callback for the finished fence. ++ * @credits: the number of credits this job contributes to the scheduler + * @work: Helper to reschdeule job kill to different context. + * @id: a unique id assigned to each job scheduled on the scheduler. + * @karma: increment on every hang caused by this job. If this exceeds the hang +@@ -339,6 +340,8 @@ struct drm_sched_job { + struct drm_gpu_scheduler *sched; + struct drm_sched_fence *s_fence; + ++ u32 credits; ++ + /* + * work is used only after finish_cb has been used and will not be + * accessed anymore. +@@ -462,13 +465,27 @@ struct drm_sched_backend_ops { + * and it's time to clean it up. + */ + void (*free_job)(struct drm_sched_job *sched_job); ++ ++ /** ++ * @update_job_credits: Called when the scheduler is considering this ++ * job for execution. ++ * ++ * This callback returns the number of credits the job would take if ++ * pushed to the hardware. Drivers may use this to dynamically update ++ * the job's credit count. For instance, deduct the number of credits ++ * for already signalled native fences. ++ * ++ * This callback is optional. ++ */ ++ u32 (*update_job_credits)(struct drm_sched_job *sched_job); + }; + + /** + * struct drm_gpu_scheduler - scheduler instance-specific data + * + * @ops: backend operations provided by the driver. +- * @hw_submission_limit: the max size of the hardware queue. ++ * @credit_limit: the credit limit of this scheduler ++ * @credit_count: the current credit count of this scheduler + * @timeout: the time after which a job is removed from the scheduler. + * @name: name of the ring for which this scheduler is being used. + * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, +@@ -477,7 +494,6 @@ struct drm_sched_backend_ops { + * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler + * waits on this wait queue until all the scheduled jobs are + * finished. +- * @hw_rq_count: the number of jobs currently in the hardware queue. + * @job_id_count: used to assign unique id to the each job. 
+ * @submit_wq: workqueue used to queue @work_run_job and @work_free_job + * @timeout_wq: workqueue used to queue @work_tdr +@@ -501,13 +517,13 @@ struct drm_sched_backend_ops { + */ + struct drm_gpu_scheduler { + const struct drm_sched_backend_ops *ops; +- uint32_t hw_submission_limit; ++ u32 credit_limit; ++ atomic_t credit_count; + long timeout; + const char *name; + u32 num_rqs; + struct drm_sched_rq **sched_rq; + wait_queue_head_t job_scheduled; +- atomic_t hw_rq_count; + atomic64_t job_id_count; + struct workqueue_struct *submit_wq; + struct workqueue_struct *timeout_wq; +@@ -529,14 +545,14 @@ struct drm_gpu_scheduler { + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + struct workqueue_struct *submit_wq, +- u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, ++ u32 num_rqs, u32 credit_limit, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev); + + void drm_sched_fini(struct drm_gpu_scheduler *sched); + int drm_sched_job_init(struct drm_sched_job *job, + struct drm_sched_entity *entity, +- void *owner); ++ u32 credits, void *owner); + void drm_sched_job_arm(struct drm_sched_job *job); + int drm_sched_job_add_dependency(struct drm_sched_job *job, + struct dma_fence *fence); diff --git a/patches-6.6/034-28-v6.8-iommu-Allow-passing-custom-allocators-to-pgtable-drivers.patch b/patches-6.6/034-28-v6.8-iommu-Allow-passing-custom-allocators-to-pgtable-drivers.patch new file mode 100644 index 0000000..f875da8 --- /dev/null +++ b/patches-6.6/034-28-v6.8-iommu-Allow-passing-custom-allocators-to-pgtable-drivers.patch @@ -0,0 +1,129 @@ +From 17b226dcf80ce79d02f4f0b08813d8848885b986 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Fri, 24 Nov 2023 15:24:33 +0100 +Subject: [PATCH] iommu: Allow passing custom allocators to pgtable drivers + +This will be useful for GPU drivers who want to keep page tables in a +pool so they can: + +- keep freed page tables in a free pool and speed-up upcoming page + table allocations +- batch page table allocation instead of allocating one page at a time +- pre-reserve pages for page tables needed for map/unmap operations, + to ensure map/unmap operations don't try to allocate memory in paths + they're allowed to block or fail + +It might also be valuable for other aspects of GPU and similar +use-cases, like fine-grained memory accounting and resource limiting. + +We will extend the Arm LPAE format to support custom allocators in a +separate commit. + +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: Robin Murphy +Link: https://lore.kernel.org/r/20231124142434.1577550-2-boris.brezillon@collabora.com +Signed-off-by: Joerg Roedel +--- + drivers/iommu/io-pgtable.c | 23 +++++++++++++++++++++++ + include/linux/io-pgtable.h | 34 ++++++++++++++++++++++++++++++++++ + 2 files changed, 57 insertions(+) + +--- a/drivers/iommu/io-pgtable.c ++++ b/drivers/iommu/io-pgtable.c +@@ -34,6 +34,26 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMT + #endif + }; + ++static int check_custom_allocator(enum io_pgtable_fmt fmt, ++ struct io_pgtable_cfg *cfg) ++{ ++ /* No custom allocator, no need to check the format. */ ++ if (!cfg->alloc && !cfg->free) ++ return 0; ++ ++ /* When passing a custom allocator, both the alloc and free ++ * functions should be provided. ++ */ ++ if (!cfg->alloc || !cfg->free) ++ return -EINVAL; ++ ++ /* Make sure the format supports custom allocators. 
*/ ++ if (io_pgtable_init_table[fmt]->caps & IO_PGTABLE_CAP_CUSTOM_ALLOCATOR) ++ return 0; ++ ++ return -EINVAL; ++} ++ + struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg, + void *cookie) +@@ -44,6 +64,9 @@ struct io_pgtable_ops *alloc_io_pgtable_ + if (fmt >= IO_PGTABLE_NUM_FMTS) + return NULL; + ++ if (check_custom_allocator(fmt, cfg)) ++ return NULL; ++ + fns = io_pgtable_init_table[fmt]; + if (!fns) + return NULL; +--- a/include/linux/io-pgtable.h ++++ b/include/linux/io-pgtable.h +@@ -100,6 +100,30 @@ struct io_pgtable_cfg { + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; + ++ /** ++ * @alloc: Custom page allocator. ++ * ++ * Optional hook used to allocate page tables. If this function is NULL, ++ * @free must be NULL too. ++ * ++ * Memory returned should be zeroed and suitable for dma_map_single() and ++ * virt_to_phys(). ++ * ++ * Not all formats support custom page allocators. Before considering ++ * passing a non-NULL value, make sure the chosen page format supports ++ * this feature. ++ */ ++ void *(*alloc)(void *cookie, size_t size, gfp_t gfp); ++ ++ /** ++ * @free: Custom page de-allocator. ++ * ++ * Optional hook used to free page tables allocated with the @alloc ++ * hook. Must be non-NULL if @alloc is not NULL, must be NULL ++ * otherwise. ++ */ ++ void (*free)(void *cookie, void *pages, size_t size); ++ + /* Low-level data specific to the table format */ + union { + struct { +@@ -238,15 +262,25 @@ io_pgtable_tlb_add_page(struct io_pgtabl + } + + /** ++ * enum io_pgtable_caps - IO page table backend capabilities. ++ */ ++enum io_pgtable_caps { ++ /** @IO_PGTABLE_CAP_CUSTOM_ALLOCATOR: Backend accepts custom page table allocators. */ ++ IO_PGTABLE_CAP_CUSTOM_ALLOCATOR = BIT(0), ++}; ++ ++/** + * struct io_pgtable_init_fns - Alloc/free a set of page tables for a + * particular format. + * + * @alloc: Allocate a set of page tables described by cfg. + * @free: Free the page tables associated with iop. ++ * @caps: Combination of @io_pgtable_caps flags encoding the backend capabilities. + */ + struct io_pgtable_init_fns { + struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); + void (*free)(struct io_pgtable *iop); ++ u32 caps; + }; + + extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns; diff --git a/patches-6.6/034-29-v6.9-drm-exec-drm-gpuvm-Prefer-u32-over-uint32_t.patch b/patches-6.6/034-29-v6.9-drm-exec-drm-gpuvm-Prefer-u32-over-uint32_t.patch new file mode 100644 index 0000000..6917137 --- /dev/null +++ b/patches-6.6/034-29-v6.9-drm-exec-drm-gpuvm-Prefer-u32-over-uint32_t.patch @@ -0,0 +1,68 @@ +From cf41cebf9dc8143ca7bb0aabb7e0053e16f0515a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= +Date: Fri, 19 Jan 2024 10:05:57 +0100 +Subject: [PATCH] drm/exec, drm/gpuvm: Prefer u32 over uint32_t +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The relatively recently introduced drm/exec utility was using uint32_t +in its interface, which was then also carried over to drm/gpuvm. + +Prefer u32 in new code and update drm/exec and drm/gpuvm accordingly. 
+ +Cc: Christian König +Cc: Danilo Krummrich +Signed-off-by: Thomas Hellström +Reviewed-by: Christian König +Reviewed-by: Danilo Krummrich +Reviewed-by: Lucas De Marchi +Link: https://patchwork.freedesktop.org/patch/msgid/20240119090557.6360-1-thomas.hellstrom@linux.intel.com +--- + drivers/gpu/drm/drm_exec.c | 2 +- + include/drm/drm_exec.h | 4 ++-- + include/drm/drm_gpuvm.h | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/drm_exec.c ++++ b/drivers/gpu/drm/drm_exec.c +@@ -72,7 +72,7 @@ static void drm_exec_unlock_all(struct d + * + * Initialize the object and make sure that we can track locked objects. + */ +-void drm_exec_init(struct drm_exec *exec, uint32_t flags) ++void drm_exec_init(struct drm_exec *exec, u32 flags) + { + exec->flags = flags; + exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); +--- a/include/drm/drm_exec.h ++++ b/include/drm/drm_exec.h +@@ -18,7 +18,7 @@ struct drm_exec { + /** + * @flags: Flags to control locking behavior + */ +- uint32_t flags; ++ u32 flags; + + /** + * @ticket: WW ticket used for acquiring locks +@@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended + return !!exec->contended; + } + +-void drm_exec_init(struct drm_exec *exec, uint32_t flags); ++void drm_exec_init(struct drm_exec *exec, u32 flags); + void drm_exec_fini(struct drm_exec *exec); + bool drm_exec_cleanup(struct drm_exec *exec); + int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj); +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -514,7 +514,7 @@ struct drm_gpuvm_exec { + /** + * @flags: the flags for the struct drm_exec + */ +- uint32_t flags; ++ u32 flags; + + /** + * @vm: the &drm_gpuvm to lock its DMA reservations diff --git a/patches-6.6/034-30-v6.10-drm-panthor-Add-uAPI.patch b/patches-6.6/034-30-v6.10-drm-panthor-Add-uAPI.patch new file mode 100644 index 0000000..e6f9bee --- /dev/null +++ b/patches-6.6/034-30-v6.10-drm-panthor-Add-uAPI.patch @@ -0,0 +1,1024 @@ +From 0f25e493a2462dbdd2e34f4e100405380cc0201a Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:15 +0100 +Subject: [PATCH] drm/panthor: Add uAPI + +Panthor follows the lead of other recently submitted drivers with +ioctls allowing us to support modern Vulkan features, like sparse memory +binding: + +- Pretty standard GEM management ioctls (BO_CREATE and BO_MMAP_OFFSET), + with the 'exclusive-VM' bit to speed-up BO reservation on job submission +- VM management ioctls (VM_CREATE, VM_DESTROY and VM_BIND). The VM_BIND + ioctl is loosely based on the Xe model, and can handle both + asynchronous and synchronous requests +- GPU execution context creation/destruction, tiler heap context creation + and job submission. Those ioctls reflect how the hardware/scheduler + works and are thus driver specific. + +We also have a way to expose IO regions, such that the usermode driver +can directly access specific/well-isolate registers, like the +LATEST_FLUSH register used to implement cache-flush reduction. + +This uAPI intentionally keeps usermode queues out of the scope, which +explains why doorbell registers and command stream ring-buffers are not +directly exposed to userspace. 
+ +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix typo +- Add Liviu's R-b + +v4: +- Add a VM_GET_STATE ioctl +- Fix doc +- Expose the CORE_FEATURES register so we can deal with variants in the + UMD +- Add Steve's R-b + +v3: +- Add the concept of sync-only VM operation +- Fix support for 32-bit userspace +- Rework drm_panthor_vm_create to pass the user VA size instead of + the kernel VA size (suggested by Robin Murphy) +- Typo fixes +- Explicitly cast enums with top bit set to avoid compiler warnings in + -pedantic mode. +- Drop property core_group_count as it can be easily calculated by the + number of bits set in l2_present. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-2-boris.brezillon@collabora.com +--- + Documentation/gpu/driver-uapi.rst | 5 + + include/uapi/drm/panthor_drm.h | 945 ++++++++++++++++++++++++++++++ + 2 files changed, 950 insertions(+) + create mode 100644 include/uapi/drm/panthor_drm.h + +--- a/Documentation/gpu/driver-uapi.rst ++++ b/Documentation/gpu/driver-uapi.rst +@@ -17,3 +17,8 @@ VM_BIND / EXEC uAPI + :doc: Overview + + .. kernel-doc:: include/uapi/drm/nouveau_drm.h ++ ++drm/panthor uAPI ++================ ++ ++.. kernel-doc:: include/uapi/drm/panthor_drm.h +--- /dev/null ++++ b/include/uapi/drm/panthor_drm.h +@@ -0,0 +1,945 @@ ++/* SPDX-License-Identifier: MIT */ ++/* Copyright (C) 2023 Collabora ltd. */ ++#ifndef _PANTHOR_DRM_H_ ++#define _PANTHOR_DRM_H_ ++ ++#include "drm.h" ++ ++#if defined(__cplusplus) ++extern "C" { ++#endif ++ ++/** ++ * DOC: Introduction ++ * ++ * This documentation describes the Panthor IOCTLs. ++ * ++ * Just a few generic rules about the data passed to the Panthor IOCTLs: ++ * ++ * - Structures must be aligned on 64-bit/8-byte. If the object is not ++ * naturally aligned, a padding field must be added. ++ * - Fields must be explicitly aligned to their natural type alignment with ++ * pad[0..N] fields. ++ * - All padding fields will be checked by the driver to make sure they are ++ * zeroed. ++ * - Flags can be added, but not removed/replaced. ++ * - New fields can be added to the main structures (the structures ++ * directly passed to the ioctl). Those fields can be added at the end of ++ * the structure, or replace existing padding fields. Any new field being ++ * added must preserve the behavior that existed before those fields were ++ * added when a value of zero is passed. ++ * - New fields can be added to indirect objects (objects pointed by the ++ * main structure), iff those objects are passed a size to reflect the ++ * size known by the userspace driver (see drm_panthor_obj_array::stride ++ * or drm_panthor_dev_query::size). ++ * - If the kernel driver is too old to know some fields, those will be ++ * ignored if zero, and otherwise rejected (and so will be zero on output). ++ * - If userspace is too old to know some fields, those will be zeroed ++ * (input) before the structure is parsed by the kernel driver. ++ * - Each new flag/field addition must come with a driver version update so ++ * the userspace driver doesn't have to trial and error to know which ++ * flags are supported. ++ * - Structures should not contain unions, as this would defeat the ++ * extensibility of such structures. ++ * - IOCTLs can't be removed or replaced. 
New IOCTL IDs should be placed ++ * at the end of the drm_panthor_ioctl_id enum. ++ */ ++ ++/** ++ * DOC: MMIO regions exposed to userspace. ++ * ++ * .. c:macro:: DRM_PANTHOR_USER_MMIO_OFFSET ++ * ++ * File offset for all MMIO regions being exposed to userspace. Don't use ++ * this value directly, use DRM_PANTHOR_USER__OFFSET values instead. ++ * pgoffset passed to mmap2() is an unsigned long, which forces us to use a ++ * different offset on 32-bit and 64-bit systems. ++ * ++ * .. c:macro:: DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET ++ * ++ * File offset for the LATEST_FLUSH_ID register. The Userspace driver controls ++ * GPU cache flushing through CS instructions, but the flush reduction ++ * mechanism requires a flush_id. This flush_id could be queried with an ++ * ioctl, but Arm provides a well-isolated register page containing only this ++ * read-only register, so let's expose this page through a static mmap offset ++ * and allow direct mapping of this MMIO region so we can avoid the ++ * user <-> kernel round-trip. ++ */ ++#define DRM_PANTHOR_USER_MMIO_OFFSET_32BIT (1ull << 43) ++#define DRM_PANTHOR_USER_MMIO_OFFSET_64BIT (1ull << 56) ++#define DRM_PANTHOR_USER_MMIO_OFFSET (sizeof(unsigned long) < 8 ? \ ++ DRM_PANTHOR_USER_MMIO_OFFSET_32BIT : \ ++ DRM_PANTHOR_USER_MMIO_OFFSET_64BIT) ++#define DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET (DRM_PANTHOR_USER_MMIO_OFFSET | 0) ++ ++/** ++ * DOC: IOCTL IDs ++ * ++ * enum drm_panthor_ioctl_id - IOCTL IDs ++ * ++ * Place new ioctls at the end, don't re-order, don't replace or remove entries. ++ * ++ * These IDs are not meant to be used directly. Use the DRM_IOCTL_PANTHOR_xxx ++ * definitions instead. ++ */ ++enum drm_panthor_ioctl_id { ++ /** @DRM_PANTHOR_DEV_QUERY: Query device information. */ ++ DRM_PANTHOR_DEV_QUERY = 0, ++ ++ /** @DRM_PANTHOR_VM_CREATE: Create a VM. */ ++ DRM_PANTHOR_VM_CREATE, ++ ++ /** @DRM_PANTHOR_VM_DESTROY: Destroy a VM. */ ++ DRM_PANTHOR_VM_DESTROY, ++ ++ /** @DRM_PANTHOR_VM_BIND: Bind/unbind memory to a VM. */ ++ DRM_PANTHOR_VM_BIND, ++ ++ /** @DRM_PANTHOR_VM_GET_STATE: Get VM state. */ ++ DRM_PANTHOR_VM_GET_STATE, ++ ++ /** @DRM_PANTHOR_BO_CREATE: Create a buffer object. */ ++ DRM_PANTHOR_BO_CREATE, ++ ++ /** ++ * @DRM_PANTHOR_BO_MMAP_OFFSET: Get the file offset to pass to ++ * mmap to map a GEM object. ++ */ ++ DRM_PANTHOR_BO_MMAP_OFFSET, ++ ++ /** @DRM_PANTHOR_GROUP_CREATE: Create a scheduling group. */ ++ DRM_PANTHOR_GROUP_CREATE, ++ ++ /** @DRM_PANTHOR_GROUP_DESTROY: Destroy a scheduling group. */ ++ DRM_PANTHOR_GROUP_DESTROY, ++ ++ /** ++ * @DRM_PANTHOR_GROUP_SUBMIT: Submit jobs to queues belonging ++ * to a specific scheduling group. ++ */ ++ DRM_PANTHOR_GROUP_SUBMIT, ++ ++ /** @DRM_PANTHOR_GROUP_GET_STATE: Get the state of a scheduling group. */ ++ DRM_PANTHOR_GROUP_GET_STATE, ++ ++ /** @DRM_PANTHOR_TILER_HEAP_CREATE: Create a tiler heap. */ ++ DRM_PANTHOR_TILER_HEAP_CREATE, ++ ++ /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */ ++ DRM_PANTHOR_TILER_HEAP_DESTROY, ++}; ++ ++/** ++ * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number ++ * @__access: Access type. Must be R, W or RW. ++ * @__id: One of the DRM_PANTHOR_xxx id. ++ * @__type: Suffix of the type being passed to the IOCTL. ++ * ++ * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx ++ * values instead. ++ * ++ * Return: An IOCTL number to be passed to ioctl() from userspace. 
++ */ ++#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ ++ DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \ ++ struct drm_panthor_ ## __type) ++ ++#define DRM_IOCTL_PANTHOR_DEV_QUERY \ ++ DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query) ++#define DRM_IOCTL_PANTHOR_VM_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create) ++#define DRM_IOCTL_PANTHOR_VM_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy) ++#define DRM_IOCTL_PANTHOR_VM_BIND \ ++ DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind) ++#define DRM_IOCTL_PANTHOR_VM_GET_STATE \ ++ DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state) ++#define DRM_IOCTL_PANTHOR_BO_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create) ++#define DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET \ ++ DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset) ++#define DRM_IOCTL_PANTHOR_GROUP_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create) ++#define DRM_IOCTL_PANTHOR_GROUP_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy) ++#define DRM_IOCTL_PANTHOR_GROUP_SUBMIT \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit) ++#define DRM_IOCTL_PANTHOR_GROUP_GET_STATE \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state) ++#define DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create) ++#define DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy) ++ ++/** ++ * DOC: IOCTL arguments ++ */ ++ ++/** ++ * struct drm_panthor_obj_array - Object array. ++ * ++ * This object is used to pass an array of objects whose size is subject to changes in ++ * future versions of the driver. In order to support this mutability, we pass a stride ++ * describing the size of the object as known by userspace. ++ * ++ * You shouldn't fill drm_panthor_obj_array fields directly. You should instead use ++ * the DRM_PANTHOR_OBJ_ARRAY() macro that takes care of initializing the stride to ++ * the object size. ++ */ ++struct drm_panthor_obj_array { ++ /** @stride: Stride of object struct. Used for versioning. */ ++ __u32 stride; ++ ++ /** @count: Number of objects in the array. */ ++ __u32 count; ++ ++ /** @array: User pointer to an array of objects. */ ++ __u64 array; ++}; ++ ++/** ++ * DRM_PANTHOR_OBJ_ARRAY() - Initialize a drm_panthor_obj_array field. ++ * @cnt: Number of elements in the array. ++ * @ptr: Pointer to the array to pass to the kernel. ++ * ++ * Macro initializing a drm_panthor_obj_array based on the object size as known ++ * by userspace. ++ */ ++#define DRM_PANTHOR_OBJ_ARRAY(cnt, ptr) \ ++ { .stride = sizeof((ptr)[0]), .count = (cnt), .array = (__u64)(uintptr_t)(ptr) } ++ ++/** ++ * enum drm_panthor_sync_op_flags - Synchronization operation flags. ++ */ ++enum drm_panthor_sync_op_flags { ++ /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK: Synchronization handle type mask. */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK = 0xff, ++ ++ /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ: Synchronization object type. */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ = 0, ++ ++ /** ++ * @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ: Timeline synchronization ++ * object type. ++ */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ = 1, ++ ++ /** @DRM_PANTHOR_SYNC_OP_WAIT: Wait operation. */ ++ DRM_PANTHOR_SYNC_OP_WAIT = 0 << 31, ++ ++ /** @DRM_PANTHOR_SYNC_OP_SIGNAL: Signal operation. */ ++ DRM_PANTHOR_SYNC_OP_SIGNAL = (int)(1u << 31), ++}; ++ ++/** ++ * struct drm_panthor_sync_op - Synchronization operation. 
++ */ ++struct drm_panthor_sync_op { ++ /** @flags: Synchronization operation flags. Combination of DRM_PANTHOR_SYNC_OP values. */ ++ __u32 flags; ++ ++ /** @handle: Sync handle. */ ++ __u32 handle; ++ ++ /** ++ * @timeline_value: MBZ if ++ * (flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK) != ++ * DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ. ++ */ ++ __u64 timeline_value; ++}; ++ ++/** ++ * enum drm_panthor_dev_query_type - Query type ++ * ++ * Place new types at the end, don't re-order, don't remove or replace. ++ */ ++enum drm_panthor_dev_query_type { ++ /** @DRM_PANTHOR_DEV_QUERY_GPU_INFO: Query GPU information. */ ++ DRM_PANTHOR_DEV_QUERY_GPU_INFO = 0, ++ ++ /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */ ++ DRM_PANTHOR_DEV_QUERY_CSIF_INFO, ++}; ++ ++/** ++ * struct drm_panthor_gpu_info - GPU information ++ * ++ * Structure grouping all queryable information relating to the GPU. ++ */ ++struct drm_panthor_gpu_info { ++ /** @gpu_id : GPU ID. */ ++ __u32 gpu_id; ++#define DRM_PANTHOR_ARCH_MAJOR(x) ((x) >> 28) ++#define DRM_PANTHOR_ARCH_MINOR(x) (((x) >> 24) & 0xf) ++#define DRM_PANTHOR_ARCH_REV(x) (((x) >> 20) & 0xf) ++#define DRM_PANTHOR_PRODUCT_MAJOR(x) (((x) >> 16) & 0xf) ++#define DRM_PANTHOR_VERSION_MAJOR(x) (((x) >> 12) & 0xf) ++#define DRM_PANTHOR_VERSION_MINOR(x) (((x) >> 4) & 0xff) ++#define DRM_PANTHOR_VERSION_STATUS(x) ((x) & 0xf) ++ ++ /** @gpu_rev: GPU revision. */ ++ __u32 gpu_rev; ++ ++ /** @csf_id: Command stream frontend ID. */ ++ __u32 csf_id; ++#define DRM_PANTHOR_CSHW_MAJOR(x) (((x) >> 26) & 0x3f) ++#define DRM_PANTHOR_CSHW_MINOR(x) (((x) >> 20) & 0x3f) ++#define DRM_PANTHOR_CSHW_REV(x) (((x) >> 16) & 0xf) ++#define DRM_PANTHOR_MCU_MAJOR(x) (((x) >> 10) & 0x3f) ++#define DRM_PANTHOR_MCU_MINOR(x) (((x) >> 4) & 0x3f) ++#define DRM_PANTHOR_MCU_REV(x) ((x) & 0xf) ++ ++ /** @l2_features: L2-cache features. */ ++ __u32 l2_features; ++ ++ /** @tiler_features: Tiler features. */ ++ __u32 tiler_features; ++ ++ /** @mem_features: Memory features. */ ++ __u32 mem_features; ++ ++ /** @mmu_features: MMU features. */ ++ __u32 mmu_features; ++#define DRM_PANTHOR_MMU_VA_BITS(x) ((x) & 0xff) ++ ++ /** @thread_features: Thread features. */ ++ __u32 thread_features; ++ ++ /** @max_threads: Maximum number of threads. */ ++ __u32 max_threads; ++ ++ /** @thread_max_workgroup_size: Maximum workgroup size. */ ++ __u32 thread_max_workgroup_size; ++ ++ /** ++ * @thread_max_barrier_size: Maximum number of threads that can wait ++ * simultaneously on a barrier. ++ */ ++ __u32 thread_max_barrier_size; ++ ++ /** @coherency_features: Coherency features. */ ++ __u32 coherency_features; ++ ++ /** @texture_features: Texture features. */ ++ __u32 texture_features[4]; ++ ++ /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ ++ __u32 as_present; ++ ++ /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ ++ __u64 shader_present; ++ ++ /** @l2_present: Bitmask encoding the L2 caches exposed by the GPU. */ ++ __u64 l2_present; ++ ++ /** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. */ ++ __u64 tiler_present; ++ ++ /* @core_features: Used to discriminate core variants when they exist. */ ++ __u32 core_features; ++ ++ /* @pad: MBZ. */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_csif_info - Command stream interface information ++ * ++ * Structure grouping all queryable information relating to the command stream interface. 
++ */ ++struct drm_panthor_csif_info { ++ /** @csg_slot_count: Number of command stream group slots exposed by the firmware. */ ++ __u32 csg_slot_count; ++ ++ /** @cs_slot_count: Number of command stream slots per group. */ ++ __u32 cs_slot_count; ++ ++ /** @cs_reg_count: Number of command stream registers. */ ++ __u32 cs_reg_count; ++ ++ /** @scoreboard_slot_count: Number of scoreboard slots. */ ++ __u32 scoreboard_slot_count; ++ ++ /** ++ * @unpreserved_cs_reg_count: Number of command stream registers reserved by ++ * the kernel driver to call a userspace command stream. ++ * ++ * All registers can be used by a userspace command stream, but the ++ * [cs_slot_count - unpreserved_cs_reg_count .. cs_slot_count] registers are ++ * used by the kernel when DRM_PANTHOR_IOCTL_GROUP_SUBMIT is called. ++ */ ++ __u32 unpreserved_cs_reg_count; ++ ++ /** ++ * @pad: Padding field, set to zero. ++ */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY ++ */ ++struct drm_panthor_dev_query { ++ /** @type: the query type (see drm_panthor_dev_query_type). */ ++ __u32 type; ++ ++ /** ++ * @size: size of the type being queried. ++ * ++ * If pointer is NULL, size is updated by the driver to provide the ++ * output structure size. If pointer is not NULL, the driver will ++ * only copy min(size, actual_structure_size) bytes to the pointer, ++ * and update the size accordingly. This allows us to extend query ++ * types without breaking userspace. ++ */ ++ __u32 size; ++ ++ /** ++ * @pointer: user pointer to a query type struct. ++ * ++ * Pointer can be NULL, in which case, nothing is copied, but the ++ * actual structure size is returned. If not NULL, it must point to ++ * a location that's large enough to hold size bytes. ++ */ ++ __u64 pointer; ++}; ++ ++/** ++ * struct drm_panthor_vm_create - Arguments passed to DRM_PANTHOR_IOCTL_VM_CREATE ++ */ ++struct drm_panthor_vm_create { ++ /** @flags: VM flags, MBZ. */ ++ __u32 flags; ++ ++ /** @id: Returned VM ID. */ ++ __u32 id; ++ ++ /** ++ * @user_va_range: Size of the VA space reserved for user objects. ++ * ++ * The kernel will pick the remaining space to map kernel-only objects to the ++ * VM (heap chunks, heap context, ring buffers, kernel synchronization objects, ++ * ...). If the space left for kernel objects is too small, kernel object ++ * allocation will fail further down the road. One can use ++ * drm_panthor_gpu_info::mmu_features to extract the total virtual address ++ * range, and chose a user_va_range that leaves some space to the kernel. ++ * ++ * If user_va_range is zero, the kernel will pick a sensible value based on ++ * TASK_SIZE and the virtual range supported by the GPU MMU (the kernel/user ++ * split should leave enough VA space for userspace processes to support SVM, ++ * while still allowing the kernel to map some amount of kernel objects in ++ * the kernel VA range). The value chosen by the driver will be returned in ++ * @user_va_range. ++ * ++ * User VA space always starts at 0x0, kernel VA space is always placed after ++ * the user VA range. ++ */ ++ __u64 user_va_range; ++}; ++ ++/** ++ * struct drm_panthor_vm_destroy - Arguments passed to DRM_PANTHOR_IOCTL_VM_DESTROY ++ */ ++struct drm_panthor_vm_destroy { ++ /** @id: ID of the VM to destroy. */ ++ __u32 id; ++ ++ /** @pad: MBZ. 
*/ ++ __u32 pad; ++}; ++ ++/** ++ * enum drm_panthor_vm_bind_op_flags - VM bind operation flags ++ */ ++enum drm_panthor_vm_bind_op_flags { ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_READONLY: Map the memory read-only. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_READONLY = 1 << 0, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: Map the memory not-executable. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC = 1 << 1, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED: Map the memory uncached. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of operation. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MASK = (int)(0xfu << 28), ++ ++ /** @DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: Map operation. */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MAP = 0 << 28, ++ ++ /** @DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: Unmap operation. */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP = 1 << 28, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: No VM operation. ++ * ++ * Just serves as a synchronization point on a VM queue. ++ * ++ * Only valid if %DRM_PANTHOR_VM_BIND_ASYNC is set in drm_panthor_vm_bind::flags, ++ * and drm_panthor_vm_bind_op::syncs contains at least one element. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY = 2 << 28, ++}; ++ ++/** ++ * struct drm_panthor_vm_bind_op - VM bind operation ++ */ ++struct drm_panthor_vm_bind_op { ++ /** @flags: Combination of drm_panthor_vm_bind_op_flags flags. */ ++ __u32 flags; ++ ++ /** ++ * @bo_handle: Handle of the buffer object to map. ++ * MBZ for unmap or sync-only operations. ++ */ ++ __u32 bo_handle; ++ ++ /** ++ * @bo_offset: Buffer object offset. ++ * MBZ for unmap or sync-only operations. ++ */ ++ __u64 bo_offset; ++ ++ /** ++ * @va: Virtual address to map/unmap. ++ * MBZ for sync-only operations. ++ */ ++ __u64 va; ++ ++ /** ++ * @size: Size to map/unmap. ++ * MBZ for sync-only operations. ++ */ ++ __u64 size; ++ ++ /** ++ * @syncs: Array of struct drm_panthor_sync_op synchronization ++ * operations. ++ * ++ * This array must be empty if %DRM_PANTHOR_VM_BIND_ASYNC is not set on ++ * the drm_panthor_vm_bind object containing this VM bind operation. ++ * ++ * This array shall not be empty for sync-only operations. ++ */ ++ struct drm_panthor_obj_array syncs; ++ ++}; ++ ++/** ++ * enum drm_panthor_vm_bind_flags - VM bind flags ++ */ ++enum drm_panthor_vm_bind_flags { ++ /** ++ * @DRM_PANTHOR_VM_BIND_ASYNC: VM bind operations are queued to the VM ++ * queue instead of being executed synchronously. ++ */ ++ DRM_PANTHOR_VM_BIND_ASYNC = 1 << 0, ++}; ++ ++/** ++ * struct drm_panthor_vm_bind - Arguments passed to DRM_IOCTL_PANTHOR_VM_BIND ++ */ ++struct drm_panthor_vm_bind { ++ /** @vm_id: VM targeted by the bind request. */ ++ __u32 vm_id; ++ ++ /** @flags: Combination of drm_panthor_vm_bind_flags flags. */ ++ __u32 flags; ++ ++ /** @ops: Array of struct drm_panthor_vm_bind_op bind operations. */ ++ struct drm_panthor_obj_array ops; ++}; ++ ++/** ++ * enum drm_panthor_vm_state - VM states. ++ */ ++enum drm_panthor_vm_state { ++ /** ++ * @DRM_PANTHOR_VM_STATE_USABLE: VM is usable. ++ * ++ * New VM operations will be accepted on this VM. ++ */ ++ DRM_PANTHOR_VM_STATE_USABLE, ++ ++ /** ++ * @DRM_PANTHOR_VM_STATE_UNUSABLE: VM is unusable. 
++ * ++ * Something put the VM in an unusable state (like an asynchronous ++ * VM_BIND request failing for any reason). ++ * ++ * Once the VM is in this state, all new MAP operations will be ++ * rejected, and any GPU job targeting this VM will fail. ++ * UNMAP operations are still accepted. ++ * ++ * The only way to recover from an unusable VM is to create a new ++ * VM, and destroy the old one. ++ */ ++ DRM_PANTHOR_VM_STATE_UNUSABLE, ++}; ++ ++/** ++ * struct drm_panthor_vm_get_state - Get VM state. ++ */ ++struct drm_panthor_vm_get_state { ++ /** @vm_id: VM targeted by the get_state request. */ ++ __u32 vm_id; ++ ++ /** ++ * @state: state returned by the driver. ++ * ++ * Must be one of the enum drm_panthor_vm_state values. ++ */ ++ __u32 state; ++}; ++ ++/** ++ * enum drm_panthor_bo_flags - Buffer object flags, passed at creation time. ++ */ ++enum drm_panthor_bo_flags { ++ /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ ++ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), ++}; ++ ++/** ++ * struct drm_panthor_bo_create - Arguments passed to DRM_IOCTL_PANTHOR_BO_CREATE. ++ */ ++struct drm_panthor_bo_create { ++ /** ++ * @size: Requested size for the object ++ * ++ * The (page-aligned) allocated size for the object will be returned. ++ */ ++ __u64 size; ++ ++ /** ++ * @flags: Flags. Must be a combination of drm_panthor_bo_flags flags. ++ */ ++ __u32 flags; ++ ++ /** ++ * @exclusive_vm_id: Exclusive VM this buffer object will be mapped to. ++ * ++ * If not zero, the field must refer to a valid VM ID, and implies that: ++ * - the buffer object will only ever be bound to that VM ++ * - cannot be exported as a PRIME fd ++ */ ++ __u32 exclusive_vm_id; ++ ++ /** ++ * @handle: Returned handle for the object. ++ * ++ * Object handles are nonzero. ++ */ ++ __u32 handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_bo_mmap_offset - Arguments passed to DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET. ++ */ ++struct drm_panthor_bo_mmap_offset { ++ /** @handle: Handle of the object we want an mmap offset for. */ ++ __u32 handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @offset: The fake offset to use for subsequent mmap calls. */ ++ __u64 offset; ++}; ++ ++/** ++ * struct drm_panthor_queue_create - Queue creation arguments. ++ */ ++struct drm_panthor_queue_create { ++ /** ++ * @priority: Defines the priority of queues inside a group. Goes from 0 to 15, ++ * 15 being the highest priority. ++ */ ++ __u8 priority; ++ ++ /** @pad: Padding fields, MBZ. */ ++ __u8 pad[3]; ++ ++ /** @ringbuf_size: Size of the ring buffer to allocate to this queue. */ ++ __u32 ringbuf_size; ++}; ++ ++/** ++ * enum drm_panthor_group_priority - Scheduling group priority ++ */ ++enum drm_panthor_group_priority { ++ /** @PANTHOR_GROUP_PRIORITY_LOW: Low priority group. */ ++ PANTHOR_GROUP_PRIORITY_LOW = 0, ++ ++ /** @PANTHOR_GROUP_PRIORITY_MEDIUM: Medium priority group. */ ++ PANTHOR_GROUP_PRIORITY_MEDIUM, ++ ++ /** @PANTHOR_GROUP_PRIORITY_HIGH: High priority group. */ ++ PANTHOR_GROUP_PRIORITY_HIGH, ++}; ++ ++/** ++ * struct drm_panthor_group_create - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_CREATE ++ */ ++struct drm_panthor_group_create { ++ /** @queues: Array of drm_panthor_queue_create elements. */ ++ struct drm_panthor_obj_array queues; ++ ++ /** ++ * @max_compute_cores: Maximum number of cores that can be used by compute ++ * jobs across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @compute_core_mask. 
++ */ ++ __u8 max_compute_cores; ++ ++ /** ++ * @max_fragment_cores: Maximum number of cores that can be used by fragment ++ * jobs across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @fragment_core_mask. ++ */ ++ __u8 max_fragment_cores; ++ ++ /** ++ * @max_tiler_cores: Maximum number of tilers that can be used by tiler jobs ++ * across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @tiler_core_mask. ++ */ ++ __u8 max_tiler_cores; ++ ++ /** @priority: Group priority (see enum drm_panthor_group_priority). */ ++ __u8 priority; ++ ++ /** @pad: Padding field, MBZ. */ ++ __u32 pad; ++ ++ /** ++ * @compute_core_mask: Mask encoding cores that can be used for compute jobs. ++ * ++ * This field must have at least @max_compute_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::shader_present. ++ */ ++ __u64 compute_core_mask; ++ ++ /** ++ * @fragment_core_mask: Mask encoding cores that can be used for fragment jobs. ++ * ++ * This field must have at least @max_fragment_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::shader_present. ++ */ ++ __u64 fragment_core_mask; ++ ++ /** ++ * @tiler_core_mask: Mask encoding cores that can be used for tiler jobs. ++ * ++ * This field must have at least @max_tiler_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::tiler_present. ++ */ ++ __u64 tiler_core_mask; ++ ++ /** ++ * @vm_id: VM ID to bind this group to. ++ * ++ * All submission to queues bound to this group will use this VM. ++ */ ++ __u32 vm_id; ++ ++ /** ++ * @group_handle: Returned group handle. Passed back when submitting jobs or ++ * destroying a group. ++ */ ++ __u32 group_handle; ++}; ++ ++/** ++ * struct drm_panthor_group_destroy - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_DESTROY ++ */ ++struct drm_panthor_group_destroy { ++ /** @group_handle: Group to destroy */ ++ __u32 group_handle; ++ ++ /** @pad: Padding field, MBZ. */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_queue_submit - Job submission arguments. ++ * ++ * This is describing the userspace command stream to call from the kernel ++ * command stream ring-buffer. Queue submission is always part of a group ++ * submission, taking one or more jobs to submit to the underlying queues. ++ */ ++struct drm_panthor_queue_submit { ++ /** @queue_index: Index of the queue inside a group. */ ++ __u32 queue_index; ++ ++ /** ++ * @stream_size: Size of the command stream to execute. ++ * ++ * Must be 64-bit/8-byte aligned (the size of a CS instruction) ++ * ++ * Can be zero if stream_addr is zero too. ++ */ ++ __u32 stream_size; ++ ++ /** ++ * @stream_addr: GPU address of the command stream to execute. ++ * ++ * Must be aligned on 64-byte. ++ * ++ * Can be zero is stream_size is zero too. ++ */ ++ __u64 stream_addr; ++ ++ /** ++ * @latest_flush: FLUSH_ID read at the time the stream was built. ++ * ++ * This allows cache flush elimination for the automatic ++ * flush+invalidate(all) done at submission time, which is needed to ++ * ensure the GPU doesn't get garbage when reading the indirect command ++ * stream buffers. If you want the cache flush to happen ++ * unconditionally, pass a zero here. ++ */ ++ __u32 latest_flush; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @syncs: Array of struct drm_panthor_sync_op sync operations. 
*/ ++ struct drm_panthor_obj_array syncs; ++}; ++ ++/** ++ * struct drm_panthor_group_submit - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_SUBMIT ++ */ ++struct drm_panthor_group_submit { ++ /** @group_handle: Handle of the group to queue jobs to. */ ++ __u32 group_handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @queue_submits: Array of drm_panthor_queue_submit objects. */ ++ struct drm_panthor_obj_array queue_submits; ++}; ++ ++/** ++ * enum drm_panthor_group_state_flags - Group state flags ++ */ ++enum drm_panthor_group_state_flags { ++ /** ++ * @DRM_PANTHOR_GROUP_STATE_TIMEDOUT: Group had unfinished jobs. ++ * ++ * When a group ends up with this flag set, no jobs can be submitted to its queues. ++ */ ++ DRM_PANTHOR_GROUP_STATE_TIMEDOUT = 1 << 0, ++ ++ /** ++ * @DRM_PANTHOR_GROUP_STATE_FATAL_FAULT: Group had fatal faults. ++ * ++ * When a group ends up with this flag set, no jobs can be submitted to its queues. ++ */ ++ DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1, ++}; ++ ++/** ++ * struct drm_panthor_group_get_state - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_GET_STATE ++ * ++ * Used to query the state of a group and decide whether a new group should be created to ++ * replace it. ++ */ ++struct drm_panthor_group_get_state { ++ /** @group_handle: Handle of the group to query state on */ ++ __u32 group_handle; ++ ++ /** ++ * @state: Combination of DRM_PANTHOR_GROUP_STATE_* flags encoding the ++ * group state. ++ */ ++ __u32 state; ++ ++ /** @fatal_queues: Bitmask of queues that faced fatal faults. */ ++ __u32 fatal_queues; ++ ++ /** @pad: MBZ */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_tiler_heap_create - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE ++ */ ++struct drm_panthor_tiler_heap_create { ++ /** @vm_id: VM ID the tiler heap should be mapped to */ ++ __u32 vm_id; ++ ++ /** @initial_chunk_count: Initial number of chunks to allocate. */ ++ __u32 initial_chunk_count; ++ ++ /** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */ ++ __u32 chunk_size; ++ ++ /** @max_chunks: Maximum number of chunks that can be allocated. */ ++ __u32 max_chunks; ++ ++ /** ++ * @target_in_flight: Maximum number of in-flight render passes. ++ * ++ * If the heap has more than tiler jobs in-flight, the FW will wait for render ++ * passes to finish before queuing new tiler jobs. ++ */ ++ __u32 target_in_flight; ++ ++ /** @handle: Returned heap handle. Passed back to DESTROY_TILER_HEAP. */ ++ __u32 handle; ++ ++ /** @tiler_heap_ctx_gpu_va: Returned heap GPU virtual address returned */ ++ __u64 tiler_heap_ctx_gpu_va; ++ ++ /** ++ * @first_heap_chunk_gpu_va: First heap chunk. ++ * ++ * The tiler heap is formed of heap chunks forming a single-link list. This ++ * is the first element in the list. ++ */ ++ __u64 first_heap_chunk_gpu_va; ++}; ++ ++/** ++ * struct drm_panthor_tiler_heap_destroy - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY ++ */ ++struct drm_panthor_tiler_heap_destroy { ++ /** @handle: Handle of the tiler heap to destroy */ ++ __u32 handle; ++ ++ /** @pad: Padding field, MBZ. 
*/ ++ __u32 pad; ++}; ++ ++#if defined(__cplusplus) ++} ++#endif ++ ++#endif /* _PANTHOR_DRM_H_ */ diff --git a/patches-6.6/034-31-v6.10-drm-panthor-Add-GPU-register-definitions.patch b/patches-6.6/034-31-v6.10-drm-panthor-Add-GPU-register-definitions.patch new file mode 100644 index 0000000..371567e --- /dev/null +++ b/patches-6.6/034-31-v6.10-drm-panthor-Add-GPU-register-definitions.patch @@ -0,0 +1,280 @@ +From 546b366600ef34847702f43bb2d22f914d19eae0 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:16 +0100 +Subject: [PATCH] drm/panthor: Add GPU register definitions + +Those are the registers directly accessible through the MMIO range. + +FW registers are exposed in panthor_fw.h. + +v6: +- Add Maxime's and Heiko's acks + +v4: +- Add the CORE_FEATURES register (needed for GPU variants) +- Add Steve's R-b + +v3: +- Add macros to extract GPU ID info +- Formatting changes +- Remove AS_TRANSCFG_ADRMODE_LEGACY - it doesn't exist post-CSF +- Remove CSF_GPU_LATEST_FLUSH_ID_DEFAULT +- Add GPU_L2_FEATURES_LINE_SIZE for extracting the GPU cache line size + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-3-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_regs.h | 239 +++++++++++++++++++++++++ + 1 file changed, 239 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_regs.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_regs.h +@@ -0,0 +1,239 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++/* ++ * Register definitions based on mali_kbase_gpu_regmap.h and ++ * mali_kbase_gpu_regmap_csf.h ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
++ */ ++#ifndef __PANTHOR_REGS_H__ ++#define __PANTHOR_REGS_H__ ++ ++#define GPU_ID 0x0 ++#define GPU_ARCH_MAJOR(x) ((x) >> 28) ++#define GPU_ARCH_MINOR(x) (((x) & GENMASK(27, 24)) >> 24) ++#define GPU_ARCH_REV(x) (((x) & GENMASK(23, 20)) >> 20) ++#define GPU_PROD_MAJOR(x) (((x) & GENMASK(19, 16)) >> 16) ++#define GPU_VER_MAJOR(x) (((x) & GENMASK(15, 12)) >> 12) ++#define GPU_VER_MINOR(x) (((x) & GENMASK(11, 4)) >> 4) ++#define GPU_VER_STATUS(x) ((x) & GENMASK(3, 0)) ++ ++#define GPU_L2_FEATURES 0x4 ++#define GPU_L2_FEATURES_LINE_SIZE(x) (1 << ((x) & GENMASK(7, 0))) ++ ++#define GPU_CORE_FEATURES 0x8 ++ ++#define GPU_TILER_FEATURES 0xC ++#define GPU_MEM_FEATURES 0x10 ++#define GROUPS_L2_COHERENT BIT(0) ++ ++#define GPU_MMU_FEATURES 0x14 ++#define GPU_MMU_FEATURES_VA_BITS(x) ((x) & GENMASK(7, 0)) ++#define GPU_MMU_FEATURES_PA_BITS(x) (((x) >> 8) & GENMASK(7, 0)) ++#define GPU_AS_PRESENT 0x18 ++#define GPU_CSF_ID 0x1C ++ ++#define GPU_INT_RAWSTAT 0x20 ++#define GPU_INT_CLEAR 0x24 ++#define GPU_INT_MASK 0x28 ++#define GPU_INT_STAT 0x2c ++#define GPU_IRQ_FAULT BIT(0) ++#define GPU_IRQ_PROTM_FAULT BIT(1) ++#define GPU_IRQ_RESET_COMPLETED BIT(8) ++#define GPU_IRQ_POWER_CHANGED BIT(9) ++#define GPU_IRQ_POWER_CHANGED_ALL BIT(10) ++#define GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17) ++#define GPU_IRQ_DOORBELL_MIRROR BIT(18) ++#define GPU_IRQ_MCU_STATUS_CHANGED BIT(19) ++#define GPU_CMD 0x30 ++#define GPU_CMD_DEF(type, payload) ((type) | ((payload) << 8)) ++#define GPU_SOFT_RESET GPU_CMD_DEF(1, 1) ++#define GPU_HARD_RESET GPU_CMD_DEF(1, 2) ++#define CACHE_CLEAN BIT(0) ++#define CACHE_INV BIT(1) ++#define GPU_FLUSH_CACHES(l2, lsc, oth) \ ++ GPU_CMD_DEF(4, ((l2) << 0) | ((lsc) << 4) | ((oth) << 8)) ++ ++#define GPU_STATUS 0x34 ++#define GPU_STATUS_ACTIVE BIT(0) ++#define GPU_STATUS_PWR_ACTIVE BIT(1) ++#define GPU_STATUS_PAGE_FAULT BIT(4) ++#define GPU_STATUS_PROTM_ACTIVE BIT(7) ++#define GPU_STATUS_DBG_ENABLED BIT(8) ++ ++#define GPU_FAULT_STATUS 0x3C ++#define GPU_FAULT_ADDR_LO 0x40 ++#define GPU_FAULT_ADDR_HI 0x44 ++ ++#define GPU_PWR_KEY 0x50 ++#define GPU_PWR_KEY_UNLOCK 0x2968A819 ++#define GPU_PWR_OVERRIDE0 0x54 ++#define GPU_PWR_OVERRIDE1 0x58 ++ ++#define GPU_TIMESTAMP_OFFSET_LO 0x88 ++#define GPU_TIMESTAMP_OFFSET_HI 0x8C ++#define GPU_CYCLE_COUNT_LO 0x90 ++#define GPU_CYCLE_COUNT_HI 0x94 ++#define GPU_TIMESTAMP_LO 0x98 ++#define GPU_TIMESTAMP_HI 0x9C ++ ++#define GPU_THREAD_MAX_THREADS 0xA0 ++#define GPU_THREAD_MAX_WORKGROUP_SIZE 0xA4 ++#define GPU_THREAD_MAX_BARRIER_SIZE 0xA8 ++#define GPU_THREAD_FEATURES 0xAC ++ ++#define GPU_TEXTURE_FEATURES(n) (0xB0 + ((n) * 4)) ++ ++#define GPU_SHADER_PRESENT_LO 0x100 ++#define GPU_SHADER_PRESENT_HI 0x104 ++#define GPU_TILER_PRESENT_LO 0x110 ++#define GPU_TILER_PRESENT_HI 0x114 ++#define GPU_L2_PRESENT_LO 0x120 ++#define GPU_L2_PRESENT_HI 0x124 ++ ++#define SHADER_READY_LO 0x140 ++#define SHADER_READY_HI 0x144 ++#define TILER_READY_LO 0x150 ++#define TILER_READY_HI 0x154 ++#define L2_READY_LO 0x160 ++#define L2_READY_HI 0x164 ++ ++#define SHADER_PWRON_LO 0x180 ++#define SHADER_PWRON_HI 0x184 ++#define TILER_PWRON_LO 0x190 ++#define TILER_PWRON_HI 0x194 ++#define L2_PWRON_LO 0x1A0 ++#define L2_PWRON_HI 0x1A4 ++ ++#define SHADER_PWROFF_LO 0x1C0 ++#define SHADER_PWROFF_HI 0x1C4 ++#define TILER_PWROFF_LO 0x1D0 ++#define TILER_PWROFF_HI 0x1D4 ++#define L2_PWROFF_LO 0x1E0 ++#define L2_PWROFF_HI 0x1E4 ++ ++#define SHADER_PWRTRANS_LO 0x200 ++#define SHADER_PWRTRANS_HI 0x204 ++#define TILER_PWRTRANS_LO 0x210 ++#define TILER_PWRTRANS_HI 0x214 ++#define L2_PWRTRANS_LO 
0x220 ++#define L2_PWRTRANS_HI 0x224 ++ ++#define SHADER_PWRACTIVE_LO 0x240 ++#define SHADER_PWRACTIVE_HI 0x244 ++#define TILER_PWRACTIVE_LO 0x250 ++#define TILER_PWRACTIVE_HI 0x254 ++#define L2_PWRACTIVE_LO 0x260 ++#define L2_PWRACTIVE_HI 0x264 ++ ++#define GPU_REVID 0x280 ++ ++#define GPU_COHERENCY_FEATURES 0x300 ++#define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) ++ ++#define GPU_COHERENCY_PROTOCOL 0x304 ++#define GPU_COHERENCY_ACE 0 ++#define GPU_COHERENCY_ACE_LITE 1 ++#define GPU_COHERENCY_NONE 31 ++ ++#define MCU_CONTROL 0x700 ++#define MCU_CONTROL_ENABLE 1 ++#define MCU_CONTROL_AUTO 2 ++#define MCU_CONTROL_DISABLE 0 ++ ++#define MCU_STATUS 0x704 ++#define MCU_STATUS_DISABLED 0 ++#define MCU_STATUS_ENABLED 1 ++#define MCU_STATUS_HALT 2 ++#define MCU_STATUS_FATAL 3 ++ ++/* Job Control regs */ ++#define JOB_INT_RAWSTAT 0x1000 ++#define JOB_INT_CLEAR 0x1004 ++#define JOB_INT_MASK 0x1008 ++#define JOB_INT_STAT 0x100c ++#define JOB_INT_GLOBAL_IF BIT(31) ++#define JOB_INT_CSG_IF(x) BIT(x) ++ ++/* MMU regs */ ++#define MMU_INT_RAWSTAT 0x2000 ++#define MMU_INT_CLEAR 0x2004 ++#define MMU_INT_MASK 0x2008 ++#define MMU_INT_STAT 0x200c ++ ++/* AS_COMMAND register commands */ ++ ++#define MMU_BASE 0x2400 ++#define MMU_AS_SHIFT 6 ++#define MMU_AS(as) (MMU_BASE + ((as) << MMU_AS_SHIFT)) ++ ++#define AS_TRANSTAB_LO(as) (MMU_AS(as) + 0x0) ++#define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x4) ++#define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x8) ++#define AS_MEMATTR_HI(as) (MMU_AS(as) + 0xC) ++#define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL (2 << 2) ++#define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r) ((3 << 2) | \ ++ ((w) ? BIT(0) : 0) | \ ++ ((r) ? BIT(1) : 0)) ++#define AS_MEMATTR_AARCH64_SH_MIDGARD_INNER (0 << 4) ++#define AS_MEMATTR_AARCH64_SH_CPU_INNER (1 << 4) ++#define AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH (2 << 4) ++#define AS_MEMATTR_AARCH64_SHARED (0 << 6) ++#define AS_MEMATTR_AARCH64_INNER_OUTER_NC (1 << 6) ++#define AS_MEMATTR_AARCH64_INNER_OUTER_WB (2 << 6) ++#define AS_MEMATTR_AARCH64_FAULT (3 << 6) ++#define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) ++#define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) ++#define AS_COMMAND(as) (MMU_AS(as) + 0x18) ++#define AS_COMMAND_NOP 0 ++#define AS_COMMAND_UPDATE 1 ++#define AS_COMMAND_LOCK 2 ++#define AS_COMMAND_UNLOCK 3 ++#define AS_COMMAND_FLUSH_PT 4 ++#define AS_COMMAND_FLUSH_MEM 5 ++#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) ++#define AS_FAULTSTATUS(as) (MMU_AS(as) + 0x1C) ++#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) ++#define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) ++#define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) ++#define AS_STATUS(as) (MMU_AS(as) + 0x28) ++#define AS_STATUS_AS_ACTIVE BIT(0) ++#define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) ++#define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) ++#define AS_TRANSCFG_ADRMODE_UNMAPPED (1 << 0) ++#define AS_TRANSCFG_ADRMODE_IDENTITY (2 << 0) ++#define AS_TRANSCFG_ADRMODE_AARCH64_4K (6 << 0) ++#define AS_TRANSCFG_ADRMODE_AARCH64_64K (8 << 0) ++#define AS_TRANSCFG_INA_BITS(x) ((x) << 6) ++#define AS_TRANSCFG_OUTA_BITS(x) ((x) << 14) ++#define AS_TRANSCFG_SL_CONCAT BIT(22) ++#define AS_TRANSCFG_PTW_MEMATTR_NC (1 << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_WB (2 << 24) ++#define AS_TRANSCFG_PTW_SH_NS (0 << 28) ++#define AS_TRANSCFG_PTW_SH_OS (2 << 28) ++#define AS_TRANSCFG_PTW_SH_IS (3 << 
28) ++#define AS_TRANSCFG_PTW_RA BIT(30) ++#define AS_TRANSCFG_DISABLE_HIER_AP BIT(33) ++#define AS_TRANSCFG_DISABLE_AF_FAULT BIT(34) ++#define AS_TRANSCFG_WXN BIT(35) ++#define AS_TRANSCFG_XREADABLE BIT(36) ++#define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) ++#define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C) ++ ++#define CSF_GPU_LATEST_FLUSH_ID 0x10000 ++ ++#define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000)) ++#define CSF_GLB_DOORBELL_ID 0 ++ ++#define gpu_write(dev, reg, data) \ ++ writel(data, (dev)->iomem + (reg)) ++ ++#define gpu_read(dev, reg) \ ++ readl((dev)->iomem + (reg)) ++ ++#endif diff --git a/patches-6.6/034-32-v6.10-drm-panthor-Add-the-device-logical-block.patch b/patches-6.6/034-32-v6.10-drm-panthor-Add-the-device-logical-block.patch new file mode 100644 index 0000000..c2583d4 --- /dev/null +++ b/patches-6.6/034-32-v6.10-drm-panthor-Add-the-device-logical-block.patch @@ -0,0 +1,1013 @@ +From 5fe909cae118a757a77afb37174b99436a36d2e2 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:17 +0100 +Subject: [PATCH] drm/panthor: Add the device logical block + +The panthor driver is designed in a modular way, where each logical +block is dealing with a specific HW-block or software feature. In order +for those blocks to communicate with each other, we need a central +panthor_device collecting all the blocks, and exposing some common +features, like interrupt handling, power management, reset, ... + +This what this panthor_device logical block is about. + +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v5: +- Suspend the MMU/GPU blocks if panthor_fw_resume() fails in + panthor_device_resume() +- Move the pm_runtime_use_autosuspend() call before drm_dev_register() +- Add Liviu's R-b + +v4: +- Check drmm_mutex_init() return code +- Fix panthor_device_reset_work() out path +- Fix the race in the unplug logic +- Fix typos +- Unplug blocks when something fails in panthor_device_init() +- Add Steve's R-b + +v3: +- Add acks for the MIT+GPL2 relicensing +- Fix 32-bit support +- Shorten the sections protected by panthor_device::pm::mmio_lock to fix + lock ordering issues. +- Rename panthor_device::pm::lock into panthor_device::pm::mmio_lock to + better reflect what this lock is protecting +- Use dev_err_probe() +- Make sure we call drm_dev_exit() when something fails half-way in + panthor_device_reset_work() +- Replace CSF_GPU_LATEST_FLUSH_ID_DEFAULT with a constant '1' and a + comment to explain. Also remove setting the dummy flush ID on suspend. 
+- Remove drm_WARN_ON() in panthor_exception_name() +- Check pirq->suspended in panthor_xxx_irq_raw_handler() + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-4-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 549 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_device.h | 394 ++++++++++++++++ + 2 files changed, 943 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_device.c + create mode 100644 drivers/gpu/drm/panthor/panthor_device.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -0,0 +1,549 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gpu.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++static int panthor_clk_init(struct panthor_device *ptdev) ++{ ++ ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL); ++ if (IS_ERR(ptdev->clks.core)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.core), ++ "get 'core' clock failed"); ++ ++ ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks"); ++ if (IS_ERR(ptdev->clks.stacks)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.stacks), ++ "get 'stacks' clock failed"); ++ ++ ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup"); ++ if (IS_ERR(ptdev->clks.coregroup)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.coregroup), ++ "get 'coregroup' clock failed"); ++ ++ drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core)); ++ return 0; ++} ++ ++void panthor_device_unplug(struct panthor_device *ptdev) ++{ ++ /* This function can be called from two different path: the reset work ++ * and the platform device remove callback. drm_dev_unplug() doesn't ++ * deal with concurrent callers, so we have to protect drm_dev_unplug() ++ * calls with our own lock, and bail out if the device is already ++ * unplugged. ++ */ ++ mutex_lock(&ptdev->unplug.lock); ++ if (drm_dev_is_unplugged(&ptdev->base)) { ++ /* Someone beat us, release the lock and wait for the unplug ++ * operation to be reported as done. ++ **/ ++ mutex_unlock(&ptdev->unplug.lock); ++ wait_for_completion(&ptdev->unplug.done); ++ return; ++ } ++ ++ /* Call drm_dev_unplug() so any access to HW blocks happening after ++ * that point get rejected. ++ */ ++ drm_dev_unplug(&ptdev->base); ++ ++ /* We do the rest of the unplug with the unplug lock released, ++ * future callers will wait on ptdev->unplug.done anyway. ++ */ ++ mutex_unlock(&ptdev->unplug.lock); ++ ++ drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0); ++ ++ /* Now, try to cleanly shutdown the GPU before the device resources ++ * get reclaimed. 
++ */ ++ panthor_sched_unplug(ptdev); ++ panthor_fw_unplug(ptdev); ++ panthor_mmu_unplug(ptdev); ++ panthor_gpu_unplug(ptdev); ++ ++ pm_runtime_dont_use_autosuspend(ptdev->base.dev); ++ pm_runtime_put_sync_suspend(ptdev->base.dev); ++ ++ /* Report the unplug operation as done to unblock concurrent ++ * panthor_device_unplug() callers. ++ */ ++ complete_all(&ptdev->unplug.done); ++} ++ ++static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ ++ cancel_work_sync(&ptdev->reset.work); ++ destroy_workqueue(ptdev->reset.wq); ++} ++ ++static void panthor_device_reset_work(struct work_struct *work) ++{ ++ struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work); ++ int ret = 0, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) { ++ /* ++ * No need for a reset as the device has been (or will be) ++ * powered down ++ */ ++ atomic_set(&ptdev->reset.pending, 0); ++ return; ++ } ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return; ++ ++ panthor_sched_pre_reset(ptdev); ++ panthor_fw_pre_reset(ptdev, true); ++ panthor_mmu_pre_reset(ptdev); ++ panthor_gpu_soft_reset(ptdev); ++ panthor_gpu_l2_power_on(ptdev); ++ panthor_mmu_post_reset(ptdev); ++ ret = panthor_fw_post_reset(ptdev); ++ if (ret) ++ goto out_dev_exit; ++ ++ atomic_set(&ptdev->reset.pending, 0); ++ panthor_sched_post_reset(ptdev); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ ++ if (ret) { ++ panthor_device_unplug(ptdev); ++ drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable."); ++ } ++} ++ ++static bool panthor_device_is_initialized(struct panthor_device *ptdev) ++{ ++ return !!ptdev->scheduler; ++} ++ ++static void panthor_device_free_page(struct drm_device *ddev, void *data) ++{ ++ free_page((unsigned long)data); ++} ++ ++int panthor_device_init(struct panthor_device *ptdev) ++{ ++ struct resource *res; ++ struct page *p; ++ int ret; ++ ++ ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; ++ ++ init_completion(&ptdev->unplug.done); ++ ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); ++ if (ret) ++ return ret; ++ ++ ret = drmm_mutex_init(&ptdev->base, &ptdev->pm.mmio_lock); ++ if (ret) ++ return ret; ++ ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ p = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!p) ++ return -ENOMEM; ++ ++ ptdev->pm.dummy_latest_flush = page_address(p); ++ ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page, ++ ptdev->pm.dummy_latest_flush); ++ if (ret) ++ return ret; ++ ++ /* ++ * Set the dummy page holding the latest flush to 1. This will cause the ++ * flush to avoided as we know it isn't necessary if the submission ++ * happens while the dummy page is mapped. Zero cannot be used because ++ * that means 'always flush'. 
++ */ ++ *ptdev->pm.dummy_latest_flush = 1; ++ ++ INIT_WORK(&ptdev->reset.work, panthor_device_reset_work); ++ ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0); ++ if (!ptdev->reset.wq) ++ return -ENOMEM; ++ ++ ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL); ++ if (ret) ++ return ret; ++ ++ ret = panthor_clk_init(ptdev); ++ if (ret) ++ return ret; ++ ++ ret = panthor_devfreq_init(ptdev); ++ if (ret) ++ return ret; ++ ++ ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev), ++ 0, &res); ++ if (IS_ERR(ptdev->iomem)) ++ return PTR_ERR(ptdev->iomem); ++ ++ ptdev->phys_addr = res->start; ++ ++ ret = devm_pm_runtime_enable(ptdev->base.dev); ++ if (ret) ++ return ret; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (ret) ++ return ret; ++ ++ ret = panthor_gpu_init(ptdev); ++ if (ret) ++ goto err_rpm_put; ++ ++ ret = panthor_mmu_init(ptdev); ++ if (ret) ++ goto err_unplug_gpu; ++ ++ ret = panthor_fw_init(ptdev); ++ if (ret) ++ goto err_unplug_mmu; ++ ++ ret = panthor_sched_init(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ /* ~3 frames */ ++ pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50); ++ pm_runtime_use_autosuspend(ptdev->base.dev); ++ ++ ret = drm_dev_register(&ptdev->base, 0); ++ if (ret) ++ goto err_disable_autosuspend; ++ ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ return 0; ++ ++err_disable_autosuspend: ++ pm_runtime_dont_use_autosuspend(ptdev->base.dev); ++ panthor_sched_unplug(ptdev); ++ ++err_unplug_fw: ++ panthor_fw_unplug(ptdev); ++ ++err_unplug_mmu: ++ panthor_mmu_unplug(ptdev); ++ ++err_unplug_gpu: ++ panthor_gpu_unplug(ptdev); ++ ++err_rpm_put: ++ pm_runtime_put_sync_suspend(ptdev->base.dev); ++ return ret; ++} ++ ++#define PANTHOR_EXCEPTION(id) \ ++ [DRM_PANTHOR_EXCEPTION_ ## id] = { \ ++ .name = #id, \ ++ } ++ ++struct panthor_exception_info { ++ const char *name; ++}; ++ ++static const struct panthor_exception_info panthor_exception_infos[] = { ++ PANTHOR_EXCEPTION(OK), ++ PANTHOR_EXCEPTION(TERMINATED), ++ PANTHOR_EXCEPTION(KABOOM), ++ PANTHOR_EXCEPTION(EUREKA), ++ PANTHOR_EXCEPTION(ACTIVE), ++ PANTHOR_EXCEPTION(CS_RES_TERM), ++ PANTHOR_EXCEPTION(CS_CONFIG_FAULT), ++ PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), ++ PANTHOR_EXCEPTION(CS_BUS_FAULT), ++ PANTHOR_EXCEPTION(CS_INSTR_INVALID), ++ PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW), ++ PANTHOR_EXCEPTION(CS_INHERIT_FAULT), ++ PANTHOR_EXCEPTION(INSTR_INVALID_PC), ++ PANTHOR_EXCEPTION(INSTR_INVALID_ENC), ++ PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT), ++ PANTHOR_EXCEPTION(DATA_INVALID_FAULT), ++ PANTHOR_EXCEPTION(TILE_RANGE_FAULT), ++ PANTHOR_EXCEPTION(ADDR_RANGE_FAULT), ++ PANTHOR_EXCEPTION(IMPRECISE_FAULT), ++ PANTHOR_EXCEPTION(OOM), ++ PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR), ++ PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT), ++ PANTHOR_EXCEPTION(GPU_BUS_FAULT), ++ PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT), ++ PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT), ++ PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_0), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_1), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_2), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_3), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_4), ++ PANTHOR_EXCEPTION(PERM_FAULT_0), ++ PANTHOR_EXCEPTION(PERM_FAULT_1), ++ PANTHOR_EXCEPTION(PERM_FAULT_2), ++ PANTHOR_EXCEPTION(PERM_FAULT_3), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_1), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_2), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_3), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN), ++ 
PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3), ++}; ++ ++const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code) ++{ ++ if (exception_code >= ARRAY_SIZE(panthor_exception_infos) || ++ !panthor_exception_infos[exception_code].name) ++ return "Unknown exception type"; ++ ++ return panthor_exception_infos[exception_code].name; ++} ++ ++static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct panthor_device *ptdev = vma->vm_private_data; ++ u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ unsigned long pfn; ++ pgprot_t pgprot; ++ vm_fault_t ret; ++ bool active; ++ int cookie; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return VM_FAULT_SIGBUS; ++ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE; ++ ++ switch (panthor_device_mmio_offset(id)) { ++ case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: ++ if (active) ++ pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); ++ else ++ pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush); ++ break; ++ ++ default: ++ ret = VM_FAULT_SIGBUS; ++ goto out_unlock; ++ } ++ ++ pgprot = vma->vm_page_prot; ++ if (active) ++ pgprot = pgprot_noncached(pgprot); ++ ++ ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot); ++ ++out_unlock: ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static const struct vm_operations_struct panthor_mmio_vm_ops = { ++ .fault = panthor_mmio_vm_fault, ++}; ++ ++int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma) ++{ ++ u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ ++ switch (panthor_device_mmio_offset(id)) { ++ case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: ++ if (vma->vm_end - vma->vm_start != PAGE_SIZE || ++ (vma->vm_flags & (VM_WRITE | VM_EXEC))) ++ return -EINVAL; ++ ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ /* Defer actual mapping to the fault handler. 
*/ ++ vma->vm_private_data = ptdev; ++ vma->vm_ops = &panthor_mmio_vm_ops; ++ vm_flags_set(vma, ++ VM_IO | VM_DONTCOPY | VM_DONTEXPAND | ++ VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP); ++ return 0; ++} ++ ++#ifdef CONFIG_PM ++int panthor_device_resume(struct device *dev) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ int ret, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_SUSPENDED) ++ return -EINVAL; ++ ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING); ++ ++ ret = clk_prepare_enable(ptdev->clks.core); ++ if (ret) ++ goto err_set_suspended; ++ ++ ret = clk_prepare_enable(ptdev->clks.stacks); ++ if (ret) ++ goto err_disable_core_clk; ++ ++ ret = clk_prepare_enable(ptdev->clks.coregroup); ++ if (ret) ++ goto err_disable_stacks_clk; ++ ++ ret = panthor_devfreq_resume(ptdev); ++ if (ret) ++ goto err_disable_coregroup_clk; ++ ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_gpu_resume(ptdev); ++ panthor_mmu_resume(ptdev); ++ ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); ++ if (!ret) { ++ panthor_sched_resume(ptdev); ++ } else { ++ panthor_mmu_suspend(ptdev); ++ panthor_gpu_suspend(ptdev); ++ } ++ ++ drm_dev_exit(cookie); ++ ++ if (ret) ++ goto err_suspend_devfreq; ++ } ++ ++ if (atomic_read(&ptdev->reset.pending)) ++ queue_work(ptdev->reset.wq, &ptdev->reset.work); ++ ++ /* Clear all IOMEM mappings pointing to this device after we've ++ * resumed. This way the fake mappings pointing to the dummy pages ++ * are removed and the real iomem mapping will be restored on next ++ * access. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ return 0; ++ ++err_suspend_devfreq: ++ panthor_devfreq_suspend(ptdev); ++ ++err_disable_coregroup_clk: ++ clk_disable_unprepare(ptdev->clks.coregroup); ++ ++err_disable_stacks_clk: ++ clk_disable_unprepare(ptdev->clks.stacks); ++ ++err_disable_core_clk: ++ clk_disable_unprepare(ptdev->clks.core); ++ ++err_set_suspended: ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ return ret; ++} ++ ++int panthor_device_suspend(struct device *dev) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ int ret, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) ++ return -EINVAL; ++ ++ /* Clear all IOMEM mappings pointing to this device before we ++ * shutdown the power-domain and clocks. Failing to do that results ++ * in external aborts when the process accesses the iomem region. ++ * We change the state and call unmap_mapping_range() with the ++ * mmio_lock held to make sure the vm_fault handler won't set up ++ * invalid mappings. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ cancel_work_sync(&ptdev->reset.work); ++ ++ /* We prepare everything as if we were resetting the GPU. ++ * The end of the reset will happen in the resume path though. 
++ */ ++ panthor_sched_suspend(ptdev); ++ panthor_fw_suspend(ptdev); ++ panthor_mmu_suspend(ptdev); ++ panthor_gpu_suspend(ptdev); ++ drm_dev_exit(cookie); ++ } ++ ++ ret = panthor_devfreq_suspend(ptdev); ++ if (ret) { ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_gpu_resume(ptdev); ++ panthor_mmu_resume(ptdev); ++ drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); ++ panthor_sched_resume(ptdev); ++ drm_dev_exit(cookie); ++ } ++ ++ goto err_set_active; ++ } ++ ++ clk_disable_unprepare(ptdev->clks.coregroup); ++ clk_disable_unprepare(ptdev->clks.stacks); ++ clk_disable_unprepare(ptdev->clks.core); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ return 0; ++ ++err_set_active: ++ /* If something failed and we have to revert back to an ++ * active state, we also need to clear the MMIO userspace ++ * mappings, so any dumb pages that were mapped while we ++ * were trying to suspend gets invalidated. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ return ret; ++} ++#endif +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -0,0 +1,394 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_DEVICE_H__ ++#define __PANTHOR_DEVICE_H__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++struct panthor_csf; ++struct panthor_csf_ctx; ++struct panthor_device; ++struct panthor_gpu; ++struct panthor_group_pool; ++struct panthor_heap_pool; ++struct panthor_job; ++struct panthor_mmu; ++struct panthor_fw; ++struct panthor_perfcnt; ++struct panthor_vm; ++struct panthor_vm_pool; ++ ++/** ++ * enum panthor_device_pm_state - PM state ++ */ ++enum panthor_device_pm_state { ++ /** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */ ++ PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */ ++ PANTHOR_DEVICE_PM_STATE_RESUMING, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */ ++ PANTHOR_DEVICE_PM_STATE_ACTIVE, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */ ++ PANTHOR_DEVICE_PM_STATE_SUSPENDING, ++}; ++ ++/** ++ * struct panthor_irq - IRQ data ++ * ++ * Used to automate IRQ handling for the 3 different IRQs we have in this driver. ++ */ ++struct panthor_irq { ++ /** @ptdev: Panthor device */ ++ struct panthor_device *ptdev; ++ ++ /** @irq: IRQ number. */ ++ int irq; ++ ++ /** @mask: Current mask being applied to xxx_INT_MASK. */ ++ u32 mask; ++ ++ /** @suspended: Set to true when the IRQ is suspended. */ ++ atomic_t suspended; ++}; ++ ++/** ++ * struct panthor_device - Panthor device ++ */ ++struct panthor_device { ++ /** @base: Base drm_device. */ ++ struct drm_device base; ++ ++ /** @phys_addr: Physical address of the iomem region. */ ++ phys_addr_t phys_addr; ++ ++ /** @iomem: CPU mapping of the IOMEM region. */ ++ void __iomem *iomem; ++ ++ /** @clks: GPU clocks. */ ++ struct { ++ /** @core: Core clock. */ ++ struct clk *core; ++ ++ /** @stacks: Stacks clock. This clock is optional. */ ++ struct clk *stacks; ++ ++ /** @coregroup: Core group clock. This clock is optional. 
*/ ++ struct clk *coregroup; ++ } clks; ++ ++ /** @coherent: True if the CPU/GPU are memory coherent. */ ++ bool coherent; ++ ++ /** @gpu_info: GPU information. */ ++ struct drm_panthor_gpu_info gpu_info; ++ ++ /** @csif_info: Command stream interface information. */ ++ struct drm_panthor_csif_info csif_info; ++ ++ /** @gpu: GPU management data. */ ++ struct panthor_gpu *gpu; ++ ++ /** @fw: FW management data. */ ++ struct panthor_fw *fw; ++ ++ /** @mmu: MMU management data. */ ++ struct panthor_mmu *mmu; ++ ++ /** @scheduler: Scheduler management data. */ ++ struct panthor_scheduler *scheduler; ++ ++ /** @devfreq: Device frequency scaling management data. */ ++ struct panthor_devfreq *devfreq; ++ ++ /** @unplug: Device unplug related fields. */ ++ struct { ++ /** @lock: Lock used to serialize unplug operations. */ ++ struct mutex lock; ++ ++ /** ++ * @done: Completion object signaled when the unplug ++ * operation is done. ++ */ ++ struct completion done; ++ } unplug; ++ ++ /** @reset: Reset related fields. */ ++ struct { ++ /** @wq: Ordered workqueue used to schedule reset operations. */ ++ struct workqueue_struct *wq; ++ ++ /** @work: Reset work. */ ++ struct work_struct work; ++ ++ /** @pending: Set to true if a reset is pending. */ ++ atomic_t pending; ++ } reset; ++ ++ /** @pm: Power management related data. */ ++ struct { ++ /** @state: Power state. */ ++ atomic_t state; ++ ++ /** ++ * @mmio_lock: Lock protecting MMIO userspace CPU mappings. ++ * ++ * This is needed to ensure we map the dummy IO pages when ++ * the device is being suspended, and the real IO pages when ++ * the device is being resumed. We can't just rely on the ++ * state atomicity to deal with this race. ++ */ ++ struct mutex mmio_lock; ++ ++ /** ++ * @dummy_latest_flush: Dummy LATEST_FLUSH page. ++ * ++ * Used to replace the real LATEST_FLUSH page when the GPU ++ * is suspended. ++ */ ++ u32 *dummy_latest_flush; ++ } pm; ++}; ++ ++/** ++ * struct panthor_file - Panthor file ++ */ ++struct panthor_file { ++ /** @ptdev: Device attached to this file. */ ++ struct panthor_device *ptdev; ++ ++ /** @vms: VM pool attached to this file. */ ++ struct panthor_vm_pool *vms; ++ ++ /** @groups: Scheduling group pool attached to this file. */ ++ struct panthor_group_pool *groups; ++}; ++ ++int panthor_device_init(struct panthor_device *ptdev); ++void panthor_device_unplug(struct panthor_device *ptdev); ++ ++/** ++ * panthor_device_schedule_reset() - Schedules a reset operation ++ */ ++static inline void panthor_device_schedule_reset(struct panthor_device *ptdev) ++{ ++ if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) && ++ atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE) ++ queue_work(ptdev->reset.wq, &ptdev->reset.work); ++} ++ ++/** ++ * panthor_device_reset_is_pending() - Checks if a reset is pending. ++ * ++ * Return: true if a reset is pending, false otherwise.
++ */ ++static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev) ++{ ++ return atomic_read(&ptdev->reset.pending) != 0; ++} ++ ++int panthor_device_mmap_io(struct panthor_device *ptdev, ++ struct vm_area_struct *vma); ++ ++int panthor_device_resume(struct device *dev); ++int panthor_device_suspend(struct device *dev); ++ ++enum drm_panthor_exception_type { ++ DRM_PANTHOR_EXCEPTION_OK = 0x00, ++ DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, ++ DRM_PANTHOR_EXCEPTION_KABOOM = 0x05, ++ DRM_PANTHOR_EXCEPTION_EUREKA = 0x06, ++ DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08, ++ DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, ++ DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, ++ DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, ++ DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, ++ DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, ++ DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, ++ DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a, ++ DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b, ++ DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50, ++ DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51, ++ DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, ++ DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58, ++ DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59, ++ DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, ++ DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b, ++ DRM_PANTHOR_EXCEPTION_OOM = 0x60, ++ DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68, ++ DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69, ++ DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80, ++ DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, ++ DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, ++ DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, ++}; ++ ++/** ++ * panthor_exception_is_fault() - Checks if an exception is a fault. ++ * ++ * Return: true if the exception is a fault, false otherwise. ++ */ ++static inline bool ++panthor_exception_is_fault(u32 exception_code) ++{ ++ return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT; ++} ++ ++const char *panthor_exception_name(struct panthor_device *ptdev, ++ u32 exception_code); ++ ++/** ++ * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt ++ * registration function. ++ * ++ * The boiler-plate to gracefully deal with shared interrupts is ++ * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER() ++ * just after the actual handler. 
The handler prototype is: ++ * ++ * void (*handler)(struct panthor_device *, u32 status); ++ */ ++#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler) \ ++static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \ ++{ \ ++ struct panthor_irq *pirq = data; \ ++ struct panthor_device *ptdev = pirq->ptdev; \ ++ \ ++ if (atomic_read(&pirq->suspended)) \ ++ return IRQ_NONE; \ ++ if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT)) \ ++ return IRQ_NONE; \ ++ \ ++ gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0); \ ++ return IRQ_WAKE_THREAD; \ ++} \ ++ \ ++static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \ ++{ \ ++ struct panthor_irq *pirq = data; \ ++ struct panthor_device *ptdev = pirq->ptdev; \ ++ irqreturn_t ret = IRQ_NONE; \ ++ \ ++ while (true) { \ ++ u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask; \ ++ \ ++ if (!status) \ ++ break; \ ++ \ ++ gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status); \ ++ \ ++ __handler(ptdev, status); \ ++ ret = IRQ_HANDLED; \ ++ } \ ++ \ ++ if (!atomic_read(&pirq->suspended)) \ ++ gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask); \ ++ \ ++ return ret; \ ++} \ ++ \ ++static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ ++{ \ ++ int cookie; \ ++ \ ++ atomic_set(&pirq->suspended, true); \ ++ \ ++ if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ ++ synchronize_irq(pirq->irq); \ ++ drm_dev_exit(cookie); \ ++ } \ ++ \ ++ pirq->mask = 0; \ ++} \ ++ \ ++static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ ++{ \ ++ int cookie; \ ++ \ ++ atomic_set(&pirq->suspended, false); \ ++ pirq->mask = mask; \ ++ \ ++ if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ ++ drm_dev_exit(cookie); \ ++ } \ ++} \ ++ \ ++static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ ++ struct panthor_irq *pirq, \ ++ int irq, u32 mask) \ ++{ \ ++ pirq->ptdev = ptdev; \ ++ pirq->irq = irq; \ ++ panthor_ ## __name ## _irq_resume(pirq, mask); \ ++ \ ++ return devm_request_threaded_irq(ptdev->base.dev, irq, \ ++ panthor_ ## __name ## _irq_raw_handler, \ ++ panthor_ ## __name ## _irq_threaded_handler, \ ++ IRQF_SHARED, KBUILD_MODNAME "-" # __name, \ ++ pirq); \ ++} ++ ++/** ++ * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one ++ * @offset: Offset to convert. ++ * ++ * With 32-bit systems being limited by the 32-bit representation of mmap2's ++ * pgoffset field, we need to make the MMIO offset arch specific. This function ++ * converts a user MMIO offset into something the kernel driver understands. ++ * ++ * If the kernel and userspace architecture match, the offset is unchanged. If ++ * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match ++ * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible. ++ * ++ * Return: Adjusted offset. 
++ */ ++static inline u64 panthor_device_mmio_offset(u64 offset) ++{ ++#ifdef CONFIG_ARM64 ++ if (test_tsk_thread_flag(current, TIF_32BIT)) ++ offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; ++#endif ++ ++ return offset; ++} ++ ++extern struct workqueue_struct *panthor_cleanup_wq; ++ ++#endif diff --git a/patches-6.6/034-33-v6.10-drm-panthor-Add-the-GPU-logical-block.patch b/patches-6.6/034-33-v6.10-drm-panthor-Add-the-GPU-logical-block.patch new file mode 100644 index 0000000..dbc1ba9 --- /dev/null +++ b/patches-6.6/034-33-v6.10-drm-panthor-Add-the-GPU-logical-block.patch @@ -0,0 +1,593 @@ +From 5cd894e258c4b0b92b9b475309cea244e590d194 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:18 +0100 +Subject: [PATCH] drm/panthor: Add the GPU logical block + +Handles everything that's not related to the FW, the MMU or the +scheduler. This is the block dealing with the GPU property retrieval, +the GPU block power on/off logic, and some global operations, like +global cache flushing. + +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix GPU_MODEL() kernel doc +- Fix test in panthor_gpu_block_power_off() +- Add Steve's R-b + +v4: +- Expose CORE_FEATURES through DEV_QUERY + +v3: +- Add acks for the MIT/GPL2 relicensing +- Use macros to extract GPU ID info +- Make sure we reset clear pending_reqs bits when wait_event_timeout() + times out but the corresponding bit is cleared in GPU_INT_RAWSTAT + (can happen if the IRQ is masked or HW takes to long to call the IRQ + handler) +- GPU_MODEL now takes separate arch and product majors to be more + readable. +- Drop GPU_IRQ_MCU_STATUS_CHANGED from interrupt mask. +- Handle GPU_IRQ_PROTM_FAULT correctly (don't output registers that are + not updated for protected interrupts). +- Minor code tidy ups + +Cc: Alexey Sheplyakov # MIT+GPL2 relicensing +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-5-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_gpu.c | 482 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_gpu.h | 52 +++ + 2 files changed, 534 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.c + create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gpu.c +@@ -0,0 +1,482 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd., Rob Herring */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gpu.h" ++#include "panthor_regs.h" ++ ++/** ++ * struct panthor_gpu - GPU block management data. ++ */ ++struct panthor_gpu { ++ /** @irq: GPU irq. */ ++ struct panthor_irq irq; ++ ++ /** @reqs_lock: Lock protecting access to pending_reqs. */ ++ spinlock_t reqs_lock; ++ ++ /** @pending_reqs: Pending GPU requests. */ ++ u32 pending_reqs; ++ ++ /** @reqs_acked: GPU request wait queue. 
*/ ++ wait_queue_head_t reqs_acked; ++}; ++ ++/** ++ * struct panthor_model - GPU model description ++ */ ++struct panthor_model { ++ /** @name: Model name. */ ++ const char *name; ++ ++ /** @arch_major: Major version number of architecture. */ ++ u8 arch_major; ++ ++ /** @product_major: Major version number of product. */ ++ u8 product_major; ++}; ++ ++/** ++ * GPU_MODEL() - Define a GPU model. A GPU product can be uniquely identified ++ * by a combination of the major architecture version and the major product ++ * version. ++ * @_name: Name for the GPU model. ++ * @_arch_major: Architecture major. ++ * @_product_major: Product major. ++ */ ++#define GPU_MODEL(_name, _arch_major, _product_major) \ ++{\ ++ .name = __stringify(_name), \ ++ .arch_major = _arch_major, \ ++ .product_major = _product_major, \ ++} ++ ++static const struct panthor_model gpu_models[] = { ++ GPU_MODEL(g610, 10, 7), ++ {}, ++}; ++ ++#define GPU_INTERRUPTS_MASK \ ++ (GPU_IRQ_FAULT | \ ++ GPU_IRQ_PROTM_FAULT | \ ++ GPU_IRQ_RESET_COMPLETED | \ ++ GPU_IRQ_CLEAN_CACHES_COMPLETED) ++ ++static void panthor_gpu_init_info(struct panthor_device *ptdev) ++{ ++ const struct panthor_model *model; ++ u32 arch_major, product_major; ++ u32 major, minor, status; ++ unsigned int i; ++ ++ ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); ++ ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); ++ ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); ++ ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); ++ ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES); ++ ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES); ++ ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES); ++ ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES); ++ ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES); ++ ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS); ++ ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE); ++ ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE); ++ ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES); ++ for (i = 0; i < 4; i++) ++ ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i)); ++ ++ ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); ++ ++ ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO); ++ ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32; ++ ++ ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO); ++ ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32; ++ ++ ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO); ++ ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32; ++ ++ arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id); ++ product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id); ++ major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id); ++ minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id); ++ status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id); ++ ++ for (model = gpu_models; model->name; model++) { ++ if (model->arch_major == arch_major && ++ model->product_major == product_major) ++ break; ++ } ++ ++ drm_info(&ptdev->base, ++ "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x", ++ model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16, ++ major, minor, status); ++ ++ drm_info(&ptdev->base, ++ "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x", ++ 
ptdev->gpu_info.l2_features, ++ ptdev->gpu_info.tiler_features, ++ ptdev->gpu_info.mem_features, ++ ptdev->gpu_info.mmu_features, ++ ptdev->gpu_info.as_present); ++ ++ drm_info(&ptdev->base, ++ "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx", ++ ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present, ++ ptdev->gpu_info.tiler_present); ++} ++ ++static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ if (status & GPU_IRQ_FAULT) { ++ u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS); ++ u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) | ++ gpu_read(ptdev, GPU_FAULT_ADDR_LO); ++ ++ drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n", ++ fault_status, panthor_exception_name(ptdev, fault_status & 0xFF), ++ address); ++ } ++ if (status & GPU_IRQ_PROTM_FAULT) ++ drm_warn(&ptdev->base, "GPU Fault in protected mode\n"); ++ ++ spin_lock(&ptdev->gpu->reqs_lock); ++ if (status & ptdev->gpu->pending_reqs) { ++ ptdev->gpu->pending_reqs &= ~status; ++ wake_up_all(&ptdev->gpu->reqs_acked); ++ } ++ spin_unlock(&ptdev->gpu->reqs_lock); ++} ++PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler); ++ ++/** ++ * panthor_gpu_unplug() - Called when the GPU is unplugged. ++ * @ptdev: Device to unplug. ++ */ ++void panthor_gpu_unplug(struct panthor_device *ptdev) ++{ ++ unsigned long flags; ++ ++ /* Make sure the IRQ handler is not running after that point. */ ++ panthor_gpu_irq_suspend(&ptdev->gpu->irq); ++ ++ /* Wake-up all waiters. */ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ ptdev->gpu->pending_reqs = 0; ++ wake_up_all(&ptdev->gpu->reqs_acked); ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++} ++ ++/** ++ * panthor_gpu_init() - Initialize the GPU block ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_init(struct panthor_device *ptdev) ++{ ++ struct panthor_gpu *gpu; ++ u32 pa_bits; ++ int ret, irq; ++ ++ gpu = drmm_kzalloc(&ptdev->base, sizeof(*gpu), GFP_KERNEL); ++ if (!gpu) ++ return -ENOMEM; ++ ++ spin_lock_init(&gpu->reqs_lock); ++ init_waitqueue_head(&gpu->reqs_acked); ++ ptdev->gpu = gpu; ++ panthor_gpu_init_info(ptdev); ++ ++ dma_set_max_seg_size(ptdev->base.dev, UINT_MAX); ++ pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); ++ ret = dma_set_mask_and_coherent(ptdev->base.dev, DMA_BIT_MASK(pa_bits)); ++ if (ret) ++ return ret; ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); ++ if (irq <= 0) ++ return ret; ++ ++ ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_block_power_off() - Power-off a specific block of the GPU ++ * @ptdev: Device. ++ * @blk_name: Block name. ++ * @pwroff_reg: Power-off register for this block. ++ * @pwrtrans_reg: Power transition register for this block. ++ * @mask: Sub-elements to power-off. ++ * @timeout_us: Timeout in microseconds. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++int panthor_gpu_block_power_off(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwroff_reg, u32 pwrtrans_reg, ++ u64 mask, u32 timeout_us) ++{ ++ u32 val, i; ++ int ret; ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ if (mask & GENMASK(31, 0)) ++ gpu_write(ptdev, pwroff_reg, mask); ++ ++ if (mask >> 32) ++ gpu_write(ptdev, pwroff_reg + 4, mask >> 32); ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_block_power_on() - Power-on a specific block of the GPU ++ * @ptdev: Device. ++ * @blk_name: Block name. ++ * @pwron_reg: Power-on register for this block. ++ * @pwrtrans_reg: Power transition register for this block. ++ * @rdy_reg: Power transition ready register. ++ * @mask: Sub-elements to power-on. ++ * @timeout_us: Timeout in microseconds. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_block_power_on(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwron_reg, u32 pwrtrans_reg, ++ u32 rdy_reg, u64 mask, u32 timeout_us) ++{ ++ u32 val, i; ++ int ret; ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ if (mask & GENMASK(31, 0)) ++ gpu_write(ptdev, pwron_reg, mask); ++ ++ if (mask >> 32) ++ gpu_write(ptdev, pwron_reg + 4, mask >> 32); ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4), ++ val, (mask32 & val) == mask32, ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx readyness", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_l2_power_on() - Power-on the L2-cache ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_l2_power_on(struct panthor_device *ptdev) ++{ ++ if (ptdev->gpu_info.l2_present != 1) { ++ /* ++ * Only support one core group now. ++ * ~(l2_present - 1) unsets all bits in l2_present except ++ * the bottom bit. (l2_present - 2) has all the bits in ++ * the first core group set. AND them together to generate ++ * a mask of cores in the first core group. ++ */ ++ u64 core_mask = ~(ptdev->gpu_info.l2_present - 1) & ++ (ptdev->gpu_info.l2_present - 2); ++ drm_info_once(&ptdev->base, "using only 1st core group (%lu cores from %lu)\n", ++ hweight64(core_mask), ++ hweight64(ptdev->gpu_info.shader_present)); ++ } ++ ++ return panthor_gpu_power_on(ptdev, L2, 1, 20000); ++} ++ ++/** ++ * panthor_gpu_flush_caches() - Flush caches ++ * @ptdev: Device. ++ * @l2: L2 flush type. ++ * @lsc: LSC flush type. 
++ * @other: Other flush type. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_flush_caches(struct panthor_device *ptdev, ++ u32 l2, u32 lsc, u32 other) ++{ ++ bool timedout = false; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if (!drm_WARN_ON(&ptdev->base, ++ ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { ++ ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED; ++ gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other)); ++ } ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ ++ if (!wait_event_timeout(ptdev->gpu->reqs_acked, ++ !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED), ++ msecs_to_jiffies(100))) { ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 && ++ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED)) ++ timedout = true; ++ else ++ ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED; ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ } ++ ++ if (timedout) { ++ drm_err(&ptdev->base, "Flush caches timeout"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_soft_reset() - Issue a soft-reset ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_soft_reset(struct panthor_device *ptdev) ++{ ++ bool timedout = false; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if (!drm_WARN_ON(&ptdev->base, ++ ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) { ++ ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED; ++ gpu_write(ptdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); ++ gpu_write(ptdev, GPU_CMD, GPU_SOFT_RESET); ++ } ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ ++ if (!wait_event_timeout(ptdev->gpu->reqs_acked, ++ !(ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED), ++ msecs_to_jiffies(100))) { ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if ((ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED) != 0 && ++ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_RESET_COMPLETED)) ++ timedout = true; ++ else ++ ptdev->gpu->pending_reqs &= ~GPU_IRQ_RESET_COMPLETED; ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ } ++ ++ if (timedout) { ++ drm_err(&ptdev->base, "Soft reset timeout"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_suspend() - Suspend the GPU block. ++ * @ptdev: Device. ++ * ++ * Suspend the GPU irq. This should be called last in the suspend procedure, ++ * after all other blocks have been suspended. ++ */ ++void panthor_gpu_suspend(struct panthor_device *ptdev) ++{ ++ /* ++ * It may be preferable to simply power down the L2, but for now just ++ * soft-reset which will leave the L2 powered down. ++ */ ++ panthor_gpu_soft_reset(ptdev); ++ panthor_gpu_irq_suspend(&ptdev->gpu->irq); ++} ++ ++/** ++ * panthor_gpu_resume() - Resume the GPU block. ++ * @ptdev: Device. ++ * ++ * Resume the IRQ handler and power-on the L2-cache. ++ * The FW takes care of powering the other blocks. ++ */ ++void panthor_gpu_resume(struct panthor_device *ptdev) ++{ ++ panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK); ++ panthor_gpu_l2_power_on(ptdev); ++} +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gpu.h +@@ -0,0 +1,52 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Collabora ltd.
*/ ++ ++#ifndef __PANTHOR_GPU_H__ ++#define __PANTHOR_GPU_H__ ++ ++struct panthor_device; ++ ++int panthor_gpu_init(struct panthor_device *ptdev); ++void panthor_gpu_unplug(struct panthor_device *ptdev); ++void panthor_gpu_suspend(struct panthor_device *ptdev); ++void panthor_gpu_resume(struct panthor_device *ptdev); ++ ++int panthor_gpu_block_power_on(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwron_reg, u32 pwrtrans_reg, ++ u32 rdy_reg, u64 mask, u32 timeout_us); ++int panthor_gpu_block_power_off(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwroff_reg, u32 pwrtrans_reg, ++ u64 mask, u32 timeout_us); ++ ++/** ++ * panthor_gpu_power_on() - Power on the GPU block. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define panthor_gpu_power_on(ptdev, type, mask, timeout_us) \ ++ panthor_gpu_block_power_on(ptdev, #type, \ ++ type ## _PWRON_LO, \ ++ type ## _PWRTRANS_LO, \ ++ type ## _READY_LO, \ ++ mask, timeout_us) ++ ++/** ++ * panthor_gpu_power_off() - Power off the GPU block. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define panthor_gpu_power_off(ptdev, type, mask, timeout_us) \ ++ panthor_gpu_block_power_off(ptdev, #type, \ ++ type ## _PWROFF_LO, \ ++ type ## _PWRTRANS_LO, \ ++ mask, timeout_us) ++ ++int panthor_gpu_l2_power_on(struct panthor_device *ptdev); ++int panthor_gpu_flush_caches(struct panthor_device *ptdev, ++ u32 l2, u32 lsc, u32 other); ++int panthor_gpu_soft_reset(struct panthor_device *ptdev); ++ ++#endif diff --git a/patches-6.6/034-34-v6.10-drm-panthor-Add-GEM-logical-block.patch b/patches-6.6/034-34-v6.10-drm-panthor-Add-GEM-logical-block.patch new file mode 100644 index 0000000..dec21e3 --- /dev/null +++ b/patches-6.6/034-34-v6.10-drm-panthor-Add-GEM-logical-block.patch @@ -0,0 +1,426 @@ +From 8a1cc07578bf42d85f008316873d710ff684dd29 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:19 +0100 +Subject: [PATCH] drm/panthor: Add GEM logical block + +Anything relating to GEM object management is placed here. Nothing +particularly interesting here, given the implementation is based on +drm_gem_shmem_object, which is doing most of the work. 
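As a rough usage sketch (not part of the patch itself), another logical block would consume the kernel-BO helpers declared in panthor_gem.h below roughly like this. The bo_flags, vm_map_flags and hw_desc_addr names are placeholders standing in for the uAPI flags and the consumer-side state, not identifiers taken from the series:

struct panthor_kernel_bo *kbo;

kbo = panthor_kernel_bo_create(ptdev, vm, SZ_64K, bo_flags, vm_map_flags,
			       PANTHOR_VM_KERNEL_AUTO_VA);
if (IS_ERR(kbo))
	return PTR_ERR(kbo);

/* CPU-side access goes through the kernel mapping set up on demand. */
if (!panthor_kernel_bo_vmap(kbo))
	memset(kbo->kmap, 0, panthor_kernel_bo_size(kbo));

/* The GPU sees the VA that was auto-allocated at creation time. */
hw_desc_addr = panthor_kernel_bo_gpuva(kbo);

/* Destruction tolerates ERR/NULL pointers, so error paths can call it
 * unconditionally; it unmaps the VA range and drops the GEM reference. */
panthor_kernel_bo_destroy(vm, kbo);

Because the destroy helper releases the GPU mapping, the VA node and the backing GEM in one call, callers do not need to track those resources separately.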
+ +v6: +- Add Maxime's and Heiko's acks +- Return a page-aligned BO size to userspace when creating a BO +- Keep header inclusion alphabetically ordered + +v5: +- Add Liviu's and Steve's R-b + +v4: +- Force kernel BOs to be GPU mapped +- Make panthor_kernel_bo_destroy() robust against ERR/NULL BO pointers + to simplify the call sites + +v3: +- Add acks for the MIT/GPL2 relicensing +- Provide a panthor_kernel_bo abstraction for buffer objects managed by + the kernel (will replace panthor_fw_mem and be used everywhere we were + using panthor_gem_create_and_map() before) +- Adjust things to match drm_gpuvm changes +- Change return of panthor_gem_create_with_handle() to int + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Liviu Dudau +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-6-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_gem.c | 230 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_gem.h | 142 ++++++++++++++++ + 2 files changed, 372 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_gem.c + create mode 100644 drivers/gpu/drm/panthor/panthor_gem.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gem.c +@@ -0,0 +1,230 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_mmu.h" ++ ++static void panthor_gem_free_object(struct drm_gem_object *obj) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(obj); ++ struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem; ++ ++ drm_gem_free_mmap_offset(&bo->base.base); ++ mutex_destroy(&bo->gpuva_list_lock); ++ drm_gem_shmem_free(&bo->base); ++ drm_gem_object_put(vm_root_gem); ++} ++ ++/** ++ * panthor_kernel_bo_destroy() - Destroy a kernel buffer object ++ * @vm: The VM this BO was mapped to. ++ * @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction ++ * is skipped. ++ */ ++void panthor_kernel_bo_destroy(struct panthor_vm *vm, ++ struct panthor_kernel_bo *bo) ++{ ++ int ret; ++ ++ if (IS_ERR_OR_NULL(bo)) ++ return; ++ ++ panthor_kernel_bo_vunmap(bo); ++ ++ if (drm_WARN_ON(bo->obj->dev, ++ to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm))) ++ goto out_free_bo; ++ ++ ret = panthor_vm_unmap_range(vm, bo->va_node.start, ++ panthor_kernel_bo_size(bo)); ++ if (ret) ++ goto out_free_bo; ++ ++ panthor_vm_free_va(vm, &bo->va_node); ++ drm_gem_object_put(bo->obj); ++ ++out_free_bo: ++ kfree(bo); ++} ++ ++/** ++ * panthor_kernel_bo_create() - Create and map a GEM object to a VM ++ * @ptdev: Device. ++ * @vm: VM to map the GEM to. If NULL, the kernel object is not GPU mapped. ++ * @size: Size of the buffer object. ++ * @bo_flags: Combination of drm_panthor_bo_flags flags. ++ * @vm_map_flags: Combination of drm_panthor_vm_bind_op_flags (only those ++ * that are related to map operations). ++ * @gpu_va: GPU address assigned when mapping to the VM. ++ * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be ++ * automatically allocated. 
++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, ++ size_t size, u32 bo_flags, u32 vm_map_flags, ++ u64 gpu_va) ++{ ++ struct drm_gem_shmem_object *obj; ++ struct panthor_kernel_bo *kbo; ++ struct panthor_gem_object *bo; ++ int ret; ++ ++ if (drm_WARN_ON(&ptdev->base, !vm)) ++ return ERR_PTR(-EINVAL); ++ ++ kbo = kzalloc(sizeof(*kbo), GFP_KERNEL); ++ if (!kbo) ++ return ERR_PTR(-ENOMEM); ++ ++ obj = drm_gem_shmem_create(&ptdev->base, size); ++ if (IS_ERR(obj)) { ++ ret = PTR_ERR(obj); ++ goto err_free_bo; ++ } ++ ++ bo = to_panthor_bo(&obj->base); ++ size = obj->base.size; ++ kbo->obj = &obj->base; ++ bo->flags = bo_flags; ++ ++ ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node); ++ if (ret) ++ goto err_put_obj; ++ ++ ret = panthor_vm_map_bo_range(vm, bo, 0, size, kbo->va_node.start, vm_map_flags); ++ if (ret) ++ goto err_free_va; ++ ++ bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); ++ drm_gem_object_get(bo->exclusive_vm_root_gem); ++ bo->base.base.resv = bo->exclusive_vm_root_gem->resv; ++ return kbo; ++ ++err_free_va: ++ panthor_vm_free_va(vm, &kbo->va_node); ++ ++err_put_obj: ++ drm_gem_object_put(&obj->base); ++ ++err_free_bo: ++ kfree(kbo); ++ return ERR_PTR(ret); ++} ++ ++static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(obj); ++ ++ /* Don't allow mmap on objects that have the NO_MMAP flag set. */ ++ if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) ++ return -EINVAL; ++ ++ return drm_gem_shmem_object_mmap(obj, vma); ++} ++ ++static struct dma_buf * ++panthor_gem_prime_export(struct drm_gem_object *obj, int flags) ++{ ++ /* We can't export GEMs that have an exclusive VM. */ ++ if (to_panthor_bo(obj)->exclusive_vm_root_gem) ++ return ERR_PTR(-EINVAL); ++ ++ return drm_gem_prime_export(obj, flags); ++} ++ ++static const struct drm_gem_object_funcs panthor_gem_funcs = { ++ .free = panthor_gem_free_object, ++ .print_info = drm_gem_shmem_object_print_info, ++ .pin = drm_gem_shmem_object_pin, ++ .unpin = drm_gem_shmem_object_unpin, ++ .get_sg_table = drm_gem_shmem_object_get_sg_table, ++ .vmap = drm_gem_shmem_object_vmap, ++ .vunmap = drm_gem_shmem_object_vunmap, ++ .mmap = panthor_gem_mmap, ++ .export = panthor_gem_prime_export, ++ .vm_ops = &drm_gem_shmem_vm_ops, ++}; ++ ++/** ++ * panthor_gem_create_object - Implementation of driver->gem_create_object. ++ * @ddev: DRM device ++ * @size: Size in bytes of the memory the object will reference ++ * ++ * This lets the GEM helpers allocate object structs for us, and keep ++ * our BO stats correct. ++ */ ++struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_gem_object *obj; ++ ++ obj = kzalloc(sizeof(*obj), GFP_KERNEL); ++ if (!obj) ++ return ERR_PTR(-ENOMEM); ++ ++ obj->base.base.funcs = &panthor_gem_funcs; ++ obj->base.map_wc = !ptdev->coherent; ++ mutex_init(&obj->gpuva_list_lock); ++ drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock); ++ ++ return &obj->base.base; ++} ++ ++/** ++ * panthor_gem_create_with_handle() - Create a GEM object and attach it to a handle. ++ * @file: DRM file. ++ * @ddev: DRM device. ++ * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared. ++ * @size: Size of the GEM object to allocate. 
++ * @flags: Combination of drm_panthor_bo_flags flags. ++ * @handle: Pointer holding the handle pointing to the new GEM object. ++ * ++ * Return: Zero on success ++ */ ++int ++panthor_gem_create_with_handle(struct drm_file *file, ++ struct drm_device *ddev, ++ struct panthor_vm *exclusive_vm, ++ u64 *size, u32 flags, u32 *handle) ++{ ++ int ret; ++ struct drm_gem_shmem_object *shmem; ++ struct panthor_gem_object *bo; ++ ++ shmem = drm_gem_shmem_create(ddev, *size); ++ if (IS_ERR(shmem)) ++ return PTR_ERR(shmem); ++ ++ bo = to_panthor_bo(&shmem->base); ++ bo->flags = flags; ++ ++ if (exclusive_vm) { ++ bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); ++ drm_gem_object_get(bo->exclusive_vm_root_gem); ++ bo->base.base.resv = bo->exclusive_vm_root_gem->resv; ++ } ++ ++ /* ++ * Allocate an id of idr table where the obj is registered ++ * and handle has the id what user can see. ++ */ ++ ret = drm_gem_handle_create(file, &shmem->base, handle); ++ if (!ret) ++ *size = bo->base.base.size; ++ ++ /* drop reference from allocate - handle holds it now. */ ++ drm_gem_object_put(&shmem->base); ++ ++ return ret; ++} +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gem.h +@@ -0,0 +1,142 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_GEM_H__ ++#define __PANTHOR_GEM_H__ ++ ++#include ++#include ++ ++#include ++#include ++ ++struct panthor_vm; ++ ++/** ++ * struct panthor_gem_object - Driver specific GEM object. ++ */ ++struct panthor_gem_object { ++ /** @base: Inherit from drm_gem_shmem_object. */ ++ struct drm_gem_shmem_object base; ++ ++ /** ++ * @exclusive_vm_root_gem: Root GEM of the exclusive VM this GEM object ++ * is attached to. ++ * ++ * If @exclusive_vm_root_gem != NULL, any attempt to bind the GEM to a ++ * different VM will fail. ++ * ++ * All FW memory objects have this field set to the root GEM of the MCU ++ * VM. ++ */ ++ struct drm_gem_object *exclusive_vm_root_gem; ++ ++ /** ++ * @gpuva_list_lock: Custom GPUVA lock. ++ * ++ * Used to protect insertion of drm_gpuva elements to the ++ * drm_gem_object.gpuva.list list. ++ * ++ * We can't use the GEM resv for that, because drm_gpuva_link() is ++ * called in a dma-signaling path, where we're not allowed to take ++ * resv locks. ++ */ ++ struct mutex gpuva_list_lock; ++ ++ /** @flags: Combination of drm_panthor_bo_flags flags. */ ++ u32 flags; ++}; ++ ++/** ++ * struct panthor_kernel_bo - Kernel buffer object. ++ * ++ * These objects are only manipulated by the kernel driver and not ++ * directly exposed to the userspace. The GPU address of a kernel ++ * BO might be passed to userspace though. ++ */ ++struct panthor_kernel_bo { ++ /** ++ * @obj: The GEM object backing this kernel buffer object. ++ */ ++ struct drm_gem_object *obj; ++ ++ /** ++ * @va_node: VA space allocated to this GEM. ++ */ ++ struct drm_mm_node va_node; ++ ++ /** ++ * @kmap: Kernel CPU mapping of @gem. 
++ */ ++ void *kmap; ++}; ++ ++static inline ++struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj) ++{ ++ return container_of(to_drm_gem_shmem_obj(obj), struct panthor_gem_object, base); ++} ++ ++struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size); ++ ++struct drm_gem_object * ++panthor_gem_prime_import_sg_table(struct drm_device *ddev, ++ struct dma_buf_attachment *attach, ++ struct sg_table *sgt); ++ ++int ++panthor_gem_create_with_handle(struct drm_file *file, ++ struct drm_device *ddev, ++ struct panthor_vm *exclusive_vm, ++ u64 *size, u32 flags, uint32_t *handle); ++ ++static inline u64 ++panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo) ++{ ++ return bo->va_node.start; ++} ++ ++static inline size_t ++panthor_kernel_bo_size(struct panthor_kernel_bo *bo) ++{ ++ return bo->obj->size; ++} ++ ++static inline int ++panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo) ++{ ++ struct iosys_map map; ++ int ret; ++ ++ if (bo->kmap) ++ return 0; ++ ++ ret = drm_gem_vmap_unlocked(bo->obj, &map); ++ if (ret) ++ return ret; ++ ++ bo->kmap = map.vaddr; ++ return 0; ++} ++ ++static inline void ++panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo) ++{ ++ if (bo->kmap) { ++ struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap); ++ ++ drm_gem_vunmap_unlocked(bo->obj, &map); ++ bo->kmap = NULL; ++ } ++} ++ ++struct panthor_kernel_bo * ++panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, ++ size_t size, u32 bo_flags, u32 vm_map_flags, ++ u64 gpu_va); ++ ++void panthor_kernel_bo_destroy(struct panthor_vm *vm, ++ struct panthor_kernel_bo *bo); ++ ++#endif /* __PANTHOR_GEM_H__ */ diff --git a/patches-6.6/034-35-v6.10-drm-panthor-Add-the-devfreq-logical-block.patch b/patches-6.6/034-35-v6.10-drm-panthor-Add-the-devfreq-logical-block.patch new file mode 100644 index 0000000..be7ac4a --- /dev/null +++ b/patches-6.6/034-35-v6.10-drm-panthor-Add-the-devfreq-logical-block.patch @@ -0,0 +1,356 @@ +From fac9b22df4b1108f7fa5a087a77f922489861484 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:20 +0100 +Subject: [PATCH] drm/panthor: Add the devfreq logical block +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Everything related to devfreq is placed in panthor_devfreq.c, and +helpers that can be called by other logical blocks are exposed through +panthor_devfreq.h. + +This implementation is loosely based on the panfrost implementation, +the only difference being that we don't count device users, because +the idle/active state will be managed by the scheduler logic.
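To make the "no user counting" point concrete, here is a hedged sketch (not code from the patch) of how the scheduler side is expected to drive the load tracking. The function name and the jobs_pending condition are made up; only panthor_devfreq_record_busy() and panthor_devfreq_record_idle() come from the helpers added below:

/* Called from the scheduler whenever its view of GPU activity changes. */
static void example_report_gpu_activity(struct panthor_device *ptdev,
					bool jobs_pending)
{
	if (jobs_pending)
		panthor_devfreq_record_busy(ptdev);
	else
		panthor_devfreq_record_idle(ptdev);
}

panthor_devfreq_get_dev_status() then folds the accumulated busy/idle time into the utilization figure that the simple_ondemand governor acts on.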
+ +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v4: +- Add Clément's A-b for the relicensing + +v3: +- Add acks for the MIT/GPL2 relicensing + +v2: +- Added in v2 + +Cc: Clément Péron # MIT+GPL2 relicensing +Reviewed-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Acked-by: Clément Péron # MIT+GPL2 relicensing +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-7-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_devfreq.c | 283 ++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_devfreq.h | 21 ++ + 2 files changed, 304 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.c + create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_devfreq.c +@@ -0,0 +1,283 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++ ++/** ++ * struct panthor_devfreq - Device frequency management ++ */ ++struct panthor_devfreq { ++ /** @devfreq: devfreq device. */ ++ struct devfreq *devfreq; ++ ++ /** @gov_data: Governor data. */ ++ struct devfreq_simple_ondemand_data gov_data; ++ ++ /** @busy_time: Busy time. */ ++ ktime_t busy_time; ++ ++ /** @idle_time: Idle time. */ ++ ktime_t idle_time; ++ ++ /** @time_last_update: Last update time. */ ++ ktime_t time_last_update; ++ ++ /** @last_busy_state: True if the GPU was busy last time we updated the state. */ ++ bool last_busy_state; ++ ++ /* ++ * @lock: Lock used to protect busy_time, idle_time, time_last_update and ++ * last_busy_state. ++ * ++ * These fields can be accessed concurrently by panthor_devfreq_get_dev_status() ++ * and panthor_devfreq_record_{busy,idle}(). 
++ */ ++ spinlock_t lock; ++}; ++ ++static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq) ++{ ++ ktime_t now, last; ++ ++ now = ktime_get(); ++ last = pdevfreq->time_last_update; ++ ++ if (pdevfreq->last_busy_state) ++ pdevfreq->busy_time += ktime_sub(now, last); ++ else ++ pdevfreq->idle_time += ktime_sub(now, last); ++ ++ pdevfreq->time_last_update = now; ++} ++ ++static int panthor_devfreq_target(struct device *dev, unsigned long *freq, ++ u32 flags) ++{ ++ struct dev_pm_opp *opp; ++ ++ opp = devfreq_recommended_opp(dev, freq, flags); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); ++ dev_pm_opp_put(opp); ++ ++ return dev_pm_opp_set_rate(dev, *freq); ++} ++ ++static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq) ++{ ++ pdevfreq->busy_time = 0; ++ pdevfreq->idle_time = 0; ++ pdevfreq->time_last_update = ktime_get(); ++} ++ ++static int panthor_devfreq_get_dev_status(struct device *dev, ++ struct devfreq_dev_status *status) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ status->current_frequency = clk_get_rate(ptdev->clks.core); ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ ++ status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time, ++ pdevfreq->idle_time)); ++ ++ status->busy_time = ktime_to_ns(pdevfreq->busy_time); ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++ ++ drm_dbg(&ptdev->base, "busy %lu total %lu %lu %% freq %lu MHz\n", ++ status->busy_time, status->total_time, ++ status->busy_time / (status->total_time / 100), ++ status->current_frequency / 1000 / 1000); ++ ++ return 0; ++} ++ ++static struct devfreq_dev_profile panthor_devfreq_profile = { ++ .timer = DEVFREQ_TIMER_DELAYED, ++ .polling_ms = 50, /* ~3 frames */ ++ .target = panthor_devfreq_target, ++ .get_dev_status = panthor_devfreq_get_dev_status, ++}; ++ ++int panthor_devfreq_init(struct panthor_device *ptdev) ++{ ++ /* There's actually 2 regulators (mali and sram), but the OPP core only ++ * supports one. ++ * ++ * We assume the sram regulator is coupled with the mali one and let ++ * the coupling logic deal with voltage updates. ++ */ ++ static const char * const reg_names[] = { "mali", NULL }; ++ struct thermal_cooling_device *cooling; ++ struct device *dev = ptdev->base.dev; ++ struct panthor_devfreq *pdevfreq; ++ struct dev_pm_opp *opp; ++ unsigned long cur_freq; ++ int ret; ++ ++ pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL); ++ if (!pdevfreq) ++ return -ENOMEM; ++ ++ ptdev->devfreq = pdevfreq; ++ ++ ret = devm_pm_opp_set_regulators(dev, reg_names); ++ if (ret) { ++ if (ret != -EPROBE_DEFER) ++ DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); ++ ++ return ret; ++ } ++ ++ ret = devm_pm_opp_of_add_table(dev); ++ if (ret) ++ return ret; ++ ++ spin_lock_init(&pdevfreq->lock); ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ cur_freq = clk_get_rate(ptdev->clks.core); ++ ++ opp = devfreq_recommended_opp(dev, &cur_freq, 0); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); ++ ++ panthor_devfreq_profile.initial_freq = cur_freq; ++ ++ /* Regulator coupling only takes care of synchronizing/balancing voltage ++ * updates, but the coupled regulator needs to be enabled manually. 
++ * ++ * We use devm_regulator_get_enable_optional() and keep the sram supply ++ * enabled until the device is removed, just like we do for the mali ++ * supply, which is enabled when dev_pm_opp_set_opp(dev, opp) is called, ++ * and disabled when the opp_table is torn down, using the devm action. ++ * ++ * If we really care about disabling regulators on suspend, we should: ++ * - use devm_regulator_get_optional() here ++ * - call dev_pm_opp_set_opp(dev, NULL) before leaving this function ++ * (this disables the regulator passed to the OPP layer) ++ * - call dev_pm_opp_set_opp(dev, NULL) and ++ * regulator_disable(ptdev->regulators.sram) in ++ * panthor_devfreq_suspend() ++ * - call dev_pm_opp_set_opp(dev, default_opp) and ++ * regulator_enable(ptdev->regulators.sram) in ++ * panthor_devfreq_resume() ++ * ++ * But without knowing if it's beneficial or not (in term of power ++ * consumption), or how much it slows down the suspend/resume steps, ++ * let's just keep regulators enabled for the device lifetime. ++ */ ++ ret = devm_regulator_get_enable_optional(dev, "sram"); ++ if (ret && ret != -ENODEV) { ++ if (ret != -EPROBE_DEFER) ++ DRM_DEV_ERROR(dev, "Couldn't retrieve/enable sram supply\n"); ++ return ret; ++ } ++ ++ /* ++ * Set the recommend OPP this will enable and configure the regulator ++ * if any and will avoid a switch off by regulator_late_cleanup() ++ */ ++ ret = dev_pm_opp_set_opp(dev, opp); ++ if (ret) { ++ DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n"); ++ return ret; ++ } ++ ++ dev_pm_opp_put(opp); ++ ++ /* ++ * Setup default thresholds for the simple_ondemand governor. ++ * The values are chosen based on experiments. ++ */ ++ pdevfreq->gov_data.upthreshold = 45; ++ pdevfreq->gov_data.downdifferential = 5; ++ ++ pdevfreq->devfreq = devm_devfreq_add_device(dev, &panthor_devfreq_profile, ++ DEVFREQ_GOV_SIMPLE_ONDEMAND, ++ &pdevfreq->gov_data); ++ if (IS_ERR(pdevfreq->devfreq)) { ++ DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n"); ++ ret = PTR_ERR(pdevfreq->devfreq); ++ pdevfreq->devfreq = NULL; ++ return ret; ++ } ++ ++ cooling = devfreq_cooling_em_register(pdevfreq->devfreq, NULL); ++ if (IS_ERR(cooling)) ++ DRM_DEV_INFO(dev, "Failed to register cooling device\n"); ++ ++ return 0; ++} ++ ++int panthor_devfreq_resume(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ ++ if (!pdevfreq->devfreq) ++ return 0; ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ return devfreq_resume_device(pdevfreq->devfreq); ++} ++ ++int panthor_devfreq_suspend(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ ++ if (!pdevfreq->devfreq) ++ return 0; ++ ++ return devfreq_suspend_device(pdevfreq->devfreq); ++} ++ ++void panthor_devfreq_record_busy(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ if (!pdevfreq->devfreq) ++ return; ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ pdevfreq->last_busy_state = true; ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++} ++ ++void panthor_devfreq_record_idle(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ if (!pdevfreq->devfreq) ++ return; ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ pdevfreq->last_busy_state = false; ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++} +--- /dev/null ++++ 
b/drivers/gpu/drm/panthor/panthor_devfreq.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#ifndef __PANTHOR_DEVFREQ_H__ ++#define __PANTHOR_DEVFREQ_H__ ++ ++struct devfreq; ++struct thermal_cooling_device; ++ ++struct panthor_device; ++struct panthor_devfreq; ++ ++int panthor_devfreq_init(struct panthor_device *ptdev); ++ ++int panthor_devfreq_resume(struct panthor_device *ptdev); ++int panthor_devfreq_suspend(struct panthor_device *ptdev); ++ ++void panthor_devfreq_record_busy(struct panthor_device *ptdev); ++void panthor_devfreq_record_idle(struct panthor_device *ptdev); ++ ++#endif /* __PANTHOR_DEVFREQ_H__ */ diff --git a/patches-6.6/034-36-v6.10-drm-panthor-Add-the-MMU-VM-logical-block.patch b/patches-6.6/034-36-v6.10-drm-panthor-Add-the-MMU-VM-logical-block.patch new file mode 100644 index 0000000..097c09a --- /dev/null +++ b/patches-6.6/034-36-v6.10-drm-panthor-Add-the-MMU-VM-logical-block.patch @@ -0,0 +1,2975 @@ +From 647810ec247641eb5aec8caef818919a4518a0b1 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:21 +0100 +Subject: [PATCH] drm/panthor: Add the MMU/VM logical block + +MMU and VM management is related and placed in the same source file. + +Page table updates are delegated to the io-pgtable-arm driver that's in +the iommu subsystem. + +The VM management logic is based on drm_gpuva_mgr, and is assuming the +VA space is mostly managed by the usermode driver, except for a reserved +portion of this VA-space that's used for kernel objects (like the heap +contexts/chunks). + +Both asynchronous and synchronous VM operations are supported, and +internal helpers are exposed to allow other logical blocks to map their +buffers in the GPU VA space. + +There's one VM_BIND queue per-VM (meaning the Vulkan driver can only +expose one sparse-binding queue), and this bind queue is managed with +a 1:1 drm_sched_entity:drm_gpu_scheduler, such that each VM gets its own +independent execution queue, avoiding VM operation serialization at the +device level (things are still serialized at the VM level). + +The rest is just implementation details that are hopefully well explained +in the documentation. 
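To illustrate the 1:1 pairing described above, here is a sketch rather than code lifted from the patch: the VM_BIND entity is only ever offered the VM's own scheduler, which is what keeps bind jobs on different VMs independent. The vm->sched and vm->entity fields are the ones added to struct panthor_vm in this patch; the priority constant and error handling are arbitrary choices for the example:

struct drm_gpu_scheduler *sched_list[] = { &vm->sched };
int err;

/* The entity's only candidate scheduler is the VM's own scheduler, so
 * VM_BIND jobs are serialized per-VM but never across VMs. */
err = drm_sched_entity_init(&vm->entity, DRM_SCHED_PRIORITY_NORMAL,
			    sched_list, ARRAY_SIZE(sched_list), NULL);
if (err)
	return err;

The creation of the per-VM scheduler itself is left out here because the drm_sched_init() arguments depend on the drm_sched changes backported earlier in this series.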
+ +v6: +- Add Maxime's and Heiko's acks +- Add Steve's R-b +- Adjust the TRANSCFG value to account for SW VA space limitation on + 32-bit systems +- Keep header inclusion alphabetically ordered + +v5: +- Fix a double panthor_vm_cleanup_op_ctx() call +- Fix a race between panthor_vm_prepare_map_op_ctx() and + panthor_vm_bo_put() +- Fix panthor_vm_pool_destroy_vm() kernel doc +- Fix paddr adjustment in panthor_vm_map_pages() +- Fix bo_offset calculation in panthor_vm_get_bo_for_va() + +v4: +- Add an helper to return the VM state +- Check drmm_mutex_init() return code +- Remove the VM from the AS reclaim list when panthor_vm_active() is + called +- Count the number of active VM users instead of considering there's + at most one user (several scheduling groups can point to the same + vM) +- Pre-allocate a VMA object for unmap operations (unmaps can trigger + a sm_step_remap() call) +- Check vm->root_page_table instead of vm->pgtbl_ops to detect if + the io-pgtable is trying to allocate the root page table +- Don't memset() the va_node in panthor_vm_alloc_va(), make it a + caller requirement +- Fix the kernel doc in a few places +- Drop the panthor_vm::base offset constraint and modify + panthor_vm_put() to explicitly check for a NULL value +- Fix unbalanced vm_bo refcount in panthor_gpuva_sm_step_remap() +- Drop stale comments about the shared_bos list +- Patch mmu_features::va_bits on 32-bit builds to reflect the + io_pgtable limitation and let the UMD know about it + +v3: +- Add acks for the MIT/GPL2 relicensing +- Propagate MMU faults to the scheduler +- Move pages pinning/unpinning out of the dma_signalling path +- Fix 32-bit support +- Rework the user/kernel VA range calculation +- Make the auto-VA range explicit (auto-VA range doesn't cover the full + kernel-VA range on the MCU VM) +- Let callers of panthor_vm_alloc_va() allocate the drm_mm_node + (embedded in panthor_kernel_bo now) +- Adjust things to match the latest drm_gpuvm changes (extobj tracking, + resv prep and more) +- Drop the per-AS lock and use slots_lock (fixes a race on vm->as.id) +- Set as.id to -1 when reusing an address space from the LRU list +- Drop misleading comment about page faults +- Remove check for irq being assigned in panthor_mmu_unplug() + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-8-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_mmu.c | 2768 +++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_mmu.h | 102 + + 2 files changed, 2870 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_mmu.c + create mode 100644 drivers/gpu/drm/panthor/panthor_mmu.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_mmu.c +@@ -0,0 +1,2768 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++#define MAX_AS_SLOTS 32 ++ ++struct panthor_vm; ++ ++/** ++ * struct panthor_as_slot - Address space slot ++ */ ++struct panthor_as_slot { ++ /** @vm: VM bound to this slot. NULL if no VM is bound. */ ++ struct panthor_vm *vm; ++}; ++ ++/** ++ * struct panthor_mmu - MMU related data ++ */ ++struct panthor_mmu { ++ /** @irq: The MMU irq. */ ++ struct panthor_irq irq; ++ ++ /** @as: Address space related fields. ++ * ++ * The GPU has a limited number of address space (AS) slots, forcing ++ * us to re-assign them on demand. ++ */ ++ struct { ++ /** @slots_lock: Lock protecting access to all other AS fields. */ ++ struct mutex slots_lock; ++ ++ /** @alloc_mask: Bitmask encoding the allocated slots. */ ++ unsigned long alloc_mask; ++ ++ /** @faulty_mask: Bitmask encoding the faulty slots. */ ++ unsigned long faulty_mask; ++ ++ /** @slots: VMs currently bound to the AS slots. */ ++ struct panthor_as_slot slots[MAX_AS_SLOTS]; ++ ++ /** ++ * @lru_list: List of least recently used VMs. ++ * ++ * We use this list to pick a VM to evict when all slots are ++ * used. ++ * ++ * There should be no more active VMs than there are AS slots, ++ * so this LRU is just here to keep VMs bound until there's ++ * a need to release a slot, thus avoiding unnecessary TLB/cache ++ * flushes. ++ */ ++ struct list_head lru_list; ++ } as; ++ ++ /** @vm: VM management fields */ ++ struct { ++ /** @lock: Lock protecting access to list. */ ++ struct mutex lock; ++ ++ /** @list: List containing all VMs. */ ++ struct list_head list; ++ ++ /** @reset_in_progress: True if a reset is in progress. */ ++ bool reset_in_progress; ++ ++ /** @wq: Workqueue used for the VM_BIND queues. */ ++ struct workqueue_struct *wq; ++ } vm; ++}; ++ ++/** ++ * struct panthor_vm_pool - VM pool object ++ */ ++struct panthor_vm_pool { ++ /** @xa: Array used for VM handle tracking. */ ++ struct xarray xa; ++}; ++ ++/** ++ * struct panthor_vma - GPU mapping object ++ * ++ * This is used to track GEM mappings in GPU space. ++ */ ++struct panthor_vma { ++ /** @base: Inherits from drm_gpuva. */ ++ struct drm_gpuva base; ++ ++ /** @node: Used to implement deferred release of VMAs. */ ++ struct list_head node; ++ ++ /** ++ * @flags: Combination of drm_panthor_vm_bind_op_flags. ++ * ++ * Only map related flags are accepted. ++ */ ++ u32 flags; ++}; ++ ++/** ++ * struct panthor_vm_op_ctx - VM operation context ++ * ++ * With VM operations potentially taking place in a dma-signaling path, we ++ * need to make sure everything that might require resource allocation is ++ * pre-allocated upfront. This is what this operation context is for. ++ * ++ * We also collect resources that have been freed, so we can release them ++ * asynchronously, and let the VM_BIND scheduler process the next VM_BIND ++ * request. ++ */ ++struct panthor_vm_op_ctx { ++ /** @rsvd_page_tables: Pages reserved for the MMU page table update. */ ++ struct { ++ /** @count: Number of pages reserved. */ ++ u32 count; ++ ++ /** @ptr: Points to the first unused page in the @pages table.
*/ ++ u32 ptr; ++ ++ /** ++ * @page: Array of pages that can be used for an MMU page table update. ++ * ++ * After an VM operation, there might be free pages left in this array. ++ * They should be returned to the pt_cache as part of the op_ctx cleanup. ++ */ ++ void **pages; ++ } rsvd_page_tables; ++ ++ /** ++ * @preallocated_vmas: Pre-allocated VMAs to handle the remap case. ++ * ++ * Partial unmap requests or map requests overlapping existing mappings will ++ * trigger a remap call, which need to register up to three panthor_vma objects ++ * (one for the new mapping, and two for the previous and next mappings). ++ */ ++ struct panthor_vma *preallocated_vmas[3]; ++ ++ /** @flags: Combination of drm_panthor_vm_bind_op_flags. */ ++ u32 flags; ++ ++ /** @va: Virtual range targeted by the VM operation. */ ++ struct { ++ /** @addr: Start address. */ ++ u64 addr; ++ ++ /** @range: Range size. */ ++ u64 range; ++ } va; ++ ++ /** ++ * @returned_vmas: List of panthor_vma objects returned after a VM operation. ++ * ++ * For unmap operations, this will contain all VMAs that were covered by the ++ * specified VA range. ++ * ++ * For map operations, this will contain all VMAs that previously mapped to ++ * the specified VA range. ++ * ++ * Those VMAs, and the resources they point to will be released as part of ++ * the op_ctx cleanup operation. ++ */ ++ struct list_head returned_vmas; ++ ++ /** @map: Fields specific to a map operation. */ ++ struct { ++ /** @vm_bo: Buffer object to map. */ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ /** @bo_offset: Offset in the buffer object. */ ++ u64 bo_offset; ++ ++ /** ++ * @sgt: sg-table pointing to pages backing the GEM object. ++ * ++ * This is gathered at job creation time, such that we don't have ++ * to allocate in ::run_job(). ++ */ ++ struct sg_table *sgt; ++ ++ /** ++ * @new_vma: The new VMA object that will be inserted to the VA tree. ++ */ ++ struct panthor_vma *new_vma; ++ } map; ++}; ++ ++/** ++ * struct panthor_vm - VM object ++ * ++ * A VM is an object representing a GPU (or MCU) virtual address space. ++ * It embeds the MMU page table for this address space, a tree containing ++ * all the virtual mappings of GEM objects, and other things needed to manage ++ * the VM. ++ * ++ * Except for the MCU VM, which is managed by the kernel, all other VMs are ++ * created by userspace and mostly managed by userspace, using the ++ * %DRM_IOCTL_PANTHOR_VM_BIND ioctl. ++ * ++ * A portion of the virtual address space is reserved for kernel objects, ++ * like heap chunks, and userspace gets to decide how much of the virtual ++ * address space is left to the kernel (half of the virtual address space ++ * by default). ++ */ ++struct panthor_vm { ++ /** ++ * @base: Inherit from drm_gpuvm. ++ * ++ * We delegate all the VA management to the common drm_gpuvm framework ++ * and only implement hooks to update the MMU page table. ++ */ ++ struct drm_gpuvm base; ++ ++ /** ++ * @sched: Scheduler used for asynchronous VM_BIND request. ++ * ++ * We use a 1:1 scheduler here. ++ */ ++ struct drm_gpu_scheduler sched; ++ ++ /** ++ * @entity: Scheduling entity representing the VM_BIND queue. ++ * ++ * There's currently one bind queue per VM. It doesn't make sense to ++ * allow more given the VM operations are serialized anyway. ++ */ ++ struct drm_sched_entity entity; ++ ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** @memattr: Value to program to the AS_MEMATTR register. */ ++ u64 memattr; ++ ++ /** @pgtbl_ops: Page table operations. 
*/ ++ struct io_pgtable_ops *pgtbl_ops; ++ ++ /** @root_page_table: Stores the root page table pointer. */ ++ void *root_page_table; ++ ++ /** ++ * @op_lock: Lock used to serialize operations on a VM. ++ * ++ * The serialization of jobs queued to the VM_BIND queue is already ++ * taken care of by drm_sched, but we need to serialize synchronous ++ * and asynchronous VM_BIND request. This is what this lock is for. ++ */ ++ struct mutex op_lock; ++ ++ /** ++ * @op_ctx: The context attached to the currently executing VM operation. ++ * ++ * NULL when no operation is in progress. ++ */ ++ struct panthor_vm_op_ctx *op_ctx; ++ ++ /** ++ * @mm: Memory management object representing the auto-VA/kernel-VA. ++ * ++ * Used to auto-allocate VA space for kernel-managed objects (tiler ++ * heaps, ...). ++ * ++ * For the MCU VM, this is managing the VA range that's used to map ++ * all shared interfaces. ++ * ++ * For user VMs, the range is specified by userspace, and must not ++ * exceed half of the VA space addressable. ++ */ ++ struct drm_mm mm; ++ ++ /** @mm_lock: Lock protecting the @mm field. */ ++ struct mutex mm_lock; ++ ++ /** @kernel_auto_va: Automatic VA-range for kernel BOs. */ ++ struct { ++ /** @start: Start of the automatic VA-range for kernel BOs. */ ++ u64 start; ++ ++ /** @size: Size of the automatic VA-range for kernel BOs. */ ++ u64 end; ++ } kernel_auto_va; ++ ++ /** @as: Address space related fields. */ ++ struct { ++ /** ++ * @id: ID of the address space this VM is bound to. ++ * ++ * A value of -1 means the VM is inactive/not bound. ++ */ ++ int id; ++ ++ /** @active_cnt: Number of active users of this VM. */ ++ refcount_t active_cnt; ++ ++ /** ++ * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list. ++ * ++ * Active VMs should not be inserted in the LRU list. ++ */ ++ struct list_head lru_node; ++ } as; ++ ++ /** ++ * @heaps: Tiler heap related fields. ++ */ ++ struct { ++ /** ++ * @pool: The heap pool attached to this VM. ++ * ++ * Will stay NULL until someone creates a heap context on this VM. ++ */ ++ struct panthor_heap_pool *pool; ++ ++ /** @lock: Lock used to protect access to @pool. */ ++ struct mutex lock; ++ } heaps; ++ ++ /** @node: Used to insert the VM in the panthor_mmu::vm::list. */ ++ struct list_head node; ++ ++ /** @for_mcu: True if this is the MCU VM. */ ++ bool for_mcu; ++ ++ /** ++ * @destroyed: True if the VM was destroyed. ++ * ++ * No further bind requests should be queued to a destroyed VM. ++ */ ++ bool destroyed; ++ ++ /** ++ * @unusable: True if the VM has turned unusable because something ++ * bad happened during an asynchronous request. ++ * ++ * We don't try to recover from such failures, because this implies ++ * informing userspace about the specific operation that failed, and ++ * hoping the userspace driver can replay things from there. This all ++ * sounds very complicated for little gain. ++ * ++ * Instead, we should just flag the VM as unusable, and fail any ++ * further request targeting this VM. ++ * ++ * We also provide a way to query a VM state, so userspace can destroy ++ * it and create a new one. ++ * ++ * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST ++ * situation, where the logical device needs to be re-created. ++ */ ++ bool unusable; ++ ++ /** ++ * @unhandled_fault: Unhandled fault happened. ++ * ++ * This should be reported to the scheduler, and the queue/group be ++ * flagged as faulty as a result. 
++ */ ++ bool unhandled_fault; ++}; ++ ++/** ++ * struct panthor_vm_bind_job - VM bind job ++ */ ++struct panthor_vm_bind_job { ++ /** @base: Inherit from drm_sched_job. */ ++ struct drm_sched_job base; ++ ++ /** @refcount: Reference count. */ ++ struct kref refcount; ++ ++ /** @cleanup_op_ctx_work: Work used to cleanup the VM operation context. */ ++ struct work_struct cleanup_op_ctx_work; ++ ++ /** @vm: VM targeted by the VM operation. */ ++ struct panthor_vm *vm; ++ ++ /** @ctx: Operation context. */ ++ struct panthor_vm_op_ctx ctx; ++}; ++ ++/** ++ * @pt_cache: Cache used to allocate MMU page tables. ++ * ++ * The pre-allocation pattern forces us to over-allocate to plan for ++ * the worst case scenario, and return the pages we didn't use. ++ * ++ * Having a kmem_cache allows us to speed allocations. ++ */ ++static struct kmem_cache *pt_cache; ++ ++/** ++ * alloc_pt() - Custom page table allocator ++ * @cookie: Cookie passed at page table allocation time. ++ * @size: Size of the page table. This size should be fixed, ++ * and determined at creation time based on the granule size. ++ * @gfp: GFP flags. ++ * ++ * We want a custom allocator so we can use a cache for page table ++ * allocations and amortize the cost of the over-reservation that's ++ * done to allow asynchronous VM operations. ++ * ++ * Return: non-NULL on success, NULL if the allocation failed for any ++ * reason. ++ */ ++static void *alloc_pt(void *cookie, size_t size, gfp_t gfp) ++{ ++ struct panthor_vm *vm = cookie; ++ void *page; ++ ++ /* Allocation of the root page table happening during init. */ ++ if (unlikely(!vm->root_page_table)) { ++ struct page *p; ++ ++ drm_WARN_ON(&vm->ptdev->base, vm->op_ctx); ++ p = alloc_pages_node(dev_to_node(vm->ptdev->base.dev), ++ gfp | __GFP_ZERO, get_order(size)); ++ page = p ? page_address(p) : NULL; ++ vm->root_page_table = page; ++ return page; ++ } ++ ++ /* We're not supposed to have anything bigger than 4k here, because we picked a ++ * 4k granule size at init time. ++ */ ++ if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) ++ return NULL; ++ ++ /* We must have some op_ctx attached to the VM and it must have at least one ++ * free page. ++ */ ++ if (drm_WARN_ON(&vm->ptdev->base, !vm->op_ctx) || ++ drm_WARN_ON(&vm->ptdev->base, ++ vm->op_ctx->rsvd_page_tables.ptr >= vm->op_ctx->rsvd_page_tables.count)) ++ return NULL; ++ ++ page = vm->op_ctx->rsvd_page_tables.pages[vm->op_ctx->rsvd_page_tables.ptr++]; ++ memset(page, 0, SZ_4K); ++ ++ /* Page table entries don't use virtual addresses, which trips out ++ * kmemleak. kmemleak_alloc_phys() might work, but physical addresses ++ * are mixed with other fields, and I fear kmemleak won't detect that ++ * either. ++ * ++ * Let's just ignore memory passed to the page-table driver for now. ++ */ ++ kmemleak_ignore(page); ++ return page; ++} ++ ++/** ++ * @free_pt() - Custom page table free function ++ * @cookie: Cookie passed at page table allocation time. ++ * @data: Page table to free. ++ * @size: Size of the page table. This size should be fixed, ++ * and determined at creation time based on the granule size. ++ */ ++static void free_pt(void *cookie, void *data, size_t size) ++{ ++ struct panthor_vm *vm = cookie; ++ ++ if (unlikely(vm->root_page_table == data)) { ++ free_pages((unsigned long)data, get_order(size)); ++ vm->root_page_table = NULL; ++ return; ++ } ++ ++ if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) ++ return; ++ ++ /* Return the page to the pt_cache. 
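The comments above describe an over-provision/return dance around pt_cache without showing it end to end. Below is a minimal sketch of that reserve/consume/return pattern using the same kmem_cache bulk helpers the op-ctx code further down relies on; example_reserve_and_return() and the array size of 8 are invented for illustration, and <linux/slab.h> is assumed for the bulk API.

static int example_reserve_and_return(struct kmem_cache *cache)
{
	void *pages[8];
	u32 used = 0;
	int got;

	/* Reserve the worst case up front, before entering any
	 * dma-signalling section.
	 */
	got = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(pages), pages);
	if (got != ARRAY_SIZE(pages)) {
		if (got)
			kmem_cache_free_bulk(cache, got, pages);
		return -ENOMEM;
	}

	/* alloc_pt() would hand out pages[used++] while the operation runs. */

	/* Whatever was not consumed goes back to the cache in one call. */
	kmem_cache_free_bulk(cache, got - used, pages + used);
	return 0;
}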
*/ ++ kmem_cache_free(pt_cache, data); ++} ++ ++static int wait_ready(struct panthor_device *ptdev, u32 as_nr) ++{ ++ int ret; ++ u32 val; ++ ++ /* Wait for the MMU status to indicate there is no active command, in ++ * case one is pending. ++ */ ++ ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr), ++ val, !(val & AS_STATUS_AS_ACTIVE), ++ 10, 100000); ++ ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ drm_err(&ptdev->base, "AS_ACTIVE bit stuck\n"); ++ } ++ ++ return ret; ++} ++ ++static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd) ++{ ++ int status; ++ ++ /* write AS_COMMAND when MMU is ready to accept another command */ ++ status = wait_ready(ptdev, as_nr); ++ if (!status) ++ gpu_write(ptdev, AS_COMMAND(as_nr), cmd); ++ ++ return status; ++} ++ ++static void lock_region(struct panthor_device *ptdev, u32 as_nr, ++ u64 region_start, u64 size) ++{ ++ u8 region_width; ++ u64 region; ++ u64 region_end = region_start + size; ++ ++ if (!size) ++ return; ++ ++ /* ++ * The locked region is a naturally aligned power of 2 block encoded as ++ * log2 minus(1). ++ * Calculate the desired start/end and look for the highest bit which ++ * differs. The smallest naturally aligned block must include this bit ++ * change, the desired region starts with this bit (and subsequent bits) ++ * zeroed and ends with the bit (and subsequent bits) set to one. ++ */ ++ region_width = max(fls64(region_start ^ (region_end - 1)), ++ const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1; ++ ++ /* ++ * Mask off the low bits of region_start (which would be ignored by ++ * the hardware anyway) ++ */ ++ region_start &= GENMASK_ULL(63, region_width); ++ ++ region = region_width | region_start; ++ ++ /* Lock the region that needs to be updated */ ++ gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region)); ++ gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region)); ++ write_cmd(ptdev, as_nr, AS_COMMAND_LOCK); ++} ++ ++static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, ++ u64 iova, u64 size, u32 op) ++{ ++ lockdep_assert_held(&ptdev->mmu->as.slots_lock); ++ ++ if (as_nr < 0) ++ return 0; ++ ++ if (op != AS_COMMAND_UNLOCK) ++ lock_region(ptdev, as_nr, iova, size); ++ ++ /* Run the MMU operation */ ++ write_cmd(ptdev, as_nr, op); ++ ++ /* Wait for the flush to complete */ ++ return wait_ready(ptdev, as_nr); ++} ++ ++static int mmu_hw_do_operation(struct panthor_vm *vm, ++ u64 iova, u64 size, u32 op) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ int ret; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ return ret; ++} ++ ++static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, ++ u64 transtab, u64 transcfg, u64 memattr) ++{ ++ int ret; ++ ++ ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); ++ if (ret) ++ return ret; ++ ++ gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab)); ++ gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab)); ++ ++ gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr)); ++ gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr)); ++ ++ gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg)); ++ gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg)); ++ ++ return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); ++} ++ ++static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) ++{ ++ int ret; 
++ ++ ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); ++ if (ret) ++ return ret; ++ ++ gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0); ++ gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0); ++ ++ gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0); ++ gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0); ++ ++ gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED); ++ gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0); ++ ++ return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); ++} ++ ++static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value) ++{ ++ /* Bits 16 to 31 mean REQ_COMPLETE. */ ++ return value & GENMASK(15, 0); ++} ++ ++static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as) ++{ ++ return BIT(as); ++} ++ ++/** ++ * panthor_vm_has_unhandled_faults() - Check if a VM has unhandled faults ++ * @vm: VM to check. ++ * ++ * Return: true if the VM has unhandled faults, false otherwise. ++ */ ++bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm) ++{ ++ return vm->unhandled_fault; ++} ++ ++/** ++ * panthor_vm_is_unusable() - Check if the VM is still usable ++ * @vm: VM to check. ++ * ++ * Return: true if the VM is unusable, false otherwise. ++ */ ++bool panthor_vm_is_unusable(struct panthor_vm *vm) ++{ ++ return vm->unusable; ++} ++ ++static void panthor_vm_release_as_locked(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ lockdep_assert_held(&ptdev->mmu->as.slots_lock); ++ ++ if (drm_WARN_ON(&ptdev->base, vm->as.id < 0)) ++ return; ++ ++ ptdev->mmu->as.slots[vm->as.id].vm = NULL; ++ clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); ++ refcount_set(&vm->as.active_cnt, 0); ++ list_del_init(&vm->as.lru_node); ++ vm->as.id = -1; ++} ++ ++/** ++ * panthor_vm_active() - Flag a VM as active ++ * @VM: VM to flag as active. ++ * ++ * Assigns an address space to a VM so it can be used by the GPU/MCU. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_active(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg; ++ int ret = 0, as, cookie; ++ u64 transtab, transcfg; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return -ENODEV; ++ ++ if (refcount_inc_not_zero(&vm->as.active_cnt)) ++ goto out_dev_exit; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ if (refcount_inc_not_zero(&vm->as.active_cnt)) ++ goto out_unlock; ++ ++ as = vm->as.id; ++ if (as >= 0) { ++ /* Unhandled pagefault on this AS, the MMU was disabled. We need to ++ * re-enable the MMU after clearing+unmasking the AS interrupts. 
++ */ ++ if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) ++ goto out_enable_as; ++ ++ goto out_make_active; ++ } ++ ++ /* Check for a free AS */ ++ if (vm->for_mcu) { ++ drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0)); ++ as = 0; ++ } else { ++ as = ffz(ptdev->mmu->as.alloc_mask | BIT(0)); ++ } ++ ++ if (!(BIT(as) & ptdev->gpu_info.as_present)) { ++ struct panthor_vm *lru_vm; ++ ++ lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list, ++ struct panthor_vm, ++ as.lru_node); ++ if (drm_WARN_ON(&ptdev->base, !lru_vm)) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ ++ drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt)); ++ as = lru_vm->as.id; ++ panthor_vm_release_as_locked(lru_vm); ++ } ++ ++ /* Assign the free or reclaimed AS to the FD */ ++ vm->as.id = as; ++ set_bit(as, &ptdev->mmu->as.alloc_mask); ++ ptdev->mmu->as.slots[as].vm = vm; ++ ++out_enable_as: ++ transtab = cfg->arm_lpae_s1_cfg.ttbr; ++ transcfg = AS_TRANSCFG_PTW_MEMATTR_WB | ++ AS_TRANSCFG_PTW_RA | ++ AS_TRANSCFG_ADRMODE_AARCH64_4K | ++ AS_TRANSCFG_INA_BITS(55 - va_bits); ++ if (ptdev->coherent) ++ transcfg |= AS_TRANSCFG_PTW_SH_OS; ++ ++ /* If the VM is re-activated, we clear the fault. */ ++ vm->unhandled_fault = false; ++ ++ /* Unhandled pagefault on this AS, clear the fault and re-enable interrupts ++ * before enabling the AS. ++ */ ++ if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { ++ gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); ++ ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); ++ gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); ++ } ++ ++ ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr); ++ ++out_make_active: ++ if (!ret) { ++ refcount_set(&vm->as.active_cnt, 1); ++ list_del_init(&vm->as.lru_node); ++ } ++ ++out_unlock: ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++/** ++ * panthor_vm_idle() - Flag a VM idle ++ * @VM: VM to flag as idle. ++ * ++ * When we know the GPU is done with the VM (no more jobs to process), ++ * we can relinquish the AS slot attached to this VM, if any. ++ * ++ * We don't release the slot immediately, but instead place the VM in ++ * the LRU list, so it can be evicted if another VM needs an AS slot. ++ * This way, VMs keep attached to the AS they were given until we run ++ * out of free slot, limiting the number of MMU operations (TLB flush ++ * and other AS updates). ++ */ ++void panthor_vm_idle(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ if (!refcount_dec_and_mutex_lock(&vm->as.active_cnt, &ptdev->mmu->as.slots_lock)) ++ return; ++ ++ if (!drm_WARN_ON(&ptdev->base, vm->as.id == -1 || !list_empty(&vm->as.lru_node))) ++ list_add_tail(&vm->as.lru_node, &ptdev->mmu->as.lru_list); ++ ++ refcount_set(&vm->as.active_cnt, 0); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++} ++ ++static void panthor_vm_stop(struct panthor_vm *vm) ++{ ++ drm_sched_stop(&vm->sched, NULL); ++} ++ ++static void panthor_vm_start(struct panthor_vm *vm) ++{ ++ drm_sched_start(&vm->sched, true); ++} ++ ++/** ++ * panthor_vm_as() - Get the AS slot attached to a VM ++ * @vm: VM to get the AS slot of. ++ * ++ * Return: -1 if the VM is not assigned an AS slot yet, >= 0 otherwise. 
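Taken together, the kernel-doc above implies a simple calling convention for users of a VM. The sketch below is hypothetical glue (example_submit() does not exist in the driver) but only uses helpers documented in this file.

static int example_submit(struct panthor_vm *vm)
{
	int ret;

	/* Binds an AS slot, evicting an idle VM from the LRU list if needed. */
	ret = panthor_vm_active(vm);
	if (ret)
		return ret;

	/* panthor_vm_as(vm) is >= 0 for as long as the VM stays active;
	 * point the GPU/MCU job at that address space and run it here.
	 */

	/* Drop our activity reference; the slot is kept on the LRU list so a
	 * quick re-activation doesn't pay for another TLB/cache flush.
	 */
	panthor_vm_idle(vm);
	return 0;
}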
++ */ ++int panthor_vm_as(struct panthor_vm *vm) ++{ ++ return vm->as.id; ++} ++ ++static size_t get_pgsize(u64 addr, size_t size, size_t *count) ++{ ++ /* ++ * io-pgtable only operates on multiple pages within a single table ++ * entry, so we need to split at boundaries of the table size, i.e. ++ * the next block size up. The distance from address A to the next ++ * boundary of block size B is logically B - A % B, but in unsigned ++ * two's complement where B is a power of two we get the equivalence ++ * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :) ++ */ ++ size_t blk_offset = -addr % SZ_2M; ++ ++ if (blk_offset || size < SZ_2M) { ++ *count = min_not_zero(blk_offset, size) / SZ_4K; ++ return SZ_4K; ++ } ++ blk_offset = -addr % SZ_1G ?: SZ_1G; ++ *count = min(blk_offset, size) / SZ_2M; ++ return SZ_2M; ++} ++ ++static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ int ret = 0, cookie; ++ ++ if (vm->as.id < 0) ++ return 0; ++ ++ /* If the device is unplugged, we just silently skip the flush. */ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return 0; ++ ++ /* Flush the PTs only if we're already awake */ ++ if (pm_runtime_active(ptdev->base.dev)) ++ ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ struct io_pgtable_ops *ops = vm->pgtbl_ops; ++ u64 offset = 0; ++ ++ drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size); ++ ++ while (offset < size) { ++ size_t unmapped_sz = 0, pgcount; ++ size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); ++ ++ unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL); ++ ++ if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) { ++ drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n", ++ iova + offset + unmapped_sz, ++ iova + offset + pgsize * pgcount, ++ iova, iova + size); ++ panthor_vm_flush_range(vm, iova, offset + unmapped_sz); ++ return -EINVAL; ++ } ++ offset += unmapped_sz; ++ } ++ ++ return panthor_vm_flush_range(vm, iova, size); ++} ++ ++static int ++panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, ++ struct sg_table *sgt, u64 offset, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ unsigned int count; ++ struct scatterlist *sgl; ++ struct io_pgtable_ops *ops = vm->pgtbl_ops; ++ u64 start_iova = iova; ++ int ret; ++ ++ if (!size) ++ return 0; ++ ++ for_each_sgtable_dma_sg(sgt, sgl, count) { ++ dma_addr_t paddr = sg_dma_address(sgl); ++ size_t len = sg_dma_len(sgl); ++ ++ if (len <= offset) { ++ offset -= len; ++ continue; ++ } ++ ++ paddr += offset; ++ len -= offset; ++ len = min_t(size_t, len, size); ++ size -= len; ++ ++ drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx", ++ vm->as.id, iova, &paddr, len); ++ ++ while (len) { ++ size_t pgcount, mapped = 0; ++ size_t pgsize = get_pgsize(iova | paddr, len, &pgcount); ++ ++ ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, ++ GFP_KERNEL, &mapped); ++ iova += mapped; ++ paddr += mapped; ++ len -= mapped; ++ ++ if (drm_WARN_ON(&ptdev->base, !ret && !mapped)) ++ ret = -ENOMEM; ++ ++ if (ret) { ++ /* If something failed, unmap what we've already mapped before ++ * returning. The unmap call is not supposed to fail. 
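To make the get_pgsize() arithmetic above concrete (assuming each map_pages() call maps everything it is handed): mapping 4 MiB at IOVA 0x2001000 is walked as 511 pages of 4 KiB up to the next 2 MiB boundary at 0x2200000, then one 2 MiB block, then one final 4 KiB page, so the io-pgtable is only ever asked for ranges that a single entry size can cover.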
++ */ ++ drm_WARN_ON(&ptdev->base, ++ panthor_vm_unmap_pages(vm, start_iova, ++ iova - start_iova)); ++ return ret; ++ } ++ } ++ ++ if (!size) ++ break; ++ } ++ ++ return panthor_vm_flush_range(vm, start_iova, iova - start_iova); ++} ++ ++static int flags_to_prot(u32 flags) ++{ ++ int prot = 0; ++ ++ if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC) ++ prot |= IOMMU_NOEXEC; ++ ++ if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)) ++ prot |= IOMMU_CACHE; ++ ++ if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY) ++ prot |= IOMMU_READ; ++ else ++ prot |= IOMMU_READ | IOMMU_WRITE; ++ ++ return prot; ++} ++ ++/** ++ * panthor_vm_alloc_va() - Allocate a region in the auto-va space ++ * @VM: VM to allocate a region on. ++ * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user ++ * wants the VA to be automatically allocated from the auto-VA range. ++ * @size: size of the VA range. ++ * @va_node: drm_mm_node to initialize. Must be zero-initialized. ++ * ++ * Some GPU objects, like heap chunks, are fully managed by the kernel and ++ * need to be mapped to the userspace VM, in the region reserved for kernel ++ * objects. ++ * ++ * This function takes care of allocating a region in the kernel auto-VA space. ++ * ++ * Return: 0 on success, an error code otherwise. ++ */ ++int ++panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, ++ struct drm_mm_node *va_node) ++{ ++ int ret; ++ ++ if (!size || (size & ~PAGE_MASK)) ++ return -EINVAL; ++ ++ if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK)) ++ return -EINVAL; ++ ++ mutex_lock(&vm->mm_lock); ++ if (va != PANTHOR_VM_KERNEL_AUTO_VA) { ++ va_node->start = va; ++ va_node->size = size; ++ ret = drm_mm_reserve_node(&vm->mm, va_node); ++ } else { ++ ret = drm_mm_insert_node_in_range(&vm->mm, va_node, size, ++ size >= SZ_2M ? SZ_2M : SZ_4K, ++ 0, vm->kernel_auto_va.start, ++ vm->kernel_auto_va.end, ++ DRM_MM_INSERT_BEST); ++ } ++ mutex_unlock(&vm->mm_lock); ++ ++ return ret; ++} ++ ++/** ++ * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va() ++ * @VM: VM to free the region on. ++ * @va_node: Memory node representing the region to free. ++ */ ++void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node) ++{ ++ mutex_lock(&vm->mm_lock); ++ drm_mm_remove_node(va_node); ++ mutex_unlock(&vm->mm_lock); ++} ++ ++static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj); ++ struct drm_gpuvm *vm = vm_bo->vm; ++ bool unpin; ++ ++ /* We must retain the GEM before calling drm_gpuvm_bo_put(), ++ * otherwise the mutex might be destroyed while we hold it. ++ * Same goes for the VM, since we take the VM resv lock. ++ */ ++ drm_gem_object_get(&bo->base.base); ++ drm_gpuvm_get(vm); ++ ++ /* We take the resv lock to protect against concurrent accesses to the ++ * gpuvm evicted/extobj lists that are modified in ++ * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put() ++ * releases sthe last vm_bo reference. ++ * We take the BO GPUVA list lock to protect the vm_bo removal from the ++ * GEM vm_bo list. ++ */ ++ dma_resv_lock(drm_gpuvm_resv(vm), NULL); ++ mutex_lock(&bo->gpuva_list_lock); ++ unpin = drm_gpuvm_bo_put(vm_bo); ++ mutex_unlock(&bo->gpuva_list_lock); ++ dma_resv_unlock(drm_gpuvm_resv(vm)); ++ ++ /* If the vm_bo object was destroyed, release the pin reference that ++ * was hold by this object. 
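A sketch of how a kernel-managed object would claim and release auto-VA space with the helpers above; example_auto_va() is hypothetical, but the zero-initialization and page-alignment requirements come straight from the kernel-doc.

static int example_auto_va(struct panthor_vm *vm, u64 size,
			   struct drm_mm_node *node)
{
	int ret;

	/* Caller responsibility: the node must be zero-initialized and the
	 * size page-aligned.
	 */
	memset(node, 0, sizeof(*node));

	ret = panthor_vm_alloc_va(vm, PANTHOR_VM_KERNEL_AUTO_VA, size, node);
	if (ret)
		return ret;

	/* ... map the backing pages at node->start and use the object ... */

	panthor_vm_free_va(vm, node);
	return 0;
}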
++ */ ++ if (unpin && !bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ drm_gpuvm_put(vm); ++ drm_gem_object_put(&bo->base.base); ++} ++ ++static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm) ++{ ++ struct panthor_vma *vma, *tmp_vma; ++ ++ u32 remaining_pt_count = op_ctx->rsvd_page_tables.count - ++ op_ctx->rsvd_page_tables.ptr; ++ ++ if (remaining_pt_count) { ++ kmem_cache_free_bulk(pt_cache, remaining_pt_count, ++ op_ctx->rsvd_page_tables.pages + ++ op_ctx->rsvd_page_tables.ptr); ++ } ++ ++ kfree(op_ctx->rsvd_page_tables.pages); ++ ++ if (op_ctx->map.vm_bo) ++ panthor_vm_bo_put(op_ctx->map.vm_bo); ++ ++ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) ++ kfree(op_ctx->preallocated_vmas[i]); ++ ++ list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) { ++ list_del(&vma->node); ++ panthor_vm_bo_put(vma->base.vm_bo); ++ kfree(vma); ++ } ++} ++ ++static struct panthor_vma * ++panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx) ++{ ++ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) { ++ struct panthor_vma *vma = op_ctx->preallocated_vmas[i]; ++ ++ if (vma) { ++ op_ctx->preallocated_vmas[i] = NULL; ++ return vma; ++ } ++ } ++ ++ return NULL; ++} ++ ++static int ++panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) ++{ ++ u32 vma_count; ++ ++ switch (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ /* One VMA for the new mapping, and two more VMAs for the remap case ++ * which might contain both a prev and next VA. ++ */ ++ vma_count = 3; ++ break; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ /* Partial unmaps might trigger a remap with either a prev or a next VA, ++ * but not both. ++ */ ++ vma_count = 1; ++ break; ++ ++ default: ++ return 0; ++ } ++ ++ for (u32 i = 0; i < vma_count; i++) { ++ struct panthor_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); ++ ++ if (!vma) ++ return -ENOMEM; ++ ++ op_ctx->preallocated_vmas[i] = vma; ++ } ++ ++ return 0; ++} ++ ++#define PANTHOR_VM_BIND_OP_MAP_FLAGS \ ++ (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ ++static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm, ++ struct panthor_gem_object *bo, ++ u64 offset, ++ u64 size, u64 va, ++ u32 flags) ++{ ++ struct drm_gpuvm_bo *preallocated_vm_bo; ++ struct sg_table *sgt = NULL; ++ u64 pt_count; ++ int ret; ++ ++ if (!bo) ++ return -EINVAL; ++ ++ if ((flags & ~PANTHOR_VM_BIND_OP_MAP_FLAGS) || ++ (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP) ++ return -EINVAL; ++ ++ /* Make sure the VA and size are aligned and in-bounds. */ ++ if (size > bo->base.base.size || offset > bo->base.base.size - size) ++ return -EINVAL; ++ ++ /* If the BO has an exclusive VM attached, it can't be mapped to other VMs. */ ++ if (bo->exclusive_vm_root_gem && ++ bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm)) ++ return -EINVAL; ++ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->flags = flags; ++ op_ctx->va.range = size; ++ op_ctx->va.addr = va; ++ ++ ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); ++ if (ret) ++ goto err_cleanup; ++ ++ if (!bo->base.base.import_attach) { ++ /* Pre-reserve the BO pages, so the map operation doesn't have to ++ * allocate. 
++ */ ++ ret = drm_gem_shmem_pin(&bo->base); ++ if (ret) ++ goto err_cleanup; ++ } ++ ++ sgt = drm_gem_shmem_get_pages_sgt(&bo->base); ++ if (IS_ERR(sgt)) { ++ if (!bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ ret = PTR_ERR(sgt); ++ goto err_cleanup; ++ } ++ ++ op_ctx->map.sgt = sgt; ++ ++ preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base); ++ if (!preallocated_vm_bo) { ++ if (!bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); ++ mutex_unlock(&bo->gpuva_list_lock); ++ ++ /* If the a vm_bo for this combination exists, it already ++ * retains a pin ref, and we can release the one we took earlier. ++ * ++ * If our pre-allocated vm_bo is picked, it now retains the pin ref, ++ * which will be released in panthor_vm_bo_put(). ++ */ ++ if (preallocated_vm_bo != op_ctx->map.vm_bo && ++ !bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ op_ctx->map.bo_offset = offset; ++ ++ /* L1, L2 and L3 page tables. ++ * We could optimize L3 allocation by iterating over the sgt and merging ++ * 2M contiguous blocks, but it's simpler to over-provision and return ++ * the pages if they're not used. ++ */ ++ pt_count = ((ALIGN(va + size, 1ull << 39) - ALIGN_DOWN(va, 1ull << 39)) >> 39) + ++ ((ALIGN(va + size, 1ull << 30) - ALIGN_DOWN(va, 1ull << 30)) >> 30) + ++ ((ALIGN(va + size, 1ull << 21) - ALIGN_DOWN(va, 1ull << 21)) >> 21); ++ ++ op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, ++ sizeof(*op_ctx->rsvd_page_tables.pages), ++ GFP_KERNEL); ++ if (!op_ctx->rsvd_page_tables.pages) ++ goto err_cleanup; ++ ++ ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, ++ op_ctx->rsvd_page_tables.pages); ++ op_ctx->rsvd_page_tables.count = ret; ++ if (ret != pt_count) { ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ ++ /* Insert BO into the extobj list last, when we know nothing can fail. */ ++ dma_resv_lock(panthor_vm_resv(vm), NULL); ++ drm_gpuvm_bo_extobj_add(op_ctx->map.vm_bo); ++ dma_resv_unlock(panthor_vm_resv(vm)); ++ ++ return 0; ++ ++err_cleanup: ++ panthor_vm_cleanup_op_ctx(op_ctx, vm); ++ return ret; ++} ++ ++static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm, ++ u64 va, u64 size) ++{ ++ u32 pt_count = 0; ++ int ret; ++ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->va.range = size; ++ op_ctx->va.addr = va; ++ op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP; ++ ++ /* Pre-allocate L3 page tables to account for the split-2M-block ++ * situation on unmap. 
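Plugging numbers into the map-path pt_count computation above: an 8 MiB mapping at VA 0x10000000 touches one 512 GiB-level region, one 1 GiB-level region and four 2 MiB-aligned blocks, so six page-table pages are reserved for it, with any unused ones handed back to pt_cache once the operation completes.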
++ */ ++ if (va != ALIGN(va, SZ_2M)) ++ pt_count++; ++ ++ if (va + size != ALIGN(va + size, SZ_2M) && ++ ALIGN(va + size, SZ_2M) != ALIGN(va, SZ_2M)) ++ pt_count++; ++ ++ ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); ++ if (ret) ++ goto err_cleanup; ++ ++ if (pt_count) { ++ op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, ++ sizeof(*op_ctx->rsvd_page_tables.pages), ++ GFP_KERNEL); ++ if (!op_ctx->rsvd_page_tables.pages) ++ goto err_cleanup; ++ ++ ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, ++ op_ctx->rsvd_page_tables.pages); ++ if (ret != pt_count) { ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ op_ctx->rsvd_page_tables.count = pt_count; ++ } ++ ++ return 0; ++ ++err_cleanup: ++ panthor_vm_cleanup_op_ctx(op_ctx, vm); ++ return ret; ++} ++ ++static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm) ++{ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY; ++} ++ ++/** ++ * panthor_vm_get_bo_for_va() - Get the GEM object mapped at a virtual address ++ * @vm: VM to look into. ++ * @va: Virtual address to search for. ++ * @bo_offset: Offset of the GEM object mapped at this virtual address. ++ * Only valid on success. ++ * ++ * The object returned by this function might no longer be mapped when the ++ * function returns. It's the caller responsibility to ensure there's no ++ * concurrent map/unmap operations making the returned value invalid, or ++ * make sure it doesn't matter if the object is no longer mapped. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_gem_object * ++panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset) ++{ ++ struct panthor_gem_object *bo = ERR_PTR(-ENOENT); ++ struct drm_gpuva *gpuva; ++ struct panthor_vma *vma; ++ ++ /* Take the VM lock to prevent concurrent map/unmap operations. */ ++ mutex_lock(&vm->op_lock); ++ gpuva = drm_gpuva_find_first(&vm->base, va, 1); ++ vma = gpuva ? container_of(gpuva, struct panthor_vma, base) : NULL; ++ if (vma && vma->base.gem.obj) { ++ drm_gem_object_get(vma->base.gem.obj); ++ bo = to_panthor_bo(vma->base.gem.obj); ++ *bo_offset = vma->base.gem.offset + (va - vma->base.va.addr); ++ } ++ mutex_unlock(&vm->op_lock); ++ ++ return bo; ++} ++ ++#define PANTHOR_VM_MIN_KERNEL_VA_SIZE SZ_256M ++ ++static u64 ++panthor_vm_create_get_user_va_range(const struct drm_panthor_vm_create *args, ++ u64 full_va_range) ++{ ++ u64 user_va_range; ++ ++ /* Make sure we have a minimum amount of VA space for kernel objects. */ ++ if (full_va_range < PANTHOR_VM_MIN_KERNEL_VA_SIZE) ++ return 0; ++ ++ if (args->user_va_range) { ++ /* Use the user provided value if != 0. */ ++ user_va_range = args->user_va_range; ++ } else if (TASK_SIZE_OF(current) < full_va_range) { ++ /* If the task VM size is smaller than the GPU VA range, pick this ++ * as our default user VA range, so userspace can CPU/GPU map buffers ++ * at the same address. ++ */ ++ user_va_range = TASK_SIZE_OF(current); ++ } else { ++ /* If the GPU VA range is smaller than the task VM size, we ++ * just have to live with the fact we won't be able to map ++ * all buffers at the same GPU/CPU address. ++ * ++ * If the GPU VA range is bigger than 4G (more than 32-bit of ++ * VA), we split the range in two, and assign half of it to ++ * the user and the other half to the kernel, if it's not, we ++ * keep the kernel VA space as small as possible. ++ */ ++ user_va_range = full_va_range > SZ_4G ? 
++ full_va_range / 2 : ++ full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; ++ } ++ ++ if (full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE < user_va_range) ++ user_va_range = full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; ++ ++ return user_va_range; ++} ++ ++#define PANTHOR_VM_CREATE_FLAGS 0 ++ ++static int ++panthor_vm_create_check_args(const struct panthor_device *ptdev, ++ const struct drm_panthor_vm_create *args, ++ u64 *kernel_va_start, u64 *kernel_va_range) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ u64 full_va_range = 1ull << va_bits; ++ u64 user_va_range; ++ ++ if (args->flags & ~PANTHOR_VM_CREATE_FLAGS) ++ return -EINVAL; ++ ++ user_va_range = panthor_vm_create_get_user_va_range(args, full_va_range); ++ if (!user_va_range || (args->user_va_range && args->user_va_range > user_va_range)) ++ return -EINVAL; ++ ++ /* Pick a kernel VA range that's a power of two, to have a clear split. */ ++ *kernel_va_range = rounddown_pow_of_two(full_va_range - user_va_range); ++ *kernel_va_start = full_va_range - *kernel_va_range; ++ return 0; ++} ++ ++/* ++ * Only 32 VMs per open file. If that becomes a limiting factor, we can ++ * increase this number. ++ */ ++#define PANTHOR_MAX_VMS_PER_FILE 32 ++ ++/** ++ * panthor_vm_pool_create_vm() - Create a VM ++ * @pool: The VM to create this VM on. ++ * @kernel_va_start: Start of the region reserved for kernel objects. ++ * @kernel_va_range: Size of the region reserved for kernel objects. ++ * ++ * Return: a positive VM ID on success, a negative error code otherwise. ++ */ ++int panthor_vm_pool_create_vm(struct panthor_device *ptdev, ++ struct panthor_vm_pool *pool, ++ struct drm_panthor_vm_create *args) ++{ ++ u64 kernel_va_start, kernel_va_range; ++ struct panthor_vm *vm; ++ int ret; ++ u32 id; ++ ++ ret = panthor_vm_create_check_args(ptdev, args, &kernel_va_start, &kernel_va_range); ++ if (ret) ++ return ret; ++ ++ vm = panthor_vm_create(ptdev, false, kernel_va_start, kernel_va_range, ++ kernel_va_start, kernel_va_range); ++ if (IS_ERR(vm)) ++ return PTR_ERR(vm); ++ ++ ret = xa_alloc(&pool->xa, &id, vm, ++ XA_LIMIT(1, PANTHOR_MAX_VMS_PER_FILE), GFP_KERNEL); ++ ++ if (ret) { ++ panthor_vm_put(vm); ++ return ret; ++ } ++ ++ args->user_va_range = kernel_va_start; ++ return id; ++} ++ ++static void panthor_vm_destroy(struct panthor_vm *vm) ++{ ++ if (!vm) ++ return; ++ ++ vm->destroyed = true; ++ ++ mutex_lock(&vm->heaps.lock); ++ panthor_heap_pool_destroy(vm->heaps.pool); ++ vm->heaps.pool = NULL; ++ mutex_unlock(&vm->heaps.lock); ++ ++ drm_WARN_ON(&vm->ptdev->base, ++ panthor_vm_unmap_range(vm, vm->base.mm_start, vm->base.mm_range)); ++ panthor_vm_put(vm); ++} ++ ++/** ++ * panthor_vm_pool_destroy_vm() - Destroy a VM. ++ * @pool: VM pool. ++ * @handle: VM handle. ++ * ++ * This function doesn't free the VM object or its resources, it just kills ++ * all mappings, and makes sure nothing can be mapped after that point. ++ * ++ * If there was any active jobs at the time this function is called, these ++ * jobs should experience page faults and be killed as a result. ++ * ++ * The VM resources are freed when the last reference on the VM object is ++ * dropped. ++ */ ++int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle) ++{ ++ struct panthor_vm *vm; ++ ++ vm = xa_erase(&pool->xa, handle); ++ ++ panthor_vm_destroy(vm); ++ ++ return vm ? 0 : -EINVAL; ++} ++ ++/** ++ * panthor_vm_pool_get_vm() - Retrieve VM object bound to a VM handle ++ * @pool: VM pool to check. ++ * @handle: Handle of the VM to retrieve. 
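As a worked example of the split done in panthor_vm_create_check_args(): on a GPU exposing 48 VA bits, a userspace request of user_va_range = 1 TiB leaves 2^48 - 2^40 bytes, which rounddown_pow_of_two() turns into a 128 TiB kernel range starting at 2^47, comfortably above the user range and well past the 256 MiB PANTHOR_VM_MIN_KERNEL_VA_SIZE floor.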
++ * ++ * Return: A valid pointer if the VM exists, NULL otherwise. ++ */ ++struct panthor_vm * ++panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle) ++{ ++ struct panthor_vm *vm; ++ ++ vm = panthor_vm_get(xa_load(&pool->xa, handle)); ++ ++ return vm; ++} ++ ++/** ++ * panthor_vm_pool_destroy() - Destroy a VM pool. ++ * @pfile: File. ++ * ++ * Destroy all VMs in the pool, and release the pool resources. ++ * ++ * Note that VMs can outlive the pool they were created from if other ++ * objects hold a reference to there VMs. ++ */ ++void panthor_vm_pool_destroy(struct panthor_file *pfile) ++{ ++ struct panthor_vm *vm; ++ unsigned long i; ++ ++ if (!pfile->vms) ++ return; ++ ++ xa_for_each(&pfile->vms->xa, i, vm) ++ panthor_vm_destroy(vm); ++ ++ xa_destroy(&pfile->vms->xa); ++ kfree(pfile->vms); ++} ++ ++/** ++ * panthor_vm_pool_create() - Create a VM pool ++ * @pfile: File. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_pool_create(struct panthor_file *pfile) ++{ ++ pfile->vms = kzalloc(sizeof(*pfile->vms), GFP_KERNEL); ++ if (!pfile->vms) ++ return -ENOMEM; ++ ++ xa_init_flags(&pfile->vms->xa, XA_FLAGS_ALLOC1); ++ return 0; ++} ++ ++/* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */ ++static void mmu_tlb_flush_all(void *cookie) ++{ ++} ++ ++static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) ++{ ++} ++ ++static const struct iommu_flush_ops mmu_tlb_ops = { ++ .tlb_flush_all = mmu_tlb_flush_all, ++ .tlb_flush_walk = mmu_tlb_flush_walk, ++}; ++ ++static const char *access_type_name(struct panthor_device *ptdev, ++ u32 fault_status) ++{ ++ switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { ++ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: ++ return "ATOMIC"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_READ: ++ return "READ"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: ++ return "WRITE"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_EX: ++ return "EXECUTE"; ++ default: ++ drm_WARN_ON(&ptdev->base, 1); ++ return NULL; ++ } ++} ++ ++static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ bool has_unhandled_faults = false; ++ ++ status = panthor_mmu_fault_mask(ptdev, status); ++ while (status) { ++ u32 as = ffs(status | (status >> 16)) - 1; ++ u32 mask = panthor_mmu_as_fault_mask(ptdev, as); ++ u32 new_int_mask; ++ u64 addr; ++ u32 fault_status; ++ u32 exception_type; ++ u32 access_type; ++ u32 source_id; ++ ++ fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as)); ++ addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as)); ++ addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32; ++ ++ /* decode the fault status */ ++ exception_type = fault_status & 0xFF; ++ access_type = (fault_status >> 8) & 0x3; ++ source_id = (fault_status >> 16); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ ptdev->mmu->as.faulty_mask |= mask; ++ new_int_mask = ++ panthor_mmu_fault_mask(ptdev, ~ptdev->mmu->as.faulty_mask); ++ ++ /* terminal fault, print info about the fault */ ++ drm_err(&ptdev->base, ++ "Unhandled Page fault in AS%d at VA 0x%016llX\n" ++ "raw fault status: 0x%X\n" ++ "decoded fault status: %s\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n", ++ as, addr, ++ fault_status, ++ (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), ++ exception_type, panthor_exception_name(ptdev, exception_type), ++ access_type, access_type_name(ptdev, fault_status), ++ source_id); ++ ++ /* Ignore MMU interrupts on this AS until it's been ++ * re-enabled. 
++ */ ++ ptdev->mmu->irq.mask = new_int_mask; ++ gpu_write(ptdev, MMU_INT_MASK, new_int_mask); ++ ++ if (ptdev->mmu->as.slots[as].vm) ++ ptdev->mmu->as.slots[as].vm->unhandled_fault = true; ++ ++ /* Disable the MMU to kill jobs on this AS. */ ++ panthor_mmu_as_disable(ptdev, as); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ status &= ~mask; ++ has_unhandled_faults = true; ++ } ++ ++ if (has_unhandled_faults) ++ panthor_sched_report_mmu_fault(ptdev); ++} ++PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler); ++ ++/** ++ * panthor_mmu_suspend() - Suspend the MMU logic ++ * @ptdev: Device. ++ * ++ * All we do here is de-assign the AS slots on all active VMs, so things ++ * get flushed to the main memory, and no further access to these VMs are ++ * possible. ++ * ++ * We also suspend the MMU IRQ. ++ */ ++void panthor_mmu_suspend(struct panthor_device *ptdev) ++{ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) { ++ drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); ++ panthor_vm_release_as_locked(vm); ++ } ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++} ++ ++/** ++ * panthor_mmu_resume() - Resume the MMU logic ++ * @ptdev: Device. ++ * ++ * Resume the IRQ. ++ * ++ * We don't re-enable previously active VMs. We assume other parts of the ++ * driver will call panthor_vm_active() on the VMs they intend to use. ++ */ ++void panthor_mmu_resume(struct panthor_device *ptdev) ++{ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ptdev->mmu->as.alloc_mask = 0; ++ ptdev->mmu->as.faulty_mask = 0; ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); ++} ++ ++/** ++ * panthor_mmu_pre_reset() - Prepare for a reset ++ * @ptdev: Device. ++ * ++ * Suspend the IRQ, and make sure all VM_BIND queues are stopped, so we ++ * don't get asked to do a VM operation while the GPU is down. ++ * ++ * We don't cleanly shutdown the AS slots here, because the reset might ++ * come from an AS_ACTIVE_BIT stuck situation. ++ */ ++void panthor_mmu_pre_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_vm *vm; ++ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ ptdev->mmu->vm.reset_in_progress = true; ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) ++ panthor_vm_stop(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++} ++ ++/** ++ * panthor_mmu_post_reset() - Restore things after a reset ++ * @ptdev: Device. ++ * ++ * Put the MMU logic back in action after a reset. That implies resuming the ++ * IRQ and re-enabling the VM_BIND queues. ++ */ ++void panthor_mmu_post_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_vm *vm; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ /* Now that the reset is effective, we can assume that none of the ++ * AS slots are setup, and clear the faulty flags too. ++ */ ++ ptdev->mmu->as.alloc_mask = 0; ++ ptdev->mmu->as.faulty_mask = 0; ++ ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) ++ panthor_vm_release_as_locked(vm); ++ } ++ ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); ++ ++ /* Restart the VM_BIND queues. 
*/ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { ++ panthor_vm_start(vm); ++ } ++ ptdev->mmu->vm.reset_in_progress = false; ++ mutex_unlock(&ptdev->mmu->vm.lock); ++} ++ ++static void panthor_vm_free(struct drm_gpuvm *gpuvm) ++{ ++ struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base); ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ mutex_lock(&vm->heaps.lock); ++ if (drm_WARN_ON(&ptdev->base, vm->heaps.pool)) ++ panthor_heap_pool_destroy(vm->heaps.pool); ++ mutex_unlock(&vm->heaps.lock); ++ mutex_destroy(&vm->heaps.lock); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_del(&vm->node); ++ /* Restore the scheduler state so we can call drm_sched_entity_destroy() ++ * and drm_sched_fini(). If get there, that means we have no job left ++ * and no new jobs can be queued, so we can start the scheduler without ++ * risking interfering with the reset. ++ */ ++ if (ptdev->mmu->vm.reset_in_progress) ++ panthor_vm_start(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ drm_sched_entity_destroy(&vm->entity); ++ drm_sched_fini(&vm->sched); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ if (vm->as.id >= 0) { ++ int cookie; ++ ++ if (drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_mmu_as_disable(ptdev, vm->as.id); ++ drm_dev_exit(cookie); ++ } ++ ++ ptdev->mmu->as.slots[vm->as.id].vm = NULL; ++ clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); ++ list_del(&vm->as.lru_node); ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ free_io_pgtable_ops(vm->pgtbl_ops); ++ ++ drm_mm_takedown(&vm->mm); ++ kfree(vm); ++} ++ ++/** ++ * panthor_vm_put() - Release a reference on a VM ++ * @vm: VM to release the reference on. Can be NULL. ++ */ ++void panthor_vm_put(struct panthor_vm *vm) ++{ ++ drm_gpuvm_put(vm ? &vm->base : NULL); ++} ++ ++/** ++ * panthor_vm_get() - Get a VM reference ++ * @vm: VM to get the reference on. Can be NULL. ++ * ++ * Return: @vm value. ++ */ ++struct panthor_vm *panthor_vm_get(struct panthor_vm *vm) ++{ ++ if (vm) ++ drm_gpuvm_get(&vm->base); ++ ++ return vm; ++} ++ ++/** ++ * panthor_vm_get_heap_pool() - Get the heap pool attached to a VM ++ * @vm: VM to query the heap pool on. ++ * @create: True if the heap pool should be created when it doesn't exist. ++ * ++ * Heap pools are per-VM. This function allows one to retrieve the heap pool ++ * attached to a VM. ++ * ++ * If no heap pool exists yet, and @create is true, we create one. ++ * ++ * The returned panthor_heap_pool should be released with panthor_heap_pool_put(). ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create) ++{ ++ struct panthor_heap_pool *pool; ++ ++ mutex_lock(&vm->heaps.lock); ++ if (!vm->heaps.pool && create) { ++ if (vm->destroyed) ++ pool = ERR_PTR(-EINVAL); ++ else ++ pool = panthor_heap_pool_create(vm->ptdev, vm); ++ ++ if (!IS_ERR(pool)) ++ vm->heaps.pool = panthor_heap_pool_get(pool); ++ } else { ++ pool = panthor_heap_pool_get(vm->heaps.pool); ++ } ++ mutex_unlock(&vm->heaps.lock); ++ ++ return pool; ++} ++ ++static u64 mair_to_memattr(u64 mair) ++{ ++ u64 memattr = 0; ++ u32 i; ++ ++ for (i = 0; i < 8; i++) { ++ u8 in_attr = mair >> (8 * i), out_attr; ++ u8 outer = in_attr >> 4, inner = in_attr & 0xf; ++ ++ /* For caching to be enabled, inner and outer caching policy ++ * have to be both write-back, if one of them is write-through ++ * or non-cacheable, we just choose non-cacheable. 
Device ++ * memory is also translated to non-cacheable. ++ */ ++ if (!(outer & 3) || !(outer & 4) || !(inner & 4)) { ++ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC | ++ AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | ++ AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); ++ } else { ++ /* Use SH_CPU_INNER mode so SH_IS, which is used when ++ * IOMMU_CACHE is set, actually maps to the standard ++ * definition of inner-shareable and not Mali's ++ * internal-shareable mode. ++ */ ++ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | ++ AS_MEMATTR_AARCH64_SH_CPU_INNER | ++ AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); ++ } ++ ++ memattr |= (u64)out_attr << (8 * i); ++ } ++ ++ return memattr; ++} ++ ++static void panthor_vma_link(struct panthor_vm *vm, ++ struct panthor_vma *vma, ++ struct drm_gpuvm_bo *vm_bo) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ drm_gpuva_link(&vma->base, vm_bo); ++ drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo)); ++ mutex_unlock(&bo->gpuva_list_lock); ++} ++ ++static void panthor_vma_unlink(struct panthor_vm *vm, ++ struct panthor_vma *vma) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); ++ struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ drm_gpuva_unlink(&vma->base); ++ mutex_unlock(&bo->gpuva_list_lock); ++ ++ /* drm_gpuva_unlink() release the vm_bo, but we manually retained it ++ * when entering this function, so we can implement deferred VMA ++ * destruction. Re-assign it here. ++ */ ++ vma->base.vm_bo = vm_bo; ++ list_add_tail(&vma->node, &vm->op_ctx->returned_vmas); ++} ++ ++static void panthor_vma_init(struct panthor_vma *vma, u32 flags) ++{ ++ INIT_LIST_HEAD(&vma->node); ++ vma->flags = flags; ++} ++ ++#define PANTHOR_VM_MAP_FLAGS \ ++ (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED) ++ ++static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) ++{ ++ struct panthor_vm *vm = priv; ++ struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; ++ struct panthor_vma *vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ int ret; ++ ++ if (!vma) ++ return -EINVAL; ++ ++ panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS); ++ ++ ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags), ++ op_ctx->map.sgt, op->map.gem.offset, ++ op->map.va.range); ++ if (ret) ++ return ret; ++ ++ /* Ref owned by the mapping now, clear the obj field so we don't release the ++ * pinning/obj ref behind GPUVA's back. 
++ */ ++ drm_gpuva_map(&vm->base, &vma->base, &op->map); ++ panthor_vma_link(vm, vma, op_ctx->map.vm_bo); ++ op_ctx->map.vm_bo = NULL; ++ return 0; ++} ++ ++static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, ++ void *priv) ++{ ++ struct panthor_vma *unmap_vma = container_of(op->remap.unmap->va, struct panthor_vma, base); ++ struct panthor_vm *vm = priv; ++ struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; ++ struct panthor_vma *prev_vma = NULL, *next_vma = NULL; ++ u64 unmap_start, unmap_range; ++ int ret; ++ ++ drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range); ++ ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range); ++ if (ret) ++ return ret; ++ ++ if (op->remap.prev) { ++ prev_vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ panthor_vma_init(prev_vma, unmap_vma->flags); ++ } ++ ++ if (op->remap.next) { ++ next_vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ panthor_vma_init(next_vma, unmap_vma->flags); ++ } ++ ++ drm_gpuva_remap(prev_vma ? &prev_vma->base : NULL, ++ next_vma ? &next_vma->base : NULL, ++ &op->remap); ++ ++ if (prev_vma) { ++ /* panthor_vma_link() transfers the vm_bo ownership to ++ * the VMA object. Since the vm_bo we're passing is still ++ * owned by the old mapping which will be released when this ++ * mapping is destroyed, we need to grab a ref here. ++ */ ++ panthor_vma_link(vm, prev_vma, ++ drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); ++ } ++ ++ if (next_vma) { ++ panthor_vma_link(vm, next_vma, ++ drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); ++ } ++ ++ panthor_vma_unlink(vm, unmap_vma); ++ return 0; ++} ++ ++static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, ++ void *priv) ++{ ++ struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base); ++ struct panthor_vm *vm = priv; ++ int ret; ++ ++ ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr, ++ unmap_vma->base.va.range); ++ if (drm_WARN_ON(&vm->ptdev->base, ret)) ++ return ret; ++ ++ drm_gpuva_unmap(&op->unmap); ++ panthor_vma_unlink(vm, unmap_vma); ++ return 0; ++} ++ ++static const struct drm_gpuvm_ops panthor_gpuvm_ops = { ++ .vm_free = panthor_vm_free, ++ .sm_step_map = panthor_gpuva_sm_step_map, ++ .sm_step_remap = panthor_gpuva_sm_step_remap, ++ .sm_step_unmap = panthor_gpuva_sm_step_unmap, ++}; ++ ++/** ++ * panthor_vm_resv() - Get the dma_resv object attached to a VM. ++ * @vm: VM to get the dma_resv of. ++ * ++ * Return: A dma_resv object. 
++ */ ++struct dma_resv *panthor_vm_resv(struct panthor_vm *vm) ++{ ++ return drm_gpuvm_resv(&vm->base); ++} ++ ++struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm) ++{ ++ if (!vm) ++ return NULL; ++ ++ return vm->base.r_obj; ++} ++ ++static int ++panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, ++ bool flag_vm_unusable_on_failure) ++{ ++ u32 op_type = op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK; ++ int ret; ++ ++ if (op_type == DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY) ++ return 0; ++ ++ mutex_lock(&vm->op_lock); ++ vm->op_ctx = op; ++ switch (op_type) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ if (vm->unusable) { ++ ret = -EINVAL; ++ break; ++ } ++ ++ ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range, ++ op->map.vm_bo->obj, op->map.bo_offset); ++ break; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ ++ if (ret && flag_vm_unusable_on_failure) ++ vm->unusable = true; ++ ++ vm->op_ctx = NULL; ++ mutex_unlock(&vm->op_lock); ++ ++ return ret; ++} ++ ++static struct dma_fence * ++panthor_vm_bind_run_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); ++ bool cookie; ++ int ret; ++ ++ /* Not only we report an error whose result is propagated to the ++ * drm_sched finished fence, but we also flag the VM as unusable, because ++ * a failure in the async VM_BIND results in an inconsistent state. VM needs ++ * to be destroyed and recreated. ++ */ ++ cookie = dma_fence_begin_signalling(); ++ ret = panthor_vm_exec_op(job->vm, &job->ctx, true); ++ dma_fence_end_signalling(cookie); ++ ++ return ret ? ERR_PTR(ret) : NULL; ++} ++ ++static void panthor_vm_bind_job_release(struct kref *kref) ++{ ++ struct panthor_vm_bind_job *job = container_of(kref, struct panthor_vm_bind_job, refcount); ++ ++ if (job->base.s_fence) ++ drm_sched_job_cleanup(&job->base); ++ ++ panthor_vm_cleanup_op_ctx(&job->ctx, job->vm); ++ panthor_vm_put(job->vm); ++ kfree(job); ++} ++ ++/** ++ * panthor_vm_bind_job_put() - Release a VM_BIND job reference ++ * @sched_job: Job to release the reference on. ++ */ ++void panthor_vm_bind_job_put(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(sched_job, struct panthor_vm_bind_job, base); ++ ++ if (sched_job) ++ kref_put(&job->refcount, panthor_vm_bind_job_release); ++} ++ ++static void ++panthor_vm_bind_free_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(sched_job, struct panthor_vm_bind_job, base); ++ ++ drm_sched_job_cleanup(sched_job); ++ ++ /* Do the heavy cleanups asynchronously, so we're out of the ++ * dma-signaling path and can acquire dma-resv locks safely. ++ */ ++ queue_work(panthor_cleanup_wq, &job->cleanup_op_ctx_work); ++} ++ ++static enum drm_gpu_sched_stat ++panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) ++{ ++ WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!"); ++ return DRM_GPU_SCHED_STAT_NOMINAL; ++} ++ ++static const struct drm_sched_backend_ops panthor_vm_bind_ops = { ++ .run_job = panthor_vm_bind_run_job, ++ .free_job = panthor_vm_bind_free_job, ++ .timedout_job = panthor_vm_bind_timedout_job, ++}; ++ ++/** ++ * panthor_vm_create() - Create a VM ++ * @ptdev: Device. ++ * @for_mcu: True if this is the FW MCU VM. 
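With panthor_vm_exec_op() in place, the life cycle of an operation context can be summed up in one hypothetical synchronous-map helper; example_sync_map() is illustrative only, skips the GEM handle lookup done by the ioctl path, and simply chains the three static helpers defined earlier in this file.

static int example_sync_map(struct panthor_vm *vm, struct panthor_gem_object *bo,
			    u64 offset, u64 size, u64 va, u32 flags)
{
	struct panthor_vm_op_ctx op_ctx;
	int ret;

	/* Everything that may allocate happens here, outside any
	 * dma-signalling section. @flags must include
	 * DRM_PANTHOR_VM_BIND_OP_TYPE_MAP.
	 */
	ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags);
	if (ret)
		return ret;

	/* Consumes the pre-allocated VMAs and page tables under vm->op_lock.
	 * Passing false here means a failure is reported to the caller
	 * instead of flagging the whole VM as unusable.
	 */
	ret = panthor_vm_exec_op(vm, &op_ctx, false);

	/* Returns unused page tables to pt_cache and releases replaced VMAs. */
	panthor_vm_cleanup_op_ctx(&op_ctx, vm);
	return ret;
}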
++ * @kernel_va_start: Start of the range reserved for kernel BO mapping. ++ * @kernel_va_size: Size of the range reserved for kernel BO mapping. ++ * @auto_kernel_va_start: Start of the auto-VA kernel range. ++ * @auto_kernel_va_size: Size of the auto-VA kernel range. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_vm * ++panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, ++ u64 kernel_va_start, u64 kernel_va_size, ++ u64 auto_kernel_va_start, u64 auto_kernel_va_size) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); ++ u64 full_va_range = 1ull << va_bits; ++ struct drm_gem_object *dummy_gem; ++ struct drm_gpu_scheduler *sched; ++ struct io_pgtable_cfg pgtbl_cfg; ++ u64 mair, min_va, va_range; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = kzalloc(sizeof(*vm), GFP_KERNEL); ++ if (!vm) ++ return ERR_PTR(-ENOMEM); ++ ++ /* We allocate a dummy GEM for the VM. */ ++ dummy_gem = drm_gpuvm_resv_object_alloc(&ptdev->base); ++ if (!dummy_gem) { ++ ret = -ENOMEM; ++ goto err_free_vm; ++ } ++ ++ mutex_init(&vm->heaps.lock); ++ vm->for_mcu = for_mcu; ++ vm->ptdev = ptdev; ++ mutex_init(&vm->op_lock); ++ ++ if (for_mcu) { ++ /* CSF MCU is a cortex M7, and can only address 4G */ ++ min_va = 0; ++ va_range = SZ_4G; ++ } else { ++ min_va = 0; ++ va_range = full_va_range; ++ } ++ ++ mutex_init(&vm->mm_lock); ++ drm_mm_init(&vm->mm, kernel_va_start, kernel_va_size); ++ vm->kernel_auto_va.start = auto_kernel_va_start; ++ vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1; ++ ++ INIT_LIST_HEAD(&vm->node); ++ INIT_LIST_HEAD(&vm->as.lru_node); ++ vm->as.id = -1; ++ refcount_set(&vm->as.active_cnt, 0); ++ ++ pgtbl_cfg = (struct io_pgtable_cfg) { ++ .pgsize_bitmap = SZ_4K | SZ_2M, ++ .ias = va_bits, ++ .oas = pa_bits, ++ .coherent_walk = ptdev->coherent, ++ .tlb = &mmu_tlb_ops, ++ .iommu_dev = ptdev->base.dev, ++ .alloc = alloc_pt, ++ .free = free_pt, ++ }; ++ ++ vm->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, vm); ++ if (!vm->pgtbl_ops) { ++ ret = -EINVAL; ++ goto err_mm_takedown; ++ } ++ ++ /* Bind operations are synchronous for now, no timeout needed. */ ++ ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq, ++ 1, 1, 0, ++ MAX_SCHEDULE_TIMEOUT, NULL, NULL, ++ "panthor-vm-bind", ptdev->base.dev); ++ if (ret) ++ goto err_free_io_pgtable; ++ ++ sched = &vm->sched; ++ ret = drm_sched_entity_init(&vm->entity, 0, &sched, 1, NULL); ++ if (ret) ++ goto err_sched_fini; ++ ++ mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair; ++ vm->memattr = mair_to_memattr(mair); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_add_tail(&vm->node, &ptdev->mmu->vm.list); ++ ++ /* If a reset is in progress, stop the scheduler. */ ++ if (ptdev->mmu->vm.reset_in_progress) ++ panthor_vm_stop(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ /* We intentionally leave the reserved range to zero, because we want kernel VMAs ++ * to be handled the same way user VMAs are. ++ */ ++ drm_gpuvm_init(&vm->base, for_mcu ? 
"panthor-MCU-VM" : "panthor-GPU-VM", ++ DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem, ++ min_va, va_range, 0, 0, &panthor_gpuvm_ops); ++ drm_gem_object_put(dummy_gem); ++ return vm; ++ ++err_sched_fini: ++ drm_sched_fini(&vm->sched); ++ ++err_free_io_pgtable: ++ free_io_pgtable_ops(vm->pgtbl_ops); ++ ++err_mm_takedown: ++ drm_mm_takedown(&vm->mm); ++ drm_gem_object_put(dummy_gem); ++ ++err_free_vm: ++ kfree(vm); ++ return ERR_PTR(ret); ++} ++ ++static int ++panthor_vm_bind_prepare_op_ctx(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op, ++ struct panthor_vm_op_ctx *op_ctx) ++{ ++ struct drm_gem_object *gem; ++ int ret; ++ ++ /* Aligned on page size. */ ++ if ((op->va | op->size) & ~PAGE_MASK) ++ return -EINVAL; ++ ++ switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ gem = drm_gem_object_lookup(file, op->bo_handle); ++ ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm, ++ gem ? to_panthor_bo(gem) : NULL, ++ op->bo_offset, ++ op->size, ++ op->va, ++ op->flags); ++ drm_gem_object_put(gem); ++ return ret; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ return -EINVAL; ++ ++ if (op->bo_handle || op->bo_offset) ++ return -EINVAL; ++ ++ return panthor_vm_prepare_unmap_op_ctx(op_ctx, vm, op->va, op->size); ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: ++ if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ return -EINVAL; ++ ++ if (op->bo_handle || op->bo_offset) ++ return -EINVAL; ++ ++ if (op->va || op->size) ++ return -EINVAL; ++ ++ if (!op->syncs.count) ++ return -EINVAL; ++ ++ panthor_vm_prepare_sync_only_op_ctx(op_ctx, vm); ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++static void panthor_vm_bind_job_cleanup_op_ctx_work(struct work_struct *work) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(work, struct panthor_vm_bind_job, cleanup_op_ctx_work); ++ ++ panthor_vm_bind_job_put(&job->base); ++} ++ ++/** ++ * panthor_vm_bind_job_create() - Create a VM_BIND job ++ * @file: File. ++ * @vm: VM targeted by the VM_BIND job. ++ * @op: VM operation data. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct drm_sched_job * ++panthor_vm_bind_job_create(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op) ++{ ++ struct panthor_vm_bind_job *job; ++ int ret; ++ ++ if (!vm) ++ return ERR_PTR(-EINVAL); ++ ++ if (vm->destroyed || vm->unusable) ++ return ERR_PTR(-EINVAL); ++ ++ job = kzalloc(sizeof(*job), GFP_KERNEL); ++ if (!job) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &job->ctx); ++ if (ret) { ++ kfree(job); ++ return ERR_PTR(ret); ++ } ++ ++ INIT_WORK(&job->cleanup_op_ctx_work, panthor_vm_bind_job_cleanup_op_ctx_work); ++ kref_init(&job->refcount); ++ job->vm = panthor_vm_get(vm); ++ ++ ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm); ++ if (ret) ++ goto err_put_job; ++ ++ return &job->base; ++ ++err_put_job: ++ panthor_vm_bind_job_put(&job->base); ++ return ERR_PTR(ret); ++} ++ ++/** ++ * panthor_vm_bind_job_prepare_resvs() - Prepare VM_BIND job dma_resvs ++ * @exec: The locking/preparation context. ++ * @sched_job: The job to prepare resvs on. ++ * ++ * Locks and prepare the VM resv. ++ * ++ * If this is a map operation, locks and prepares the GEM resv. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */
++int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec,
++ struct drm_sched_job *sched_job)
++{
++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
++ int ret;
++
++ /* Acquire the VM lock and reserve a slot for this VM bind job. */
++ ret = drm_gpuvm_prepare_vm(&job->vm->base, exec, 1);
++ if (ret)
++ return ret;
++
++ if (job->ctx.map.vm_bo) {
++ /* Lock/prepare the GEM being mapped. */
++ ret = drm_exec_prepare_obj(exec, job->ctx.map.vm_bo->obj, 1);
++ if (ret)
++ return ret;
++ }
++
++ return 0;
++}
++
++/**
++ * panthor_vm_bind_job_update_resvs() - Update the resv objects touched by a job
++ * @exec: drm_exec context.
++ * @sched_job: Job to update the resvs on.
++ */
++void panthor_vm_bind_job_update_resvs(struct drm_exec *exec,
++ struct drm_sched_job *sched_job)
++{
++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
++
++ /* Explicit sync => we just register our job finished fence as bookkeep. */
++ drm_gpuvm_resv_add_fence(&job->vm->base, exec,
++ &sched_job->s_fence->finished,
++ DMA_RESV_USAGE_BOOKKEEP,
++ DMA_RESV_USAGE_BOOKKEEP);
++}
++
++void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec,
++ struct dma_fence *fence,
++ enum dma_resv_usage private_usage,
++ enum dma_resv_usage extobj_usage)
++{
++ drm_gpuvm_resv_add_fence(&vm->base, exec, fence, private_usage, extobj_usage);
++}
++
++/**
++ * panthor_vm_bind_exec_sync_op() - Execute a VM_BIND operation synchronously.
++ * @file: File.
++ * @vm: VM targeted by the VM operation.
++ * @op: Data describing the VM operation.
++ *
++ * Return: 0 on success, a negative error code otherwise.
++ */
++int panthor_vm_bind_exec_sync_op(struct drm_file *file,
++ struct panthor_vm *vm,
++ struct drm_panthor_vm_bind_op *op)
++{
++ struct panthor_vm_op_ctx op_ctx;
++ int ret;
++
++ /* No sync objects allowed on synchronous operations. */
++ if (op->syncs.count)
++ return -EINVAL;
++
++ if (!op->size)
++ return 0;
++
++ ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &op_ctx);
++ if (ret)
++ return ret;
++
++ ret = panthor_vm_exec_op(vm, &op_ctx, false);
++ panthor_vm_cleanup_op_ctx(&op_ctx, vm);
++
++ return ret;
++}
++
++/**
++ * panthor_vm_map_bo_range() - Map a GEM object range to a VM
++ * @vm: VM to map the GEM to.
++ * @bo: GEM object to map.
++ * @offset: Offset in the GEM object.
++ * @size: Size to map.
++ * @va: Virtual address to map the object to.
++ * @flags: Combination of drm_panthor_vm_bind_op_flags flags.
++ * Only map-related flags are valid.
++ *
++ * Internal use only. For userspace requests, use
++ * panthor_vm_bind_exec_sync_op() instead.
++ *
++ * Return: 0 on success, a negative error code otherwise.
++ */
++int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo,
++ u64 offset, u64 size, u64 va, u32 flags)
++{
++ struct panthor_vm_op_ctx op_ctx;
++ int ret;
++
++ ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags);
++ if (ret)
++ return ret;
++
++ ret = panthor_vm_exec_op(vm, &op_ctx, false);
++ panthor_vm_cleanup_op_ctx(&op_ctx, vm);
++
++ return ret;
++}
++
++/**
++ * panthor_vm_unmap_range() - Unmap a portion of the VA space
++ * @vm: VM to unmap the region from.
++ * @va: Virtual address to unmap. Must be 4k aligned.
++ * @size: Size of the region to unmap. Must be 4k aligned.
++ *
++ * Internal use only. For userspace requests, use
++ * panthor_vm_bind_exec_sync_op() instead.
++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size) ++{ ++ struct panthor_vm_op_ctx op_ctx; ++ int ret; ++ ++ ret = panthor_vm_prepare_unmap_op_ctx(&op_ctx, vm, va, size); ++ if (ret) ++ return ret; ++ ++ ret = panthor_vm_exec_op(vm, &op_ctx, false); ++ panthor_vm_cleanup_op_ctx(&op_ctx, vm); ++ ++ return ret; ++} ++ ++/** ++ * panthor_vm_prepare_mapped_bos_resvs() - Prepare resvs on VM BOs. ++ * @exec: Locking/preparation context. ++ * @vm: VM targeted by the GPU job. ++ * @slot_count: Number of slots to reserve. ++ * ++ * GPU jobs assume all BOs bound to the VM at the time the job is submitted ++ * are available when the job is executed. In order to guarantee that, we ++ * need to reserve a slot on all BOs mapped to a VM and update this slot with ++ * the job fence after its submission. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm *vm, ++ u32 slot_count) ++{ ++ int ret; ++ ++ /* Acquire the VM lock and reserve a slot for this GPU job. */ ++ ret = drm_gpuvm_prepare_vm(&vm->base, exec, slot_count); ++ if (ret) ++ return ret; ++ ++ return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count); ++} ++ ++/** ++ * panthor_mmu_unplug() - Unplug the MMU logic ++ * @ptdev: Device. ++ * ++ * No access to the MMU regs should be done after this function is called. ++ * We suspend the IRQ and disable all VMs to guarantee that. ++ */ ++void panthor_mmu_unplug(struct panthor_device *ptdev) ++{ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) { ++ drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); ++ panthor_vm_release_as_locked(vm); ++ } ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++} ++ ++static void panthor_mmu_release_wq(struct drm_device *ddev, void *res) ++{ ++ destroy_workqueue(res); ++} ++ ++/** ++ * panthor_mmu_init() - Initialize the MMU logic. ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_mmu_init(struct panthor_device *ptdev) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ struct panthor_mmu *mmu; ++ int ret, irq; ++ ++ mmu = drmm_kzalloc(&ptdev->base, sizeof(*mmu), GFP_KERNEL); ++ if (!mmu) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&mmu->as.lru_list); ++ ++ ret = drmm_mutex_init(&ptdev->base, &mmu->as.slots_lock); ++ if (ret) ++ return ret; ++ ++ INIT_LIST_HEAD(&mmu->vm.list); ++ ret = drmm_mutex_init(&ptdev->base, &mmu->vm.lock); ++ if (ret) ++ return ret; ++ ++ ptdev->mmu = mmu; ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "mmu"); ++ if (irq <= 0) ++ return -ENODEV; ++ ++ ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq, ++ panthor_mmu_fault_mask(ptdev, ~0)); ++ if (ret) ++ return ret; ++ ++ mmu->vm.wq = alloc_workqueue("panthor-vm-bind", WQ_UNBOUND, 0); ++ if (!mmu->vm.wq) ++ return -ENOMEM; ++ ++ /* On 32-bit kernels, the VA space is limited by the io_pgtable_ops abstraction, ++ * which passes iova as an unsigned long. Patch the mmu_features to reflect this ++ * limitation. 
++ */ ++ if (sizeof(unsigned long) * 8 < va_bits) { ++ ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0); ++ ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8; ++ } ++ ++ return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq); ++} ++ ++#ifdef CONFIG_DEBUG_FS ++static int show_vm_gpuvas(struct panthor_vm *vm, struct seq_file *m) ++{ ++ int ret; ++ ++ mutex_lock(&vm->op_lock); ++ ret = drm_debugfs_gpuva_info(m, &vm->base); ++ mutex_unlock(&vm->op_lock); ++ ++ return ret; ++} ++ ++static int show_each_vm(struct seq_file *m, void *arg) ++{ ++ struct drm_info_node *node = (struct drm_info_node *)m->private; ++ struct drm_device *ddev = node->minor->dev; ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ int (*show)(struct panthor_vm *, struct seq_file *) = node->info_ent->data; ++ struct panthor_vm *vm; ++ int ret = 0; ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { ++ ret = show(vm, m); ++ if (ret < 0) ++ break; ++ ++ seq_puts(m, "\n"); ++ } ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ return ret; ++} ++ ++static struct drm_info_list panthor_mmu_debugfs_list[] = { ++ DRM_DEBUGFS_GPUVA_INFO(show_each_vm, show_vm_gpuvas), ++}; ++ ++/** ++ * panthor_mmu_debugfs_init() - Initialize MMU debugfs entries ++ * @minor: Minor. ++ */ ++void panthor_mmu_debugfs_init(struct drm_minor *minor) ++{ ++ drm_debugfs_create_files(panthor_mmu_debugfs_list, ++ ARRAY_SIZE(panthor_mmu_debugfs_list), ++ minor->debugfs_root, minor); ++} ++#endif /* CONFIG_DEBUG_FS */ ++ ++/** ++ * panthor_mmu_pt_cache_init() - Initialize the page table cache. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_mmu_pt_cache_init(void) ++{ ++ pt_cache = kmem_cache_create("panthor-mmu-pt", SZ_4K, SZ_4K, 0, NULL); ++ if (!pt_cache) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * panthor_mmu_pt_cache_fini() - Destroy the page table cache. ++ */ ++void panthor_mmu_pt_cache_fini(void) ++{ ++ kmem_cache_destroy(pt_cache); ++} +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_mmu.h +@@ -0,0 +1,102 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#ifndef __PANTHOR_MMU_H__ ++#define __PANTHOR_MMU_H__ ++ ++#include ++ ++struct drm_exec; ++struct drm_sched_job; ++struct panthor_gem_object; ++struct panthor_heap_pool; ++struct panthor_vm; ++struct panthor_vma; ++struct panthor_mmu; ++ ++int panthor_mmu_init(struct panthor_device *ptdev); ++void panthor_mmu_unplug(struct panthor_device *ptdev); ++void panthor_mmu_pre_reset(struct panthor_device *ptdev); ++void panthor_mmu_post_reset(struct panthor_device *ptdev); ++void panthor_mmu_suspend(struct panthor_device *ptdev); ++void panthor_mmu_resume(struct panthor_device *ptdev); ++ ++int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, ++ u64 offset, u64 size, u64 va, u32 flags); ++int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size); ++struct panthor_gem_object * ++panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset); ++ ++int panthor_vm_active(struct panthor_vm *vm); ++void panthor_vm_idle(struct panthor_vm *vm); ++int panthor_vm_as(struct panthor_vm *vm); ++ ++struct panthor_heap_pool * ++panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); ++ ++struct panthor_vm *panthor_vm_get(struct panthor_vm *vm); ++void panthor_vm_put(struct panthor_vm *vm); ++struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, ++ u64 kernel_va_start, u64 kernel_va_size, ++ u64 kernel_auto_va_start, ++ u64 kernel_auto_va_size); ++ ++int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, ++ struct panthor_vm *vm, ++ u32 slot_count); ++int panthor_vm_add_bos_resvs_deps_to_job(struct panthor_vm *vm, ++ struct drm_sched_job *job); ++void panthor_vm_add_job_fence_to_bos_resvs(struct panthor_vm *vm, ++ struct drm_sched_job *job); ++ ++struct dma_resv *panthor_vm_resv(struct panthor_vm *vm); ++struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm); ++ ++void panthor_vm_pool_destroy(struct panthor_file *pfile); ++int panthor_vm_pool_create(struct panthor_file *pfile); ++int panthor_vm_pool_create_vm(struct panthor_device *ptdev, ++ struct panthor_vm_pool *pool, ++ struct drm_panthor_vm_create *args); ++int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle); ++struct panthor_vm *panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle); ++ ++bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm); ++bool panthor_vm_is_unusable(struct panthor_vm *vm); ++ ++/* ++ * PANTHOR_VM_KERNEL_AUTO_VA: Use this magic address when you want the GEM ++ * logic to auto-allocate the virtual address in the reserved kernel VA range. 
++ */ ++#define PANTHOR_VM_KERNEL_AUTO_VA ~0ull ++ ++int panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, ++ struct drm_mm_node *va_node); ++void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node); ++ ++int panthor_vm_bind_exec_sync_op(struct drm_file *file, ++ struct panthor_vm *vm, ++ struct drm_panthor_vm_bind_op *op); ++ ++struct drm_sched_job * ++panthor_vm_bind_job_create(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op); ++void panthor_vm_bind_job_put(struct drm_sched_job *job); ++int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, ++ struct drm_sched_job *job); ++void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); ++ ++void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage); ++ ++int panthor_mmu_pt_cache_init(void); ++void panthor_mmu_pt_cache_fini(void); ++ ++#ifdef CONFIG_DEBUG_FS ++void panthor_mmu_debugfs_init(struct drm_minor *minor); ++#endif ++ ++#endif diff --git a/patches-6.6/034-37-v6.10-drm-panthor-Add-the-FW-logical-block.patch b/patches-6.6/034-37-v6.10-drm-panthor-Add-the-FW-logical-block.patch new file mode 100644 index 0000000..4b97490 --- /dev/null +++ b/patches-6.6/034-37-v6.10-drm-panthor-Add-the-FW-logical-block.patch @@ -0,0 +1,1929 @@ +From 2718d91816eeed03c09c8abe872e45f59078768c Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:22 +0100 +Subject: [PATCH] drm/panthor: Add the FW logical block + +Contains everything that's FW related, that includes the code dealing +with the microcontroller unit (MCU) that's running the FW, and anything +related to allocating memory shared between the FW and the CPU. + +A few global FW events are processed in the IRQ handler, the rest is +forwarded to the scheduler, since scheduling is the primary reason for +the FW existence, and also the main source of FW <-> kernel +interactions. + +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v5: +- Fix typo in GLB_PERFCNT_SAMPLE definition +- Fix unbalanced panthor_vm_idle/active() calls +- Fallback to a slow reset when the fast reset fails +- Add extra information when reporting a FW boot failure + +v4: +- Add a MODULE_FIRMWARE() entry for gen 10.8 +- Fix a wrong return ERR_PTR() in panthor_fw_load_section_entry() +- Fix typos +- Add Steve's R-b + +v3: +- Make the FW path more future-proof (Liviu) +- Use one waitqueue for all FW events +- Simplify propagation of FW events to the scheduler logic +- Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead +- Account for the panthor_vm changes +- Replace magic number with 0x7fffffff with ~0 to better signify that + it's the maximum permitted value. +- More accurate rounding when computing the firmware timeout. +- Add a 'sub iterator' helper function. This also adds a check that a + firmware entry doesn't overflow the firmware image. +- Drop __packed from FW structures, natural alignment is good enough. +- Other minor code improvements. 
+ +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-9-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_fw.c | 1362 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_fw.h | 503 ++++++++++ + 2 files changed, 1865 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c + create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_fw.c +@@ -0,0 +1,1362 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifdef CONFIG_ARM_ARCH_TIMER ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++#define CSF_FW_NAME "mali_csffw.bin" ++ ++#define PING_INTERVAL_MS 12000 ++#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024) ++#define PROGRESS_TIMEOUT_SCALE_SHIFT 10 ++#define IDLE_HYSTERESIS_US 800 ++#define PWROFF_HYSTERESIS_US 10000 ++ ++/** ++ * struct panthor_fw_binary_hdr - Firmware binary header. ++ */ ++struct panthor_fw_binary_hdr { ++ /** @magic: Magic value to check binary validity. */ ++ u32 magic; ++#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e ++ ++ /** @minor: Minor FW version. */ ++ u8 minor; ++ ++ /** @major: Major FW version. */ ++ u8 major; ++#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0 ++ ++ /** @padding1: MBZ. */ ++ u16 padding1; ++ ++ /** @version_hash: FW version hash. */ ++ u32 version_hash; ++ ++ /** @padding2: MBZ. */ ++ u32 padding2; ++ ++ /** @size: FW binary size. */ ++ u32 size; ++}; ++ ++/** ++ * enum panthor_fw_binary_entry_type - Firmware binary entry type ++ */ ++enum panthor_fw_binary_entry_type { ++ /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */ ++ CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */ ++ CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */ ++ CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */ ++ CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. 
*/
++ CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
++};
++
++#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff)
++#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff)
++#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30)
++#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31)
++
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31)
++
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \
++ (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
++
++/**
++ * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
++ */
++struct panthor_fw_binary_section_entry_hdr {
++ /** @flags: Section flags. */
++ u32 flags;
++
++ /** @va: MCU virtual range to map this binary section to. */
++ struct {
++ /** @start: Start address. */
++ u32 start;
++
++ /** @end: End address. */
++ u32 end;
++ } va;
++
++ /** @data: Data to initialize the FW section with. */
++ struct {
++ /** @start: Start offset in the FW binary. */
++ u32 start;
++
++ /** @end: End offset in the FW binary. */
++ u32 end;
++ } data;
++};
++
++/**
++ * struct panthor_fw_binary_iter - Firmware binary iterator
++ *
++ * Used to parse a firmware binary.
++ */
++struct panthor_fw_binary_iter {
++ /** @data: FW binary data. */
++ const void *data;
++
++ /** @size: FW binary size. */
++ size_t size;
++
++ /** @offset: Iterator offset. */
++ size_t offset;
++};
++
++/**
++ * struct panthor_fw_section - FW section
++ */
++struct panthor_fw_section {
++ /** @node: Used to keep track of FW sections. */
++ struct list_head node;
++
++ /** @flags: Section flags, as encoded in the FW binary. */
++ u32 flags;
++
++ /** @mem: Section memory. */
++ struct panthor_kernel_bo *mem;
++
++ /**
++ * @name: Name of the section, as specified in the binary.
++ *
++ * Can be NULL.
++ */
++ const char *name;
++
++ /**
++ * @data: Initial data copied to the FW memory.
++ *
++ * We keep data around so we can reload sections after a reset.
++ */
++ struct {
++ /** @buf: Buffer used to store init data. */
++ const void *buf;
++
++ /** @size: Size of @buf in bytes. */
++ size_t size;
++ } data;
++};
++
++#define CSF_MCU_SHARED_REGION_START 0x04000000ULL
++#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL
++
++#define MIN_CS_PER_CSG 8
++#define MIN_CSGS 3
++#define MAX_CSG_PRIO 0xf
++
++#define CSF_IFACE_VERSION(major, minor, patch) \
++ (((major) << 24) | ((minor) << 16) | (patch))
++#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24)
++#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff)
++#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff)
++
++#define CSF_GROUP_CONTROL_OFFSET 0x1000
++#define CSF_STREAM_CONTROL_OFFSET 0x40
++#define CSF_UNPRESERVED_REG_COUNT 4
++
++/**
++ * struct panthor_fw_iface - FW interfaces
++ */
++struct panthor_fw_iface {
++ /** @global: Global interface. */
++ struct panthor_fw_global_iface global;
++
++ /** @groups: Group slot interfaces. */
++ struct panthor_fw_csg_iface groups[MAX_CSGS];
++
++ /** @streams: Command stream slot interfaces. */
++ struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
++};
++
++/**
++ * struct panthor_fw - Firmware management
++ */
++struct panthor_fw {
++ /** @vm: MCU VM. */
++ struct panthor_vm *vm;
++
++ /** @sections: List of FW sections. */
++ struct list_head sections;
++
++ /** @shared_section: The section containing the FW interfaces. */
++ struct panthor_fw_section *shared_section;
++
++ /** @iface: FW interfaces. */
++ struct panthor_fw_iface iface;
++
++ /** @watchdog: Collection of fields relating to the FW watchdog. */
++ struct {
++ /** @ping_work: Delayed work used to ping the FW. */
++ struct delayed_work ping_work;
++ } watchdog;
++
++ /**
++ * @req_waitqueue: FW request waitqueue.
++ *
++ * Every time a request is sent to a command stream group or the global
++ * interface, the caller will first busy wait for the request to be
++ * acknowledged, and then fall back to a sleeping wait.
++ *
++ * This wait queue is here to support the sleeping wait flavor.
++ */
++ wait_queue_head_t req_waitqueue;
++
++ /** @booted: True if the FW is booted */
++ bool booted;
++
++ /**
++ * @fast_reset: True if the post_reset logic can proceed with a fast reset.
++ *
++ * A fast reset is just a reset where the driver doesn't reload the FW sections.
++ *
++ * Any time the firmware is properly suspended, a fast reset can take place.
++ * On the other hand, if the halt operation failed, the driver will reload
++ * all sections to make sure we start from a fresh state.
++ */
++ bool fast_reset;
++
++ /** @irq: Job irq data. */
++ struct panthor_irq irq;
++};
++
++struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
++{
++ return ptdev->fw->vm;
++}
++
++/**
++ * panthor_fw_get_glb_iface() - Get the global interface
++ * @ptdev: Device.
++ *
++ * Return: The global interface.
++ */
++struct panthor_fw_global_iface *
++panthor_fw_get_glb_iface(struct panthor_device *ptdev)
++{
++ return &ptdev->fw->iface.global;
++}
++
++/**
++ * panthor_fw_get_csg_iface() - Get a command stream group slot interface
++ * @ptdev: Device.
++ * @csg_slot: Index of the command stream group slot.
++ *
++ * Return: The command stream group slot interface.
++ */
++struct panthor_fw_csg_iface *
++panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
++{
++ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
++ return NULL;
++
++ return &ptdev->fw->iface.groups[csg_slot];
++}
++
++/**
++ * panthor_fw_get_cs_iface() - Get a command stream slot interface
++ * @ptdev: Device.
++ * @csg_slot: Index of the command stream group slot.
++ * @cs_slot: Index of the command stream slot.
++ *
++ * Return: The command stream slot interface.
++ */
++struct panthor_fw_cs_iface *
++panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
++{
++ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG))
++ return NULL;
++
++ return &ptdev->fw->iface.streams[csg_slot][cs_slot];
++}
++
++/**
++ * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
++ * @ptdev: Device.
++ * @timeout_us: Timeout expressed in micro-seconds.
++ *
++ * The FW has two timer sources: the GPU counter or arch-timer. We need
++ * to express timeouts in terms of number of cycles and specify which
++ * timer source should be used.
++ *
++ * Return: A value suitable for timeout fields in the global interface.
++ */
++static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
++{
++ bool use_cycle_counter = false;
++ u32 timer_rate = 0;
++ u64 mod_cycles;
++
++#ifdef CONFIG_ARM_ARCH_TIMER
++ timer_rate = arch_timer_get_cntfrq();
++#endif
++
++ if (!timer_rate) {
++ use_cycle_counter = true;
++ timer_rate = clk_get_rate(ptdev->clks.core);
++ }
++
++ if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
++ /* We couldn't get a valid clock rate, let's just pick the
++ * maximum value so the FW still handles the core
++ * power on/off requests.
++ */
++ return GLB_TIMER_VAL(~0) |
++ GLB_TIMER_SOURCE_GPU_COUNTER;
++ }
++
++ mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
++ 1000000ull << 10);
++ if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
++ mod_cycles = GLB_TIMER_VAL(~0);
++
++ return GLB_TIMER_VAL(mod_cycles) |
++ (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
++}
++
++static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
++ struct panthor_fw_binary_iter *iter,
++ void *out, size_t size)
++{
++ size_t new_offset = iter->offset + size;
++
++ if (new_offset > iter->size || new_offset < iter->offset) {
++ drm_err(&ptdev->base, "Firmware too small\n");
++ return -EINVAL;
++ }
++
++ memcpy(out, iter->data + iter->offset, size);
++ iter->offset = new_offset;
++ return 0;
++}
++
++static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
++ struct panthor_fw_binary_iter *iter,
++ struct panthor_fw_binary_iter *sub_iter,
++ size_t size)
++{
++ size_t new_offset = iter->offset + size;
++
++ if (new_offset > iter->size || new_offset < iter->offset) {
++ drm_err(&ptdev->base, "Firmware entry too long\n");
++ return -EINVAL;
++ }
++
++ sub_iter->offset = 0;
++ sub_iter->data = iter->data + iter->offset;
++ sub_iter->size = size;
++ iter->offset = new_offset;
++ return 0;
++}
++
++static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
++ struct panthor_fw_section *section)
++{
++ bool was_mapped = !!section->mem->kmap;
++ int ret;
++
++ if (!section->data.size &&
++ !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
++ return;
++
++ ret = panthor_kernel_bo_vmap(section->mem);
++ if (drm_WARN_ON(&ptdev->base, ret))
++ return;
++
++ memcpy(section->mem->kmap, section->data.buf, section->data.size);
++ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
++ memset(section->mem->kmap + section->data.size, 0,
++ panthor_kernel_bo_size(section->mem) - section->data.size);
++ }
++
++ if (!was_mapped)
++ panthor_kernel_bo_vunmap(section->mem);
++}
++
++/**
++ * panthor_fw_alloc_queue_iface_mem() - Allocate the ring-buffer interfaces.
++ * @ptdev: Device.
++ * @input: Pointer holding the input interface on success.
++ * Should be ignored on failure.
++ * @output: Pointer holding the output interface on success. ++ * Should be ignored on failure. ++ * @input_fw_va: Pointer holding the input interface FW VA on success. ++ * Should be ignored on failure. ++ * @output_fw_va: Pointer holding the output interface FW VA on success. ++ * Should be ignored on failure. ++ * ++ * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input ++ * interface is at offset 0, and the output interface at offset 4096. ++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, ++ struct panthor_fw_ringbuf_input_iface **input, ++ const struct panthor_fw_ringbuf_output_iface **output, ++ u32 *input_fw_va, u32 *output_fw_va) ++{ ++ struct panthor_kernel_bo *mem; ++ int ret; ++ ++ mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(mem)) ++ return mem; ++ ++ ret = panthor_kernel_bo_vmap(mem); ++ if (ret) { ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem); ++ return ERR_PTR(ret); ++ } ++ ++ memset(mem->kmap, 0, panthor_kernel_bo_size(mem)); ++ *input = mem->kmap; ++ *output = mem->kmap + SZ_4K; ++ *input_fw_va = panthor_kernel_bo_gpuva(mem); ++ *output_fw_va = *input_fw_va + SZ_4K; ++ ++ return mem; ++} ++ ++/** ++ * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group. ++ * @ptdev: Device. ++ * @size: Size of the suspend buffer. ++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size) ++{ ++ return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++} ++ ++static int panthor_fw_load_section_entry(struct panthor_device *ptdev, ++ const struct firmware *fw, ++ struct panthor_fw_binary_iter *iter, ++ u32 ehdr) ++{ ++ struct panthor_fw_binary_section_entry_hdr hdr; ++ struct panthor_fw_section *section; ++ u32 section_size; ++ u32 name_len; ++ int ret; ++ ++ ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); ++ if (ret) ++ return ret; ++ ++ if (hdr.data.end < hdr.data.start) { ++ drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n", ++ hdr.data.end, hdr.data.start); ++ return -EINVAL; ++ } ++ ++ if (hdr.va.end < hdr.va.start) { ++ drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n", ++ hdr.va.end, hdr.va.start); ++ return -EINVAL; ++ } ++ ++ if (hdr.data.end > fw->size) { ++ drm_err(&ptdev->base, "Firmware corrupted, file truncated? 
data_end=0x%x > fw size=0x%zx\n", ++ hdr.data.end, fw->size); ++ return -EINVAL; ++ } ++ ++ if ((hdr.va.start & ~PAGE_MASK) != 0 || ++ (hdr.va.end & ~PAGE_MASK) != 0) { ++ drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", ++ hdr.va.start, hdr.va.end); ++ return -EINVAL; ++ } ++ ++ if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) { ++ drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", ++ hdr.flags); ++ return -EINVAL; ++ } ++ ++ if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) { ++ drm_warn(&ptdev->base, ++ "Firmware protected mode entry not be supported, ignoring"); ++ return 0; ++ } ++ ++ if (hdr.va.start == CSF_MCU_SHARED_REGION_START && ++ !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) { ++ drm_err(&ptdev->base, ++ "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); ++ return -EINVAL; ++ } ++ ++ name_len = iter->size - iter->offset; ++ ++ section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL); ++ if (!section) ++ return -ENOMEM; ++ ++ list_add_tail(§ion->node, &ptdev->fw->sections); ++ section->flags = hdr.flags; ++ section->data.size = hdr.data.end - hdr.data.start; ++ ++ if (section->data.size > 0) { ++ void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL); ++ ++ if (!data) ++ return -ENOMEM; ++ ++ memcpy(data, fw->data + hdr.data.start, section->data.size); ++ section->data.buf = data; ++ } ++ ++ if (name_len > 0) { ++ char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL); ++ ++ if (!name) ++ return -ENOMEM; ++ ++ memcpy(name, iter->data + iter->offset, name_len); ++ name[name_len] = '\0'; ++ section->name = name; ++ } ++ ++ section_size = hdr.va.end - hdr.va.start; ++ if (section_size) { ++ u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK; ++ struct panthor_gem_object *bo; ++ u32 vm_map_flags = 0; ++ struct sg_table *sgt; ++ u64 va = hdr.va.start; ++ ++ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; ++ ++ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX)) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; ++ ++ /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to ++ * non-cacheable for now. We might want to introduce a new ++ * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device ++ * memory and is currently not used by our driver) for ++ * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit ++ * of IO-coherent systems. 
++ */ ++ if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; ++ ++ section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), ++ section_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ vm_map_flags, va); ++ if (IS_ERR(section->mem)) ++ return PTR_ERR(section->mem); ++ ++ if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) ++ return -EINVAL; ++ ++ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) { ++ ret = panthor_kernel_bo_vmap(section->mem); ++ if (ret) ++ return ret; ++ } ++ ++ panthor_fw_init_section_mem(ptdev, section); ++ ++ bo = to_panthor_bo(section->mem->obj); ++ sgt = drm_gem_shmem_get_pages_sgt(&bo->base); ++ if (IS_ERR(sgt)) ++ return PTR_ERR(sgt); ++ ++ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); ++ } ++ ++ if (hdr.va.start == CSF_MCU_SHARED_REGION_START) ++ ptdev->fw->shared_section = section; ++ ++ return 0; ++} ++ ++static void ++panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) ++{ ++ struct panthor_fw_section *section; ++ ++ list_for_each_entry(section, &ptdev->fw->sections, node) { ++ struct sg_table *sgt; ++ ++ if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) ++ continue; ++ ++ panthor_fw_init_section_mem(ptdev, section); ++ sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base); ++ if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt))) ++ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); ++ } ++} ++ ++static int panthor_fw_load_entry(struct panthor_device *ptdev, ++ const struct firmware *fw, ++ struct panthor_fw_binary_iter *iter) ++{ ++ struct panthor_fw_binary_iter eiter; ++ u32 ehdr; ++ int ret; ++ ++ ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr)); ++ if (ret) ++ return ret; ++ ++ if ((iter->offset % sizeof(u32)) || ++ (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) { ++ drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", ++ (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr)); ++ return -EINVAL; ++ } ++ ++ if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter, ++ CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr))) ++ return -EINVAL; ++ ++ switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { ++ case CSF_FW_BINARY_ENTRY_TYPE_IFACE: ++ return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); ++ ++ /* FIXME: handle those entry types? 
*/ ++ case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: ++ case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: ++ case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: ++ case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: ++ return 0; ++ default: ++ break; ++ } ++ ++ if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) ++ return 0; ++ ++ drm_err(&ptdev->base, ++ "Unsupported non-optional entry type %u in firmware\n", ++ CSF_FW_BINARY_ENTRY_TYPE(ehdr)); ++ return -EINVAL; ++} ++ ++static int panthor_fw_load(struct panthor_device *ptdev) ++{ ++ const struct firmware *fw = NULL; ++ struct panthor_fw_binary_iter iter = {}; ++ struct panthor_fw_binary_hdr hdr; ++ char fw_path[128]; ++ int ret; ++ ++ snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s", ++ (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id), ++ (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id), ++ CSF_FW_NAME); ++ ++ ret = request_firmware(&fw, fw_path, ptdev->base.dev); ++ if (ret) { ++ drm_err(&ptdev->base, "Failed to load firmware image '%s'\n", ++ CSF_FW_NAME); ++ return ret; ++ } ++ ++ iter.data = fw->data; ++ iter.size = fw->size; ++ ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr)); ++ if (ret) ++ goto out; ++ ++ if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) { ++ ret = -EINVAL; ++ drm_err(&ptdev->base, "Invalid firmware magic\n"); ++ goto out; ++ } ++ ++ if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) { ++ ret = -EINVAL; ++ drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n", ++ hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX); ++ goto out; ++ } ++ ++ if (hdr.size > iter.size) { ++ drm_err(&ptdev->base, "Firmware image is truncated\n"); ++ goto out; ++ } ++ ++ iter.size = hdr.size; ++ ++ while (iter.offset < hdr.size) { ++ ret = panthor_fw_load_entry(ptdev, fw, &iter); ++ if (ret) ++ goto out; ++ } ++ ++ if (!ptdev->fw->shared_section) { ++ drm_err(&ptdev->base, "Shared interface region not found\n"); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++out: ++ release_firmware(fw); ++ return ret; ++} ++ ++/** ++ * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address ++ * @ptdev: Device. ++ * @mcu_va: MCU address. ++ * ++ * Return: NULL if the address is not part of the shared section, non-NULL otherwise. 
++ */ ++static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va) ++{ ++ u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem); ++ u64 shared_mem_end = shared_mem_start + ++ panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end) ++ return NULL; ++ ++ return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start); ++} ++ ++static int panthor_init_cs_iface(struct panthor_device *ptdev, ++ unsigned int csg_idx, unsigned int cs_idx) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx); ++ struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx]; ++ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + ++ (csg_idx * glb_iface->control->group_stride) + ++ CSF_STREAM_CONTROL_OFFSET + ++ (cs_idx * csg_iface->control->stream_stride); ++ struct panthor_fw_cs_iface *first_cs_iface = ++ panthor_fw_get_cs_iface(ptdev, 0, 0); ++ ++ if (iface_offset + sizeof(*cs_iface) >= shared_section_sz) ++ return -EINVAL; ++ ++ spin_lock_init(&cs_iface->lock); ++ cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; ++ cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va); ++ cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va); ++ ++ if (!cs_iface->input || !cs_iface->output) { ++ drm_err(&ptdev->base, "Invalid stream control interface input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (cs_iface != first_cs_iface) { ++ if (cs_iface->control->features != first_cs_iface->control->features) { ++ drm_err(&ptdev->base, "Expecting identical CS slots"); ++ return -EINVAL; ++ } ++ } else { ++ u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features); ++ ++ ptdev->csif_info.cs_reg_count = reg_count; ++ ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT; ++ } ++ ++ return 0; ++} ++ ++static bool compare_csg(const struct panthor_fw_csg_control_iface *a, ++ const struct panthor_fw_csg_control_iface *b) ++{ ++ if (a->features != b->features) ++ return false; ++ if (a->suspend_size != b->suspend_size) ++ return false; ++ if (a->protm_suspend_size != b->protm_suspend_size) ++ return false; ++ if (a->stream_num != b->stream_num) ++ return false; ++ return true; ++} ++ ++static int panthor_init_csg_iface(struct panthor_device *ptdev, ++ unsigned int csg_idx) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx]; ++ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride); ++ unsigned int i; ++ ++ if (iface_offset + sizeof(*csg_iface) >= shared_section_sz) ++ return -EINVAL; ++ ++ spin_lock_init(&csg_iface->lock); ++ csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; ++ csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va); ++ csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va); ++ ++ if (csg_iface->control->stream_num < MIN_CS_PER_CSG || ++ csg_iface->control->stream_num > MAX_CS_PER_CSG) ++ return -EINVAL; ++ ++ if (!csg_iface->input || !csg_iface->output) { ++ drm_err(&ptdev->base, "Invalid group control interface 
input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (csg_idx > 0) { ++ struct panthor_fw_csg_iface *first_csg_iface = ++ panthor_fw_get_csg_iface(ptdev, 0); ++ ++ if (!compare_csg(first_csg_iface->control, csg_iface->control)) { ++ drm_err(&ptdev->base, "Expecting identical CSG slots"); ++ return -EINVAL; ++ } ++ } ++ ++ for (i = 0; i < csg_iface->control->stream_num; i++) { ++ int ret = panthor_init_cs_iface(ptdev, csg_idx, i); ++ ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static u32 panthor_get_instr_features(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0)) ++ return 0; ++ ++ return glb_iface->control->instr_features; ++} ++ ++static int panthor_fw_init_ifaces(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global; ++ unsigned int i; ++ ++ if (!ptdev->fw->shared_section->mem->kmap) ++ return -EINVAL; ++ ++ spin_lock_init(&glb_iface->lock); ++ glb_iface->control = ptdev->fw->shared_section->mem->kmap; ++ ++ if (!glb_iface->control->version) { ++ drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot"); ++ return -EINVAL; ++ } ++ ++ glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va); ++ glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va); ++ if (!glb_iface->input || !glb_iface->output) { ++ drm_err(&ptdev->base, "Invalid global control interface input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (glb_iface->control->group_num > MAX_CSGS || ++ glb_iface->control->group_num < MIN_CSGS) { ++ drm_err(&ptdev->base, "Invalid number of control groups"); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < glb_iface->control->group_num; i++) { ++ int ret = panthor_init_csg_iface(ptdev, i); ++ ++ if (ret) ++ return ret; ++ } ++ ++ drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x", ++ CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), ++ CSF_IFACE_VERSION_MINOR(glb_iface->control->version), ++ CSF_IFACE_VERSION_PATCH(glb_iface->control->version), ++ glb_iface->control->features, ++ panthor_get_instr_features(ptdev)); ++ return 0; ++} ++ ++static void panthor_fw_init_global_iface(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ /* Enable all cores. */ ++ glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present; ++ ++ /* Setup timers. */ ++ glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US); ++ glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; ++ glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US); ++ ++ /* Enable interrupts we care about. */ ++ glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN | ++ GLB_PING | ++ GLB_CFG_PROGRESS_TIMER | ++ GLB_CFG_POWEROFF_TIMER | ++ GLB_IDLE_EN | ++ GLB_IDLE; ++ ++ panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); ++ panthor_fw_toggle_reqs(glb_iface, req, ack, ++ GLB_CFG_ALLOC_EN | ++ GLB_CFG_POWEROFF_TIMER | ++ GLB_CFG_PROGRESS_TIMER); ++ ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ ++ /* Kick the watchdog. 
*/ ++ mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work, ++ msecs_to_jiffies(PING_INTERVAL_MS)); ++} ++ ++static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) ++ ptdev->fw->booted = true; ++ ++ wake_up_all(&ptdev->fw->req_waitqueue); ++ ++ /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */ ++ if (!ptdev->fw->booted) ++ return; ++ ++ panthor_sched_report_fw_events(ptdev, status); ++} ++PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler); ++ ++static int panthor_fw_start(struct panthor_device *ptdev) ++{ ++ bool timedout = false; ++ ++ ptdev->fw->booted = false; ++ panthor_job_irq_resume(&ptdev->fw->irq, ~0); ++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO); ++ ++ if (!wait_event_timeout(ptdev->fw->req_waitqueue, ++ ptdev->fw->booted, ++ msecs_to_jiffies(1000))) { ++ if (!ptdev->fw->booted && ++ !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF)) ++ timedout = true; ++ } ++ ++ if (timedout) { ++ static const char * const status_str[] = { ++ [MCU_STATUS_DISABLED] = "disabled", ++ [MCU_STATUS_ENABLED] = "enabled", ++ [MCU_STATUS_HALT] = "halt", ++ [MCU_STATUS_FATAL] = "fatal", ++ }; ++ u32 status = gpu_read(ptdev, MCU_STATUS); ++ ++ drm_err(&ptdev->base, "Failed to boot MCU (status=%s)", ++ status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++static void panthor_fw_stop(struct panthor_device *ptdev) ++{ ++ u32 status; ++ ++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); ++ if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, ++ status == MCU_STATUS_DISABLED, 10, 100000)) ++ drm_err(&ptdev->base, "Failed to stop MCU"); ++} ++ ++/** ++ * panthor_fw_pre_reset() - Call before a reset. ++ * @ptdev: Device. ++ * @on_hang: true if the reset was triggered on a GPU hang. ++ * ++ * If the reset is not triggered on a hang, we try to gracefully halt the ++ * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called. ++ */ ++void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) ++{ ++ /* Make sure we won't be woken up by a ping. */ ++ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); ++ ++ ptdev->fw->fast_reset = false; ++ ++ if (!on_hang) { ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 status; ++ ++ panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, ++ status == MCU_STATUS_HALT, 10, 100000) && ++ glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) { ++ ptdev->fw->fast_reset = true; ++ } else { ++ drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); ++ } ++ ++ /* The FW detects 0 -> 1 transitions. Make sure we reset ++ * the HALT bit before the FW is rebooted. ++ */ ++ panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); ++ } ++ ++ panthor_job_irq_suspend(&ptdev->fw->irq); ++} ++ ++/** ++ * panthor_fw_post_reset() - Call after a reset. ++ * @ptdev: Device. ++ * ++ * Start the FW. If this is not a fast reset, all FW sections are reloaded to ++ * make sure we can recover from a memory corruption. ++ */ ++int panthor_fw_post_reset(struct panthor_device *ptdev) ++{ ++ int ret; ++ ++ /* Make the MCU VM active. */ ++ ret = panthor_vm_active(ptdev->fw->vm); ++ if (ret) ++ return ret; ++ ++ /* If this is a fast reset, try to start the MCU without reloading ++ * the FW sections. 
If it fails, go for a full reset.
++ */
++ if (ptdev->fw->fast_reset) {
++ ret = panthor_fw_start(ptdev);
++ if (!ret)
++ goto out;
++
++ /* Force a disable, so we get a fresh boot on the next
++ * panthor_fw_start() call.
++ */
++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
++ drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
++ }
++
++ /* Reload all sections, including RO ones. We're not supposed
++ * to end up here anyway, let's just assume the overhead of
++ * reloading everything is acceptable.
++ */
++ panthor_reload_fw_sections(ptdev, true);
++
++ ret = panthor_fw_start(ptdev);
++ if (ret) {
++ drm_err(&ptdev->base, "FW slow reset failed");
++ return ret;
++ }
++
++out:
++ /* We must re-initialize the global interface even on fast-reset. */
++ panthor_fw_init_global_iface(ptdev);
++ return 0;
++}
++
++/**
++ * panthor_fw_unplug() - Called when the device is unplugged.
++ * @ptdev: Device.
++ *
++ * This function must make sure all pending operations are flushed before
++ * it releases device resources, thus preventing any interaction with
++ * the HW.
++ *
++ * If there is still FW-related work running after this function returns,
++ * it must use drm_dev_{enter,exit}() and skip any HW access when
++ * drm_dev_enter() returns false.
++ */
++void panthor_fw_unplug(struct panthor_device *ptdev)
++{
++ struct panthor_fw_section *section;
++
++ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
++
++ /* Make sure the IRQ handler can no longer be called after that point. */
++ if (ptdev->fw->irq.irq)
++ panthor_job_irq_suspend(&ptdev->fw->irq);
++
++ panthor_fw_stop(ptdev);
++
++ list_for_each_entry(section, &ptdev->fw->sections, node)
++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);
++
++ /* We intentionally don't call panthor_vm_idle() and let
++ * panthor_mmu_unplug() release the AS we acquired with
++ * panthor_vm_active() so we don't have to track the VM active/idle
++ * state to keep the active_refcnt balanced.
++ */
++ panthor_vm_put(ptdev->fw->vm);
++
++ panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
++}
++
++/**
++ * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
++ * @req_ptr: Pointer to the req register.
++ * @ack_ptr: Pointer to the ack register.
++ * @wq: Wait queue to use for the sleeping wait.
++ * @req_mask: Mask of requests to wait for.
++ * @acked: Pointer to field that's updated with the acked requests.
++ * If the function returns 0, *acked == req_mask.
++ * @timeout_ms: Timeout expressed in milliseconds.
++ *
++ * Return: 0 on success, -ETIMEDOUT otherwise.
++ */
++static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
++ wait_queue_head_t *wq,
++ u32 req_mask, u32 *acked,
++ u32 timeout_ms)
++{
++ u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
++ int ret;
++
++ /* Busy wait for a few µsecs before falling back to a sleeping wait. */
++ *acked = req_mask;
++ ret = read_poll_timeout_atomic(READ_ONCE, ack,
++ (ack & req_mask) == req,
++ 0, 10, 0,
++ *ack_ptr);
++ if (!ret)
++ return 0;
++
++ if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
++ msecs_to_jiffies(timeout_ms)))
++ return 0;
++
++ /* Check one last time, in case we were not woken up for some reason. */
++ ack = READ_ONCE(*ack_ptr);
++ if ((ack & req_mask) == req)
++ return 0;
++
++ *acked = ~(req ^ ack) & req_mask;
++ return -ETIMEDOUT;
++}
++
++/**
++ * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
++ * @ptdev: Device.
++ * @req_mask: Mask of requests to wait for. ++ * @acked: Pointer to field that's updated with the acked requests. ++ * If the function returns 0, *acked == req_mask. ++ * @timeout_ms: Timeout expressed in milliseconds. ++ * ++ * Return: 0 on success, -ETIMEDOUT otherwise. ++ */ ++int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, ++ u32 req_mask, u32 *acked, ++ u32 timeout_ms) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ /* GLB_HALT doesn't get acked through the FW interface. */ ++ if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT))) ++ return -EINVAL; ++ ++ return panthor_fw_wait_acks(&glb_iface->input->req, ++ &glb_iface->output->ack, ++ &ptdev->fw->req_waitqueue, ++ req_mask, acked, timeout_ms); ++} ++ ++/** ++ * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged. ++ * @ptdev: Device. ++ * @csg_slot: CSG slot ID. ++ * @req_mask: Mask of requests to wait for. ++ * @acked: Pointer to field that's updated with the acked requests. ++ * If the function returns 0, *acked == req_mask. ++ * @timeout_ms: Timeout expressed in milliseconds. ++ * ++ * Return: 0 on success, -ETIMEDOUT otherwise. ++ */ ++int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot, ++ u32 req_mask, u32 *acked, u32 timeout_ms) ++{ ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot); ++ int ret; ++ ++ if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK)) ++ return -EINVAL; ++ ++ ret = panthor_fw_wait_acks(&csg_iface->input->req, ++ &csg_iface->output->ack, ++ &ptdev->fw->req_waitqueue, ++ req_mask, acked, timeout_ms); ++ ++ /* ++ * Check that all bits in the state field were updated, if any mismatch ++ * then clear all bits in the state field. This allows code to do ++ * (acked & CSG_STATE_MASK) and get the right value. ++ */ ++ ++ if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK) ++ *acked &= ~CSG_STATE_MASK; ++ ++ return ret; ++} ++ ++/** ++ * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells. ++ * @ptdev: Device. ++ * @csg_mask: Bitmask encoding the command stream group doorbells to ring. ++ * ++ * This function is toggling bits in the doorbell_req and ringing the ++ * global doorbell. It doesn't require a user doorbell to be attached to ++ * the group. ++ */ ++void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++} ++ ++static void panthor_fw_ping_work(struct work_struct *work) ++{ ++ struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work); ++ struct panthor_device *ptdev = fw->irq.ptdev; ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 acked; ++ int ret; ++ ++ if (panthor_device_reset_is_pending(ptdev)) ++ return; ++ ++ panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ ++ ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ drm_err(&ptdev->base, "FW ping timeout, scheduling a reset"); ++ } else { ++ mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work, ++ msecs_to_jiffies(PING_INTERVAL_MS)); ++ } ++} ++ ++/** ++ * panthor_fw_init() - Initialize FW related data. ++ * @ptdev: Device. 
++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_fw_init(struct panthor_device *ptdev) ++{ ++ struct panthor_fw *fw; ++ int ret, irq; ++ ++ fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL); ++ if (!fw) ++ return -ENOMEM; ++ ++ ptdev->fw = fw; ++ init_waitqueue_head(&fw->req_waitqueue); ++ INIT_LIST_HEAD(&fw->sections); ++ INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work); ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job"); ++ if (irq <= 0) ++ return -ENODEV; ++ ++ ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0); ++ if (ret) { ++ drm_err(&ptdev->base, "failed to request job irq"); ++ return ret; ++ } ++ ++ ret = panthor_gpu_l2_power_on(ptdev); ++ if (ret) ++ return ret; ++ ++ fw->vm = panthor_vm_create(ptdev, true, ++ 0, SZ_4G, ++ CSF_MCU_SHARED_REGION_START, ++ CSF_MCU_SHARED_REGION_SIZE); ++ if (IS_ERR(fw->vm)) { ++ ret = PTR_ERR(fw->vm); ++ fw->vm = NULL; ++ goto err_unplug_fw; ++ } ++ ++ ret = panthor_fw_load(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_vm_active(fw->vm); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_fw_start(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_fw_init_ifaces(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ panthor_fw_init_global_iface(ptdev); ++ return 0; ++ ++err_unplug_fw: ++ panthor_fw_unplug(ptdev); ++ return ret; ++} ++ ++MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin"); +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_fw.h +@@ -0,0 +1,503 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_MCU_H__ ++#define __PANTHOR_MCU_H__ ++ ++#include ++ ++struct panthor_device; ++struct panthor_kernel_bo; ++ ++#define MAX_CSGS 31 ++#define MAX_CS_PER_CSG 32 ++ ++struct panthor_fw_ringbuf_input_iface { ++ u64 insert; ++ u64 extract; ++}; ++ ++struct panthor_fw_ringbuf_output_iface { ++ u64 extract; ++ u32 active; ++}; ++ ++struct panthor_fw_cs_control_iface { ++#define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1) ++#define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8) ++#define CS_FEATURES_COMPUTE BIT(16) ++#define CS_FEATURES_FRAGMENT BIT(17) ++#define CS_FEATURES_TILER BIT(18) ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++}; ++ ++struct panthor_fw_cs_input_iface { ++#define CS_STATE_MASK GENMASK(2, 0) ++#define CS_STATE_STOP 0 ++#define CS_STATE_START 1 ++#define CS_EXTRACT_EVENT BIT(4) ++#define CS_IDLE_SYNC_WAIT BIT(8) ++#define CS_IDLE_PROTM_PENDING BIT(9) ++#define CS_IDLE_EMPTY BIT(10) ++#define CS_IDLE_RESOURCE_REQ BIT(11) ++#define CS_TILER_OOM BIT(26) ++#define CS_PROTM_PENDING BIT(27) ++#define CS_FATAL BIT(30) ++#define CS_FAULT BIT(31) ++#define CS_REQ_MASK (CS_STATE_MASK | \ ++ CS_EXTRACT_EVENT | \ ++ CS_IDLE_SYNC_WAIT | \ ++ CS_IDLE_PROTM_PENDING | \ ++ CS_IDLE_EMPTY | \ ++ CS_IDLE_RESOURCE_REQ) ++#define CS_EVT_MASK (CS_TILER_OOM | \ ++ CS_PROTM_PENDING | \ ++ CS_FATAL | \ ++ CS_FAULT) ++ u32 req; ++ ++#define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0)) ++#define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8)) ++ u32 config; ++ u32 reserved1; ++ u32 ack_irq_mask; ++ u64 ringbuf_base; ++ u32 ringbuf_size; ++ u32 reserved2; ++ u64 heap_start; ++ u64 heap_end; ++ u64 ringbuf_input; ++ u64 ringbuf_output; ++ u32 instr_config; ++ u32 instrbuf_size; ++ u64 instrbuf_base; ++ u64 instrbuf_offset_ptr; ++}; ++ ++struct panthor_fw_cs_output_iface { ++ u32 ack; ++ u32 reserved1[15]; ++ u64 status_cmd_ptr; ++ ++#define 
CS_STATUS_WAIT_SB_MASK GENMASK(15, 0) ++#define CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16) ++#define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16) ++#define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16) ++#define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24) ++#define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24) ++#define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24) ++#define CS_STATUS_WAIT_PROGRESS BIT(28) ++#define CS_STATUS_WAIT_PROTM BIT(29) ++#define CS_STATUS_WAIT_SYNC_64B BIT(30) ++#define CS_STATUS_WAIT_SYNC BIT(31) ++ u32 status_wait; ++ u32 status_req_resource; ++ u64 status_wait_sync_ptr; ++ u32 status_wait_sync_value; ++ u32 status_scoreboards; ++ ++#define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0 ++#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 ++#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 ++#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 ++#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 ++#define CS_STATUS_BLOCKED_REASON_RES 6 ++#define CS_STATUS_BLOCKED_REASON_FLUSH 7 ++#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) ++ u32 status_blocked_reason; ++ u32 status_wait_sync_value_hi; ++ u32 reserved2[6]; ++ ++#define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0)) ++#define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0)) ++ u32 fault; ++ u32 fatal; ++ u64 fault_info; ++ u64 fatal_info; ++ u32 reserved3[10]; ++ u32 heap_vt_start; ++ u32 heap_vt_end; ++ u32 reserved4; ++ u32 heap_frag_end; ++ u64 heap_address; ++}; ++ ++struct panthor_fw_csg_control_iface { ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++ u32 suspend_size; ++ u32 protm_suspend_size; ++ u32 stream_num; ++ u32 stream_stride; ++}; ++ ++struct panthor_fw_csg_input_iface { ++#define CSG_STATE_MASK GENMASK(2, 0) ++#define CSG_STATE_TERMINATE 0 ++#define CSG_STATE_START 1 ++#define CSG_STATE_SUSPEND 2 ++#define CSG_STATE_RESUME 3 ++#define CSG_ENDPOINT_CONFIG BIT(4) ++#define CSG_STATUS_UPDATE BIT(5) ++#define CSG_SYNC_UPDATE BIT(28) ++#define CSG_IDLE BIT(29) ++#define CSG_DOORBELL BIT(30) ++#define CSG_PROGRESS_TIMER_EVENT BIT(31) ++#define CSG_REQ_MASK (CSG_STATE_MASK | \ ++ CSG_ENDPOINT_CONFIG | \ ++ CSG_STATUS_UPDATE) ++#define CSG_EVT_MASK (CSG_SYNC_UPDATE | \ ++ CSG_IDLE | \ ++ CSG_PROGRESS_TIMER_EVENT) ++ u32 req; ++ u32 ack_irq_mask; ++ ++ u32 doorbell_req; ++ u32 cs_irq_ack; ++ u32 reserved1[4]; ++ u64 allow_compute; ++ u64 allow_fragment; ++ u32 allow_other; ++ ++#define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0)) ++#define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8)) ++#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) ++#define CSG_EP_REQ_EXCL_COMPUTE BIT(20) ++#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) ++#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) ++#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) ++ u32 endpoint_req; ++ u32 reserved2[2]; ++ u64 suspend_buf; ++ u64 protm_suspend_buf; ++ u32 config; ++ u32 iter_trace_config; ++}; ++ ++struct panthor_fw_csg_output_iface { ++ u32 ack; ++ u32 reserved1; ++ u32 doorbell_ack; ++ u32 cs_irq_req; ++ u32 status_endpoint_current; ++ u32 status_endpoint_req; ++ ++#define CSG_STATUS_STATE_IS_IDLE BIT(0) ++ u32 status_state; ++ u32 resource_dep; ++}; ++ ++struct panthor_fw_global_control_iface { ++ u32 version; ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++ u32 group_num; ++ u32 group_stride; ++ u32 perfcnt_size; ++ u32 instr_features; ++}; ++ ++struct panthor_fw_global_input_iface { ++#define GLB_HALT BIT(0) ++#define GLB_CFG_PROGRESS_TIMER BIT(1) ++#define GLB_CFG_ALLOC_EN BIT(2) ++#define GLB_CFG_POWEROFF_TIMER BIT(3) ++#define GLB_PROTM_ENTER BIT(4) 
++#define GLB_PERFCNT_EN BIT(5) ++#define GLB_PERFCNT_SAMPLE BIT(6) ++#define GLB_COUNTER_EN BIT(7) ++#define GLB_PING BIT(8) ++#define GLB_FWCFG_UPDATE BIT(9) ++#define GLB_IDLE_EN BIT(10) ++#define GLB_SLEEP BIT(12) ++#define GLB_INACTIVE_COMPUTE BIT(20) ++#define GLB_INACTIVE_FRAGMENT BIT(21) ++#define GLB_INACTIVE_TILER BIT(22) ++#define GLB_PROTM_EXIT BIT(23) ++#define GLB_PERFCNT_THRESHOLD BIT(24) ++#define GLB_PERFCNT_OVERFLOW BIT(25) ++#define GLB_IDLE BIT(26) ++#define GLB_DBG_CSF BIT(30) ++#define GLB_DBG_HOST BIT(31) ++#define GLB_REQ_MASK GENMASK(10, 0) ++#define GLB_EVT_MASK GENMASK(26, 20) ++ u32 req; ++ u32 ack_irq_mask; ++ u32 doorbell_req; ++ u32 reserved1; ++ u32 progress_timer; ++ ++#define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0)) ++#define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31) ++ u32 poweroff_timer; ++ u64 core_en_mask; ++ u32 reserved2; ++ u32 perfcnt_as; ++ u64 perfcnt_base; ++ u32 perfcnt_extract; ++ u32 reserved3[3]; ++ u32 perfcnt_config; ++ u32 perfcnt_csg_select; ++ u32 perfcnt_fw_enable; ++ u32 perfcnt_csg_enable; ++ u32 perfcnt_csf_enable; ++ u32 perfcnt_shader_enable; ++ u32 perfcnt_tiler_enable; ++ u32 perfcnt_mmu_l2_enable; ++ u32 reserved4[8]; ++ u32 idle_timer; ++}; ++ ++enum panthor_fw_halt_status { ++ PANTHOR_FW_HALT_OK = 0, ++ PANTHOR_FW_HALT_ON_PANIC = 0x4e, ++ PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f, ++}; ++ ++struct panthor_fw_global_output_iface { ++ u32 ack; ++ u32 reserved1; ++ u32 doorbell_ack; ++ u32 reserved2; ++ u32 halt_status; ++ u32 perfcnt_status; ++ u32 perfcnt_insert; ++}; ++ ++/** ++ * struct panthor_fw_cs_iface - Firmware command stream slot interface ++ */ ++struct panthor_fw_cs_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. ++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream slot control interface. ++ * ++ * Used to expose command stream slot properties. ++ * ++ * This interface is read-only. ++ */ ++ struct panthor_fw_cs_control_iface *control; ++ ++ /** ++ * @input: Command stream slot input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_cs_input_iface *input; ++ ++ /** ++ * @output: Command stream slot output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_cs_output_iface *output; ++}; ++ ++/** ++ * struct panthor_fw_csg_iface - Firmware command stream group slot interface ++ */ ++struct panthor_fw_csg_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. ++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream group slot control interface. ++ * ++ * Used to expose command stream group slot properties. 
++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_csg_control_iface *control; ++ ++ /** ++ * @input: Command stream slot input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_csg_input_iface *input; ++ ++ /** ++ * @output: Command stream group slot output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_csg_output_iface *output; ++}; ++ ++/** ++ * struct panthor_fw_global_iface - Firmware global interface ++ */ ++struct panthor_fw_global_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_global_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler/FW management logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. ++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream group slot control interface. ++ * ++ * Used to expose global FW properties. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_global_control_iface *control; ++ ++ /** ++ * @input: Global input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_global_input_iface *input; ++ ++ /** ++ * @output: Global output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_global_output_iface *output; ++}; ++ ++/** ++ * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW ++ * @__iface: The interface to operate on. ++ * @__in_reg: Name of the register to update in the input section of the interface. ++ * @__out_reg: Name of the register to take as a reference in the output section of the ++ * interface. ++ * @__mask: Mask to apply to the update. ++ * ++ * The Host -> FW event/message passing was designed to be lockless, with each side of ++ * the channel having its writeable section. Events are signaled as a difference between ++ * the host and FW side in the req/ack registers (when a bit differs, there's an event ++ * pending, when they are the same, nothing needs attention). ++ * ++ * This helper allows one to update the req register based on the current value of the ++ * ack register managed by the FW. Toggling a specific bit will flag an event. In order ++ * for events to be re-evaluated, the interface doorbell needs to be rung. ++ * ++ * Concurrent accesses to the same req register is covered. ++ * ++ * Anything requiring atomic updates to multiple registers requires a dedicated lock. ++ */ ++#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \ ++ do { \ ++ u32 __cur_val, __new_val, __out_val; \ ++ spin_lock(&(__iface)->lock); \ ++ __cur_val = READ_ONCE((__iface)->input->__in_reg); \ ++ __out_val = READ_ONCE((__iface)->output->__out_reg); \ ++ __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \ ++ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ ++ spin_unlock(&(__iface)->lock); \ ++ } while (0) ++ ++/** ++ * panthor_fw_update_reqs() - Update bits to reflect a configuration change ++ * @__iface: The interface to operate on. ++ * @__in_reg: Name of the register to update in the input section of the interface. ++ * @__val: Value to set. ++ * @__mask: Mask to apply to the update. 
++ * ++ * Some configuration get passed through req registers that are also used to ++ * send events to the FW. Those req registers being updated from the interrupt ++ * handler, they require special helpers to update the configuration part as well. ++ * ++ * Concurrent accesses to the same req register is covered. ++ * ++ * Anything requiring atomic updates to multiple registers requires a dedicated lock. ++ */ ++#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \ ++ do { \ ++ u32 __cur_val, __new_val; \ ++ spin_lock(&(__iface)->lock); \ ++ __cur_val = READ_ONCE((__iface)->input->__in_reg); \ ++ __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ ++ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ ++ spin_unlock(&(__iface)->lock); \ ++ } while (0) ++ ++struct panthor_fw_global_iface * ++panthor_fw_get_glb_iface(struct panthor_device *ptdev); ++ ++struct panthor_fw_csg_iface * ++panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); ++ ++struct panthor_fw_cs_iface * ++panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); ++ ++int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, ++ u32 *acked, u32 timeout_ms); ++ ++int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked, ++ u32 timeout_ms); ++ ++void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot); ++ ++struct panthor_kernel_bo * ++panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, ++ struct panthor_fw_ringbuf_input_iface **input, ++ const struct panthor_fw_ringbuf_output_iface **output, ++ u32 *input_fw_va, u32 *output_fw_va); ++struct panthor_kernel_bo * ++panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size); ++ ++struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev); ++ ++void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang); ++int panthor_fw_post_reset(struct panthor_device *ptdev); ++ ++static inline void panthor_fw_suspend(struct panthor_device *ptdev) ++{ ++ panthor_fw_pre_reset(ptdev, false); ++} ++ ++static inline int panthor_fw_resume(struct panthor_device *ptdev) ++{ ++ return panthor_fw_post_reset(ptdev); ++} ++ ++int panthor_fw_init(struct panthor_device *ptdev); ++void panthor_fw_unplug(struct panthor_device *ptdev); ++ ++#endif diff --git a/patches-6.6/034-38-v6.10-drm-panthor-Add-the-heap-logical-block.patch b/patches-6.6/034-38-v6.10-drm-panthor-Add-the-heap-logical-block.patch new file mode 100644 index 0000000..a509424 --- /dev/null +++ b/patches-6.6/034-38-v6.10-drm-panthor-Add-the-heap-logical-block.patch @@ -0,0 +1,696 @@ +From 9cca48fa4f8933a2dadf2f011d461329ca0a8337 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:23 +0100 +Subject: [PATCH] drm/panthor: Add the heap logical block + +Tiler heap growing requires some kernel driver involvement: when the +tiler runs out of heap memory, it will raise an exception which is +either directly handled by the firmware if some free heap chunks are +available in the heap context, or passed back to the kernel otherwise. +The heap helpers will be used by the scheduler logic to allocate more +heap chunks to a heap context, when such a situation happens. + +Heap context creation is explicitly requested by userspace (using +the TILER_HEAP_CREATE ioctl), and the returned context is attached to a +queue through some command stream instruction. 
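+
+As an illustration of how these helpers fit together, here is a rough usage
+sketch built only from the functions this patch adds (the calling function
+and the numbers are made up; the real callers are the ioctl frontend and the
+scheduler, and they handle errors and locking more carefully):
+
+static int example_tiler_heap_flow(struct panthor_device *ptdev,
+				   struct panthor_vm *vm)
+{
+	u64 heap_ctx_gpu_va, first_chunk_gpu_va, new_chunk_gpu_va;
+	struct panthor_heap_pool *pool;
+	int handle, ret;
+
+	/* One heap pool per VM, holding up to 128 heap contexts. */
+	pool = panthor_heap_pool_create(ptdev, vm);
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
+
+	/* Roughly what TILER_HEAP_CREATE does: one initial 2M chunk. */
+	handle = panthor_heap_create(pool, 1, SZ_2M, 64, 32,
+				     &heap_ctx_gpu_va, &first_chunk_gpu_va);
+	if (handle < 0) {
+		panthor_heap_pool_destroy(pool);
+		return handle;
+	}
+
+	/* Roughly what the tiler OOM path does when the FW needs memory. */
+	ret = panthor_heap_grow(pool, heap_ctx_gpu_va, 16, 1,
+				&new_chunk_gpu_va);
+	if (ret == -EBUSY)
+		ret = 0;	/* Let the FW wait for fragment jobs instead. */
+
+	/* Roughly what TILER_HEAP_DESTROY does, then drop the pool. */
+	panthor_heap_destroy(pool, handle);
+	panthor_heap_pool_destroy(pool);
+	return ret;
+}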
+ +All the kernel does is keep the list of heap chunks allocated to a +context, so they can be freed when TILER_HEAP_DESTROY is called, or +extended when the FW requests a new chunk. + +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix FIXME comment +- Add Steve's R-b + +v4: +- Rework locking to allow concurrent calls to panthor_heap_grow() +- Add a helper to return a heap chunk if we couldn't pass it to the + FW because the group was scheduled out + +v3: +- Add a FIXME for the heap OOM deadlock +- Use the panthor_kernel_bo abstraction for the heap context and heap + chunks +- Drop the panthor_heap_gpu_ctx struct as it is opaque to the driver +- Ensure that the heap context is aligned to the GPU cache line size +- Minor code tidy ups + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-10-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_heap.c | 597 +++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_heap.h | 39 ++ + 2 files changed, 636 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_heap.c + create mode 100644 drivers/gpu/drm/panthor/panthor_heap.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_heap.c +@@ -0,0 +1,597 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. */ ++ ++#include ++#include ++ ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++ ++/* ++ * The GPU heap context is an opaque structure used by the GPU to track the ++ * heap allocations. The driver should only touch it to initialize it (zero all ++ * fields). Because the CPU and GPU can both access this structure it is ++ * required to be GPU cache line aligned. ++ */ ++#define HEAP_CONTEXT_SIZE 32 ++ ++/** ++ * struct panthor_heap_chunk_header - Heap chunk header ++ */ ++struct panthor_heap_chunk_header { ++ /** ++ * @next: Next heap chunk in the list. ++ * ++ * This is a GPU VA. ++ */ ++ u64 next; ++ ++ /** @unknown: MBZ. */ ++ u32 unknown[14]; ++}; ++ ++/** ++ * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks. ++ */ ++struct panthor_heap_chunk { ++ /** @node: Used to insert the heap chunk in panthor_heap::chunks. */ ++ struct list_head node; ++ ++ /** @bo: Buffer object backing the heap chunk. */ ++ struct panthor_kernel_bo *bo; ++}; ++ ++/** ++ * struct panthor_heap - Structure used to manage tiler heap contexts. ++ */ ++struct panthor_heap { ++ /** @chunks: List containing all heap chunks allocated so far. */ ++ struct list_head chunks; ++ ++ /** @lock: Lock protecting insertion in the chunks list. */ ++ struct mutex lock; ++ ++ /** @chunk_size: Size of each chunk. */ ++ u32 chunk_size; ++ ++ /** @max_chunks: Maximum number of chunks. */ ++ u32 max_chunks; ++ ++ /** ++ * @target_in_flight: Number of in-flight render passes after which ++ * we'd let the FW wait for fragment job to finish instead of allocating new chunks. ++ */ ++ u32 target_in_flight; ++ ++ /** @chunk_count: Number of heap chunks currently allocated. */ ++ u32 chunk_count; ++}; ++ ++#define MAX_HEAPS_PER_POOL 128 ++ ++/** ++ * struct panthor_heap_pool - Pool of heap contexts ++ * ++ * The pool is attached to a panthor_file and can't be shared across processes. ++ */ ++struct panthor_heap_pool { ++ /** @refcount: Reference count. 
*/ ++ struct kref refcount; ++ ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** @vm: VM this pool is bound to. */ ++ struct panthor_vm *vm; ++ ++ /** @lock: Lock protecting access to @xa. */ ++ struct rw_semaphore lock; ++ ++ /** @xa: Array storing panthor_heap objects. */ ++ struct xarray xa; ++ ++ /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ ++ struct panthor_kernel_bo *gpu_contexts; ++}; ++ ++static int panthor_heap_ctx_stride(struct panthor_device *ptdev) ++{ ++ u32 l2_features = ptdev->gpu_info.l2_features; ++ u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features); ++ ++ return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size); ++} ++ ++static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id) ++{ ++ return panthor_heap_ctx_stride(pool->ptdev) * id; ++} ++ ++static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) ++{ ++ return pool->gpu_contexts->kmap + ++ panthor_get_heap_ctx_offset(pool, id); ++} ++ ++static void panthor_free_heap_chunk(struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ struct panthor_heap_chunk *chunk) ++{ ++ mutex_lock(&heap->lock); ++ list_del(&chunk->node); ++ heap->chunk_count--; ++ mutex_unlock(&heap->lock); ++ ++ panthor_kernel_bo_destroy(vm, chunk->bo); ++ kfree(chunk); ++} ++ ++static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, ++ struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ bool initial_chunk) ++{ ++ struct panthor_heap_chunk *chunk; ++ struct panthor_heap_chunk_header *hdr; ++ int ret; ++ ++ chunk = kmalloc(sizeof(*chunk), GFP_KERNEL); ++ if (!chunk) ++ return -ENOMEM; ++ ++ chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(chunk->bo)) { ++ ret = PTR_ERR(chunk->bo); ++ goto err_free_chunk; ++ } ++ ++ ret = panthor_kernel_bo_vmap(chunk->bo); ++ if (ret) ++ goto err_destroy_bo; ++ ++ hdr = chunk->bo->kmap; ++ memset(hdr, 0, sizeof(*hdr)); ++ ++ if (initial_chunk && !list_empty(&heap->chunks)) { ++ struct panthor_heap_chunk *prev_chunk; ++ u64 prev_gpuva; ++ ++ prev_chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ ++ prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo); ++ hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) | ++ (heap->chunk_size >> 12); ++ } ++ ++ panthor_kernel_bo_vunmap(chunk->bo); ++ ++ mutex_lock(&heap->lock); ++ list_add(&chunk->node, &heap->chunks); ++ heap->chunk_count++; ++ mutex_unlock(&heap->lock); ++ ++ return 0; ++ ++err_destroy_bo: ++ panthor_kernel_bo_destroy(vm, chunk->bo); ++ ++err_free_chunk: ++ kfree(chunk); ++ ++ return ret; ++} ++ ++static void panthor_free_heap_chunks(struct panthor_vm *vm, ++ struct panthor_heap *heap) ++{ ++ struct panthor_heap_chunk *chunk, *tmp; ++ ++ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) ++ panthor_free_heap_chunk(vm, heap, chunk); ++} ++ ++static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, ++ struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ u32 chunk_count) ++{ ++ int ret; ++ u32 i; ++ ++ for (i = 0; i < chunk_count; i++) { ++ ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int ++panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) ++{ ++ struct panthor_heap *heap; ++ ++ heap = xa_erase(&pool->xa, handle); ++ if (!heap) ++ return -EINVAL; ++ ++ panthor_free_heap_chunks(pool->vm, heap); ++ 
mutex_destroy(&heap->lock); ++ kfree(heap); ++ return 0; ++} ++ ++/** ++ * panthor_heap_destroy() - Destroy a heap context ++ * @pool: Pool this context belongs to. ++ * @handle: Handle returned by panthor_heap_create(). ++ */ ++int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) ++{ ++ int ret; ++ ++ down_write(&pool->lock); ++ ret = panthor_heap_destroy_locked(pool, handle); ++ up_write(&pool->lock); ++ ++ return ret; ++} ++ ++/** ++ * panthor_heap_create() - Create a heap context ++ * @pool: Pool to instantiate the heap context from. ++ * @initial_chunk_count: Number of chunk allocated at initialization time. ++ * Must be at least 1. ++ * @chunk_size: The size of each chunk. Must be a power of two between 256k ++ * and 2M. ++ * @max_chunks: Maximum number of chunks that can be allocated. ++ * @target_in_flight: Maximum number of in-flight render passes. ++ * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap ++ * context. ++ * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk ++ * assigned to the heap context. ++ * ++ * Return: a positive handle on success, a negative error otherwise. ++ */ ++int panthor_heap_create(struct panthor_heap_pool *pool, ++ u32 initial_chunk_count, ++ u32 chunk_size, ++ u32 max_chunks, ++ u32 target_in_flight, ++ u64 *heap_ctx_gpu_va, ++ u64 *first_chunk_gpu_va) ++{ ++ struct panthor_heap *heap; ++ struct panthor_heap_chunk *first_chunk; ++ struct panthor_vm *vm; ++ int ret = 0; ++ u32 id; ++ ++ if (initial_chunk_count == 0) ++ return -EINVAL; ++ ++ if (hweight32(chunk_size) != 1 || ++ chunk_size < SZ_256K || chunk_size > SZ_2M) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ vm = panthor_vm_get(pool->vm); ++ up_read(&pool->lock); ++ ++ /* The pool has been destroyed, we can't create a new heap. */ ++ if (!vm) ++ return -EINVAL; ++ ++ heap = kzalloc(sizeof(*heap), GFP_KERNEL); ++ if (!heap) { ++ ret = -ENOMEM; ++ goto err_put_vm; ++ } ++ ++ mutex_init(&heap->lock); ++ INIT_LIST_HEAD(&heap->chunks); ++ heap->chunk_size = chunk_size; ++ heap->max_chunks = max_chunks; ++ heap->target_in_flight = target_in_flight; ++ ++ ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, ++ initial_chunk_count); ++ if (ret) ++ goto err_free_heap; ++ ++ first_chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo); ++ ++ down_write(&pool->lock); ++ /* The pool has been destroyed, we can't create a new heap. */ ++ if (!pool->vm) { ++ ret = -EINVAL; ++ } else { ++ ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL); ++ if (!ret) { ++ void *gpu_ctx = panthor_get_heap_ctx(pool, id); ++ ++ memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev)); ++ *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) + ++ panthor_get_heap_ctx_offset(pool, id); ++ } ++ } ++ up_write(&pool->lock); ++ ++ if (ret) ++ goto err_free_heap; ++ ++ panthor_vm_put(vm); ++ return id; ++ ++err_free_heap: ++ panthor_free_heap_chunks(pool->vm, heap); ++ mutex_destroy(&heap->lock); ++ kfree(heap); ++ ++err_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++/** ++ * panthor_heap_return_chunk() - Return an unused heap chunk ++ * @pool: The pool this heap belongs to. ++ * @heap_gpu_va: The GPU address of the heap context. ++ * @chunk_gpu_va: The chunk VA to return. 
++ * ++ * This function is used when a chunk allocated with panthor_heap_grow() ++ * couldn't be linked to the heap context through the FW interface because ++ * the group requesting the allocation was scheduled out in the meantime. ++ */ ++int panthor_heap_return_chunk(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u64 chunk_gpu_va) ++{ ++ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); ++ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); ++ struct panthor_heap_chunk *chunk, *tmp, *removed = NULL; ++ struct panthor_heap *heap; ++ int ret; ++ ++ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ heap = xa_load(&pool->xa, heap_id); ++ if (!heap) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ chunk_gpu_va &= GENMASK_ULL(63, 12); ++ ++ mutex_lock(&heap->lock); ++ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) { ++ if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) { ++ removed = chunk; ++ list_del(&chunk->node); ++ heap->chunk_count--; ++ break; ++ } ++ } ++ mutex_unlock(&heap->lock); ++ ++ if (removed) { ++ panthor_kernel_bo_destroy(pool->vm, chunk->bo); ++ kfree(chunk); ++ ret = 0; ++ } else { ++ ret = -EINVAL; ++ } ++ ++out_unlock: ++ up_read(&pool->lock); ++ return ret; ++} ++ ++/** ++ * panthor_heap_grow() - Make a heap context grow. ++ * @pool: The pool this heap belongs to. ++ * @heap_gpu_va: The GPU address of the heap context. ++ * @renderpasses_in_flight: Number of render passes currently in-flight. ++ * @pending_frag_count: Number of fragment jobs waiting for execution/completion. ++ * @new_chunk_gpu_va: Pointer used to return the chunk VA. ++ */ ++int panthor_heap_grow(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u32 renderpasses_in_flight, ++ u32 pending_frag_count, ++ u64 *new_chunk_gpu_va) ++{ ++ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); ++ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); ++ struct panthor_heap_chunk *chunk; ++ struct panthor_heap *heap; ++ int ret; ++ ++ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ heap = xa_load(&pool->xa, heap_id); ++ if (!heap) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ /* If we reached the target in-flight render passes, or if we ++ * reached the maximum number of chunks, let the FW figure another way to ++ * find some memory (wait for render passes to finish, or call the exception ++ * handler provided by the userspace driver, if any). ++ */ ++ if (renderpasses_in_flight > heap->target_in_flight || ++ (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } else if (heap->chunk_count >= heap->max_chunks) { ++ ret = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation, ++ * which goes through the blocking allocation path. Ultimately, we ++ * want a non-blocking allocation, so we can immediately report to the ++ * FW when the system is running out of memory. In that case, the FW ++ * can call a user-provided exception handler, which might try to free ++ * some tiler memory by issuing an intermediate fragment job. If the ++ * exception handler can't do anything, it will flag the queue as ++ * faulty so the job that triggered this tiler chunk allocation and all ++ * further jobs in this queue fail immediately instead of having to ++ * wait for the job timeout. 
++ */ ++ ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); ++ if (ret) ++ goto out_unlock; ++ ++ chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) | ++ (heap->chunk_size >> 12); ++ ret = 0; ++ ++out_unlock: ++ up_read(&pool->lock); ++ return ret; ++} ++ ++static void panthor_heap_pool_release(struct kref *refcount) ++{ ++ struct panthor_heap_pool *pool = ++ container_of(refcount, struct panthor_heap_pool, refcount); ++ ++ xa_destroy(&pool->xa); ++ kfree(pool); ++} ++ ++/** ++ * panthor_heap_pool_put() - Release a heap pool reference ++ * @pool: Pool to release the reference on. Can be NULL. ++ */ ++void panthor_heap_pool_put(struct panthor_heap_pool *pool) ++{ ++ if (pool) ++ kref_put(&pool->refcount, panthor_heap_pool_release); ++} ++ ++/** ++ * panthor_heap_pool_get() - Get a heap pool reference ++ * @pool: Pool to get the reference on. Can be NULL. ++ * ++ * Return: @pool. ++ */ ++struct panthor_heap_pool * ++panthor_heap_pool_get(struct panthor_heap_pool *pool) ++{ ++ if (pool) ++ kref_get(&pool->refcount); ++ ++ return pool; ++} ++ ++/** ++ * panthor_heap_pool_create() - Create a heap pool ++ * @ptdev: Device. ++ * @vm: The VM this heap pool will be attached to. ++ * ++ * Heap pools might contain up to 128 heap contexts, and are per-VM. ++ * ++ * Return: A valid pointer on success, a negative error code otherwise. ++ */ ++struct panthor_heap_pool * ++panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) ++{ ++ size_t bosize = ALIGN(MAX_HEAPS_PER_POOL * ++ panthor_heap_ctx_stride(ptdev), ++ 4096); ++ struct panthor_heap_pool *pool; ++ int ret = 0; ++ ++ pool = kzalloc(sizeof(*pool), GFP_KERNEL); ++ if (!pool) ++ return ERR_PTR(-ENOMEM); ++ ++ /* We want a weak ref here: the heap pool belongs to the VM, so we're ++ * sure that, as long as the heap pool exists, the VM exists too. ++ */ ++ pool->vm = vm; ++ pool->ptdev = ptdev; ++ init_rwsem(&pool->lock); ++ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1); ++ kref_init(&pool->refcount); ++ ++ pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(pool->gpu_contexts)) { ++ ret = PTR_ERR(pool->gpu_contexts); ++ goto err_destroy_pool; ++ } ++ ++ ret = panthor_kernel_bo_vmap(pool->gpu_contexts); ++ if (ret) ++ goto err_destroy_pool; ++ ++ return pool; ++ ++err_destroy_pool: ++ panthor_heap_pool_destroy(pool); ++ return ERR_PTR(ret); ++} ++ ++/** ++ * panthor_heap_pool_destroy() - Destroy a heap pool. ++ * @pool: Pool to destroy. ++ * ++ * This function destroys all heap contexts and their resources. Thus ++ * preventing any use of the heap context or the chunk attached to them ++ * after that point. ++ * ++ * If the GPU still has access to some heap contexts, a fault should be ++ * triggered, which should flag the command stream groups using these ++ * context as faulty. ++ * ++ * The heap pool object is only released when all references to this pool ++ * are released. 
++ */ ++void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) ++{ ++ struct panthor_heap *heap; ++ unsigned long i; ++ ++ if (!pool) ++ return; ++ ++ down_write(&pool->lock); ++ xa_for_each(&pool->xa, i, heap) ++ drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); ++ ++ if (!IS_ERR_OR_NULL(pool->gpu_contexts)) ++ panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts); ++ ++ /* Reflects the fact the pool has been destroyed. */ ++ pool->vm = NULL; ++ up_write(&pool->lock); ++ ++ panthor_heap_pool_put(pool); ++} +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_heap.h +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_HEAP_H__ ++#define __PANTHOR_HEAP_H__ ++ ++#include ++ ++struct panthor_device; ++struct panthor_heap_pool; ++struct panthor_vm; ++ ++int panthor_heap_create(struct panthor_heap_pool *pool, ++ u32 initial_chunk_count, ++ u32 chunk_size, ++ u32 max_chunks, ++ u32 target_in_flight, ++ u64 *heap_ctx_gpu_va, ++ u64 *first_chunk_gpu_va); ++int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle); ++ ++struct panthor_heap_pool * ++panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm); ++void panthor_heap_pool_destroy(struct panthor_heap_pool *pool); ++ ++struct panthor_heap_pool * ++panthor_heap_pool_get(struct panthor_heap_pool *pool); ++void panthor_heap_pool_put(struct panthor_heap_pool *pool); ++ ++int panthor_heap_grow(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u32 renderpasses_in_flight, ++ u32 pending_frag_count, ++ u64 *new_chunk_gpu_va); ++int panthor_heap_return_chunk(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u64 chunk_gpu_va); ++ ++#endif diff --git a/patches-6.6/034-39-v6.10-drm-panthor-Add-the-scheduler-logical-block.patch b/patches-6.6/034-39-v6.10-drm-panthor-Add-the-scheduler-logical-block.patch new file mode 100644 index 0000000..a32107c --- /dev/null +++ b/patches-6.6/034-39-v6.10-drm-panthor-Add-the-scheduler-logical-block.patch @@ -0,0 +1,3647 @@ +From de85488138247d034eb3241840424a54d660926b Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:24 +0100 +Subject: [PATCH] drm/panthor: Add the scheduler logical block + +This is the piece of software interacting with the FW scheduler, and +taking care of some scheduling aspects when the FW comes short of slots +scheduling slots. Indeed, the FW only expose a few slots, and the kernel +has to give all submission contexts, a chance to execute their jobs. + +The kernel-side scheduler is timeslice-based, with a round-robin queue +per priority level. + +Job submission is handled with a 1:1 drm_sched_entity:drm_gpu_scheduler, +allowing us to delegate the dependency tracking to the core. + +All the gory details should be documented inline. 
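+
+The "round-robin queue per priority level" idea boils down to something like
+the sketch below, using the regular <linux/list.h> helpers (the types and the
+function are made up for illustration; the real logic is the tick work in
+panthor_sched.c, which also has to deal with idle/blocked groups and FW slot
+eviction):
+
+struct example_group {
+	struct list_head run_node;
+};
+
+/* One runnable list per priority level, highest priority scanned first. */
+static struct example_group *
+example_pick_next_group(struct list_head *runnable, int prio_count)
+{
+	int prio;
+
+	for (prio = prio_count - 1; prio >= 0; prio--) {
+		struct example_group *group;
+
+		if (list_empty(&runnable[prio]))
+			continue;
+
+		/* Take the head and send it to the tail: round-robin. */
+		group = list_first_entry(&runnable[prio],
+					 struct example_group, run_node);
+		list_move_tail(&group->run_node, &runnable[prio]);
+		return group;
+	}
+
+	return NULL;
+}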
+ +v6: +- Add Maxime's and Heiko's acks +- Make sure the scheduler is initialized before queueing the tick work + in the MMU fault handler +- Keep header inclusion alphabetically ordered + +v5: +- Fix typos +- Call panthor_kernel_bo_destroy(group->syncobjs) unconditionally +- Don't move the group to the waiting list tail when it was already + waiting for a different syncobj +- Fix fatal_queues flagging in the tiler OOM path +- Don't warn when more than one job timesout on a group +- Add a warning message when we fail to allocate a heap chunk +- Add Steve's R-b + +v4: +- Check drmm_mutex_init() return code +- s/drm_gem_vmap_unlocked/drm_gem_vunmap_unlocked/ in + panthor_queue_put_syncwait_obj() +- Drop unneeded WARN_ON() in cs_slot_sync_queue_state_locked() +- Use atomic_xchg() instead of atomic_fetch_and(0) +- Fix typos +- Let panthor_kernel_bo_destroy() check for IS_ERR_OR_NULL() BOs +- Defer TILER_OOM event handling to a separate workqueue to prevent + deadlocks when the heap chunk allocation is blocked on mem-reclaim. + This is just a temporary solution, until we add support for + non-blocking/failable allocations +- Pass the scheduler workqueue to drm_sched instead of instantiating + a separate one (no longer needed now that heap chunk allocation + happens on a dedicated wq) +- Set WQ_MEM_RECLAIM on the scheduler workqueue, so we can handle + job timeouts when the system is under mem pressure, and hopefully + free up some memory retained by these jobs + +v3: +- Rework the FW event handling logic to avoid races +- Make sure MMU faults kill the group immediately +- Use the panthor_kernel_bo abstraction for group/queue buffers +- Make in_progress an atomic_t, so we can check it without the reset lock + held +- Don't limit the number of groups per context to the FW scheduler + capacity. Fix the limit to 128 for now. +- Add a panthor_job_vm() helper +- Account for panthor_vm changes +- Add our job fence as DMA_RESV_USAGE_WRITE to all external objects + (was previously DMA_RESV_USAGE_BOOKKEEP). I don't get why, given + we're supposed to be fully-explicit, but other drivers do that, so + there must be a good reason +- Account for drm_sched changes +- Provide a panthor_queue_put_syncwait_obj() +- Unconditionally return groups to their idle list in + panthor_sched_suspend() +- Condition of sched_queue_{,delayed_}work fixed to be only when a reset + isn't pending or in progress. +- Several typos in comments fixed. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-11-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_sched.c | 3502 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_sched.h | 50 + + 2 files changed, 3552 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_sched.c + create mode 100644 drivers/gpu/drm/panthor/panthor_sched.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -0,0 +1,3502 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++/** ++ * DOC: Scheduler ++ * ++ * Mali CSF hardware adopts a firmware-assisted scheduling model, where ++ * the firmware takes care of scheduling aspects, to some extent. ++ * ++ * The scheduling happens at the scheduling group level, each group ++ * contains 1 to N queues (N is FW/hardware dependent, and exposed ++ * through the firmware interface). Each queue is assigned a command ++ * stream ring buffer, which serves as a way to get jobs submitted to ++ * the GPU, among other things. ++ * ++ * The firmware can schedule a maximum of M groups (M is FW/hardware ++ * dependent, and exposed through the firmware interface). Passed ++ * this maximum number of groups, the kernel must take care of ++ * rotating the groups passed to the firmware so every group gets ++ * a chance to have his queues scheduled for execution. ++ * ++ * The current implementation only supports with kernel-mode queues. ++ * In other terms, userspace doesn't have access to the ring-buffer. ++ * Instead, userspace passes indirect command stream buffers that are ++ * called from the queue ring-buffer by the kernel using a pre-defined ++ * sequence of command stream instructions to ensure the userspace driver ++ * always gets consistent results (cache maintenance, ++ * synchronization, ...). ++ * ++ * We rely on the drm_gpu_scheduler framework to deal with job ++ * dependencies and submission. As any other driver dealing with a ++ * FW-scheduler, we use the 1:1 entity:scheduler mode, such that each ++ * entity has its own job scheduler. When a job is ready to be executed ++ * (all its dependencies are met), it is pushed to the appropriate ++ * queue ring-buffer, and the group is scheduled for execution if it ++ * wasn't already active. ++ * ++ * Kernel-side group scheduling is timeslice-based. When we have less ++ * groups than there are slots, the periodic tick is disabled and we ++ * just let the FW schedule the active groups. When there are more ++ * groups than slots, we let each group a chance to execute stuff for ++ * a given amount of time, and then re-evaluate and pick new groups ++ * to schedule. The group selection algorithm is based on ++ * priority+round-robin. ++ * ++ * Even though user-mode queues is out of the scope right now, the ++ * current design takes them into account by avoiding any guess on the ++ * group/queue state that would be based on information we wouldn't have ++ * if userspace was in charge of the ring-buffer. That's also one of the ++ * reason we don't do 'cooperative' scheduling (encoding FW group slot ++ * reservation as dma_fence that would be returned from the ++ * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as ++ * a queue of waiters, ordered by job submission order). This approach ++ * would work for kernel-mode queues, but would make user-mode queues a ++ * lot more complicated to retrofit. 
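++ *
++ * As a concrete example of the rotation described above: if the FW exposes
++ * M group slots and M + 1 groups of the same priority are runnable, the
++ * periodic tick evicts one of the active groups at the end of its timeslice
++ * and schedules the group that has been waiting, in round-robin order, so
++ * that every group regularly gets access to a FW slot.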
++ */ ++ ++#define JOB_TIMEOUT_MS 5000 ++ ++#define MIN_CS_PER_CSG 8 ++ ++#define MIN_CSGS 3 ++#define MAX_CSG_PRIO 0xf ++ ++struct panthor_group; ++ ++/** ++ * struct panthor_csg_slot - Command stream group slot ++ * ++ * This represents a FW slot for a scheduling group. ++ */ ++struct panthor_csg_slot { ++ /** @group: Scheduling group bound to this slot. */ ++ struct panthor_group *group; ++ ++ /** @priority: Group priority. */ ++ u8 priority; ++ ++ /** ++ * @idle: True if the group bound to this slot is idle. ++ * ++ * A group is idle when it has nothing waiting for execution on ++ * all its queues, or when queues are blocked waiting for something ++ * to happen (synchronization object). ++ */ ++ bool idle; ++}; ++ ++/** ++ * enum panthor_csg_priority - Group priority ++ */ ++enum panthor_csg_priority { ++ /** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */ ++ PANTHOR_CSG_PRIORITY_LOW = 0, ++ ++ /** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */ ++ PANTHOR_CSG_PRIORITY_MEDIUM, ++ ++ /** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */ ++ PANTHOR_CSG_PRIORITY_HIGH, ++ ++ /** ++ * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group. ++ * ++ * Real-time priority allows one to preempt scheduling of other ++ * non-real-time groups. When such a group becomes executable, ++ * it will evict the group with the lowest non-rt priority if ++ * there's no free group slot available. ++ * ++ * Currently not exposed to userspace. ++ */ ++ PANTHOR_CSG_PRIORITY_RT, ++ ++ /** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */ ++ PANTHOR_CSG_PRIORITY_COUNT, ++}; ++ ++/** ++ * struct panthor_scheduler - Object used to manage the scheduler ++ */ ++struct panthor_scheduler { ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** ++ * @wq: Workqueue used by our internal scheduler logic and ++ * drm_gpu_scheduler. ++ * ++ * Used for the scheduler tick, group update or other kind of FW ++ * event processing that can't be handled in the threaded interrupt ++ * path. Also passed to the drm_gpu_scheduler instances embedded ++ * in panthor_queue. ++ */ ++ struct workqueue_struct *wq; ++ ++ /** ++ * @heap_alloc_wq: Workqueue used to schedule tiler_oom works. ++ * ++ * We have a queue dedicated to heap chunk allocation works to avoid ++ * blocking the rest of the scheduler if the allocation tries to ++ * reclaim memory. ++ */ ++ struct workqueue_struct *heap_alloc_wq; ++ ++ /** @tick_work: Work executed on a scheduling tick. */ ++ struct delayed_work tick_work; ++ ++ /** ++ * @sync_upd_work: Work used to process synchronization object updates. ++ * ++ * We use this work to unblock queues/groups that were waiting on a ++ * synchronization object. ++ */ ++ struct work_struct sync_upd_work; ++ ++ /** ++ * @fw_events_work: Work used to process FW events outside the interrupt path. ++ * ++ * Even if the interrupt is threaded, we need any event processing ++ * that require taking the panthor_scheduler::lock to be processed ++ * outside the interrupt path so we don't block the tick logic when ++ * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the ++ * event processing requires taking this lock, we just delegate all ++ * FW event processing to the scheduler workqueue. ++ */ ++ struct work_struct fw_events_work; ++ ++ /** ++ * @fw_events: Bitmask encoding pending FW events. ++ */ ++ atomic_t fw_events; ++ ++ /** ++ * @resched_target: When the next tick should occur. ++ * ++ * Expressed in jiffies. ++ */ ++ u64 resched_target; ++ ++ /** ++ * @last_tick: When the last tick occurred. 
++ * ++ * Expressed in jiffies. ++ */ ++ u64 last_tick; ++ ++ /** @tick_period: Tick period in jiffies. */ ++ u64 tick_period; ++ ++ /** ++ * @lock: Lock protecting access to all the scheduler fields. ++ * ++ * Should be taken in the tick work, the irq handler, and anywhere the @groups ++ * fields are touched. ++ */ ++ struct mutex lock; ++ ++ /** @groups: Various lists used to classify groups. */ ++ struct { ++ /** ++ * @runnable: Runnable group lists. ++ * ++ * When a group has queues that want to execute something, ++ * its panthor_group::run_node should be inserted here. ++ * ++ * One list per-priority. ++ */ ++ struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT]; ++ ++ /** ++ * @idle: Idle group lists. ++ * ++ * When all queues of a group are idle (either because they ++ * have nothing to execute, or because they are blocked), the ++ * panthor_group::run_node field should be inserted here. ++ * ++ * One list per-priority. ++ */ ++ struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT]; ++ ++ /** ++ * @waiting: List of groups whose queues are blocked on a ++ * synchronization object. ++ * ++ * Insert panthor_group::wait_node here when a group is waiting ++ * for synchronization objects to be signaled. ++ * ++ * This list is evaluated in the @sync_upd_work work. ++ */ ++ struct list_head waiting; ++ } groups; ++ ++ /** ++ * @csg_slots: FW command stream group slots. ++ */ ++ struct panthor_csg_slot csg_slots[MAX_CSGS]; ++ ++ /** @csg_slot_count: Number of command stream group slots exposed by the FW. */ ++ u32 csg_slot_count; ++ ++ /** @cs_slot_count: Number of command stream slot per group slot exposed by the FW. */ ++ u32 cs_slot_count; ++ ++ /** @as_slot_count: Number of address space slots supported by the MMU. */ ++ u32 as_slot_count; ++ ++ /** @used_csg_slot_count: Number of command stream group slot currently used. */ ++ u32 used_csg_slot_count; ++ ++ /** @sb_slot_count: Number of scoreboard slots. */ ++ u32 sb_slot_count; ++ ++ /** ++ * @might_have_idle_groups: True if an active group might have become idle. ++ * ++ * This will force a tick, so other runnable groups can be scheduled if one ++ * or more active groups became idle. ++ */ ++ bool might_have_idle_groups; ++ ++ /** @pm: Power management related fields. */ ++ struct { ++ /** @has_ref: True if the scheduler owns a runtime PM reference. */ ++ bool has_ref; ++ } pm; ++ ++ /** @reset: Reset related fields. */ ++ struct { ++ /** @lock: Lock protecting the other reset fields. */ ++ struct mutex lock; ++ ++ /** ++ * @in_progress: True if a reset is in progress. ++ * ++ * Set to true in panthor_sched_pre_reset() and back to false in ++ * panthor_sched_post_reset(). ++ */ ++ atomic_t in_progress; ++ ++ /** ++ * @stopped_groups: List containing all groups that were stopped ++ * before a reset. ++ * ++ * Insert panthor_group::run_node in the pre_reset path. ++ */ ++ struct list_head stopped_groups; ++ } reset; ++}; ++ ++/** ++ * struct panthor_syncobj_32b - 32-bit FW synchronization object ++ */ ++struct panthor_syncobj_32b { ++ /** @seqno: Sequence number. */ ++ u32 seqno; ++ ++ /** ++ * @status: Status. ++ * ++ * Not zero on failure. ++ */ ++ u32 status; ++}; ++ ++/** ++ * struct panthor_syncobj_64b - 64-bit FW synchronization object ++ */ ++struct panthor_syncobj_64b { ++ /** @seqno: Sequence number. */ ++ u64 seqno; ++ ++ /** ++ * @status: Status. ++ * ++ * Not zero on failure. ++ */ ++ u32 status; ++ ++ /** @pad: MBZ. 
*/ ++ u32 pad; ++}; ++ ++/** ++ * struct panthor_queue - Execution queue ++ */ ++struct panthor_queue { ++ /** @scheduler: DRM scheduler used for this queue. */ ++ struct drm_gpu_scheduler scheduler; ++ ++ /** @entity: DRM scheduling entity used for this queue. */ ++ struct drm_sched_entity entity; ++ ++ /** ++ * @remaining_time: Time remaining before the job timeout expires. ++ * ++ * The job timeout is suspended when the queue is not scheduled by the ++ * FW. Every time we suspend the timer, we need to save the remaining ++ * time so we can restore it later on. ++ */ ++ unsigned long remaining_time; ++ ++ /** @timeout_suspended: True if the job timeout was suspended. */ ++ bool timeout_suspended; ++ ++ /** ++ * @doorbell_id: Doorbell assigned to this queue. ++ * ++ * Right now, all groups share the same doorbell, and the doorbell ID ++ * is assigned to group_slot + 1 when the group is assigned a slot. But ++ * we might decide to provide fine grained doorbell assignment at some ++ * point, so don't have to wake up all queues in a group every time one ++ * of them is updated. ++ */ ++ u8 doorbell_id; ++ ++ /** ++ * @priority: Priority of the queue inside the group. ++ * ++ * Must be less than 16 (Only 4 bits available). ++ */ ++ u8 priority; ++#define CSF_MAX_QUEUE_PRIO GENMASK(3, 0) ++ ++ /** @ringbuf: Command stream ring-buffer. */ ++ struct panthor_kernel_bo *ringbuf; ++ ++ /** @iface: Firmware interface. */ ++ struct { ++ /** @mem: FW memory allocated for this interface. */ ++ struct panthor_kernel_bo *mem; ++ ++ /** @input: Input interface. */ ++ struct panthor_fw_ringbuf_input_iface *input; ++ ++ /** @output: Output interface. */ ++ const struct panthor_fw_ringbuf_output_iface *output; ++ ++ /** @input_fw_va: FW virtual address of the input interface buffer. */ ++ u32 input_fw_va; ++ ++ /** @output_fw_va: FW virtual address of the output interface buffer. */ ++ u32 output_fw_va; ++ } iface; ++ ++ /** ++ * @syncwait: Stores information about the synchronization object this ++ * queue is waiting on. ++ */ ++ struct { ++ /** @gpu_va: GPU address of the synchronization object. */ ++ u64 gpu_va; ++ ++ /** @ref: Reference value to compare against. */ ++ u64 ref; ++ ++ /** @gt: True if this is a greater-than test. */ ++ bool gt; ++ ++ /** @sync64: True if this is a 64-bit sync object. */ ++ bool sync64; ++ ++ /** @bo: Buffer object holding the synchronization object. */ ++ struct drm_gem_object *obj; ++ ++ /** @offset: Offset of the synchronization object inside @bo. */ ++ u64 offset; ++ ++ /** ++ * @kmap: Kernel mapping of the buffer object holding the ++ * synchronization object. ++ */ ++ void *kmap; ++ } syncwait; ++ ++ /** @fence_ctx: Fence context fields. */ ++ struct { ++ /** @lock: Used to protect access to all fences allocated by this context. */ ++ spinlock_t lock; ++ ++ /** ++ * @id: Fence context ID. ++ * ++ * Allocated with dma_fence_context_alloc(). ++ */ ++ u64 id; ++ ++ /** @seqno: Sequence number of the last initialized fence. */ ++ atomic64_t seqno; ++ ++ /** ++ * @in_flight_jobs: List containing all in-flight jobs. ++ * ++ * Used to keep track and signal panthor_job::done_fence when the ++ * synchronization object attached to the queue is signaled. ++ */ ++ struct list_head in_flight_jobs; ++ } fence_ctx; ++}; ++ ++/** ++ * enum panthor_group_state - Scheduling group state. ++ */ ++enum panthor_group_state { ++ /** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. 
*/
++ PANTHOR_CS_GROUP_CREATED,
++
++ /** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
++ PANTHOR_CS_GROUP_ACTIVE,
++
++ /**
++ * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is
++ * inactive/suspended right now.
++ */
++ PANTHOR_CS_GROUP_SUSPENDED,
++
++ /**
++ * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
++ *
++ * Can no longer be scheduled. The only allowed action is destruction.
++ */
++ PANTHOR_CS_GROUP_TERMINATED,
++};
++
++/**
++ * struct panthor_group - Scheduling group object
++ */
++struct panthor_group {
++ /** @refcount: Reference count */
++ struct kref refcount;
++
++ /** @ptdev: Device. */
++ struct panthor_device *ptdev;
++
++ /** @vm: VM bound to the group. */
++ struct panthor_vm *vm;
++
++ /** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
++ u64 compute_core_mask;
++
++ /** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
++ u64 fragment_core_mask;
++
++ /** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
++ u64 tiler_core_mask;
++
++ /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
++ u8 max_compute_cores;
++
++ /** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */
++ u8 max_fragment_cores;
++
++ /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
++ u8 max_tiler_cores;
++
++ /** @priority: Group priority (check panthor_csg_priority). */
++ u8 priority;
++
++ /** @blocked_queues: Bitmask reflecting the blocked queues. */
++ u32 blocked_queues;
++
++ /** @idle_queues: Bitmask reflecting the idle queues. */
++ u32 idle_queues;
++
++ /** @fatal_lock: Lock used to protect access to fatal fields. */
++ spinlock_t fatal_lock;
++
++ /** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
++ u32 fatal_queues;
++
++ /** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
++ atomic_t tiler_oom;
++
++ /** @queue_count: Number of queues in this group. */
++ u32 queue_count;
++
++ /** @queues: Queues owned by this group. */
++ struct panthor_queue *queues[MAX_CS_PER_CSG];
++
++ /**
++ * @csg_id: ID of the FW group slot.
++ *
++ * -1 when the group is not scheduled/active.
++ */
++ int csg_id;
++
++ /**
++ * @destroyed: True when the group has been destroyed.
++ *
++ * If a group is destroyed it becomes useless: no further jobs can be submitted
++ * to its queues. We simply wait for all references to be dropped so we can
++ * release the group object.
++ */
++ bool destroyed;
++
++ /**
++ * @timedout: True when a timeout occurred on any of the queues owned by
++ * this group.
++ *
++ * Timeouts can be reported by drm_sched or by the FW. In any case, any
++ * timeout situation is unrecoverable, and the group becomes useless.
++ * We simply wait for all references to be dropped so we can release the
++ * group object.
++ */
++ bool timedout;
++
++ /**
++ * @syncobjs: Pool of per-queue synchronization objects.
++ *
++ * One sync object per queue. The position of the sync object is
++ * determined by the queue index.
++ */
++ struct panthor_kernel_bo *syncobjs;
++
++ /** @state: Group state. */
++ enum panthor_group_state state;
++
++ /**
++ * @suspend_buf: Suspend buffer.
++ *
++ * Stores the state of the group and its queues when a group is suspended.
++ * Used at resume time to restore the group in its previous state.
++ *
++ * The size of the suspend buffer is exposed through the FW interface.
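++ *
++ * For illustration, csg_slot_prog_locked() below hands this buffer to
++ * the FW with:
++ *
++ *   csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf);
++ *
++ * so the FW has somewhere to save the group state when it gets suspended.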
++ */ ++ struct panthor_kernel_bo *suspend_buf; ++ ++ /** ++ * @protm_suspend_buf: Protection mode suspend buffer. ++ * ++ * Stores the state of the group and its queues when a group that's in ++ * protection mode is suspended. ++ * ++ * Used at resume time to restore the group in its previous state. ++ * ++ * The size of the protection mode suspend buffer is exposed through the ++ * FW interface. ++ */ ++ struct panthor_kernel_bo *protm_suspend_buf; ++ ++ /** @sync_upd_work: Work used to check/signal job fences. */ ++ struct work_struct sync_upd_work; ++ ++ /** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */ ++ struct work_struct tiler_oom_work; ++ ++ /** @term_work: Work used to finish the group termination procedure. */ ++ struct work_struct term_work; ++ ++ /** ++ * @release_work: Work used to release group resources. ++ * ++ * We need to postpone the group release to avoid a deadlock when ++ * the last ref is released in the tick work. ++ */ ++ struct work_struct release_work; ++ ++ /** ++ * @run_node: Node used to insert the group in the ++ * panthor_group::groups::{runnable,idle} and ++ * panthor_group::reset.stopped_groups lists. ++ */ ++ struct list_head run_node; ++ ++ /** ++ * @wait_node: Node used to insert the group in the ++ * panthor_group::groups::waiting list. ++ */ ++ struct list_head wait_node; ++}; ++ ++/** ++ * group_queue_work() - Queue a group work ++ * @group: Group to queue the work for. ++ * @wname: Work name. ++ * ++ * Grabs a ref and queue a work item to the scheduler workqueue. If ++ * the work was already queued, we release the reference we grabbed. ++ * ++ * Work callbacks must release the reference we grabbed here. ++ */ ++#define group_queue_work(group, wname) \ ++ do { \ ++ group_get(group); \ ++ if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \ ++ group_put(group); \ ++ } while (0) ++ ++/** ++ * sched_queue_work() - Queue a scheduler work. ++ * @sched: Scheduler object. ++ * @wname: Work name. ++ * ++ * Conditionally queues a scheduler work if no reset is pending/in-progress. ++ */ ++#define sched_queue_work(sched, wname) \ ++ do { \ ++ if (!atomic_read(&(sched)->reset.in_progress) && \ ++ !panthor_device_reset_is_pending((sched)->ptdev)) \ ++ queue_work((sched)->wq, &(sched)->wname ## _work); \ ++ } while (0) ++ ++/** ++ * sched_queue_delayed_work() - Queue a scheduler delayed work. ++ * @sched: Scheduler object. ++ * @wname: Work name. ++ * @delay: Work delay in jiffies. ++ * ++ * Conditionally queues a scheduler delayed work if no reset is ++ * pending/in-progress. ++ */ ++#define sched_queue_delayed_work(sched, wname, delay) \ ++ do { \ ++ if (!atomic_read(&sched->reset.in_progress) && \ ++ !panthor_device_reset_is_pending((sched)->ptdev)) \ ++ mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \ ++ } while (0) ++ ++/* ++ * We currently set the maximum of groups per file to an arbitrary low value. ++ * But this can be updated if we need more. ++ */ ++#define MAX_GROUPS_PER_POOL 128 ++ ++/** ++ * struct panthor_group_pool - Group pool ++ * ++ * Each file get assigned a group pool. ++ */ ++struct panthor_group_pool { ++ /** @xa: Xarray used to manage group handles. */ ++ struct xarray xa; ++}; ++ ++/** ++ * struct panthor_job - Used to manage GPU job ++ */ ++struct panthor_job { ++ /** @base: Inherit from drm_sched_job. */ ++ struct drm_sched_job base; ++ ++ /** @refcount: Reference count. */ ++ struct kref refcount; ++ ++ /** @group: Group of the queue this job will be pushed to. 
*/ ++ struct panthor_group *group; ++ ++ /** @queue_idx: Index of the queue inside @group. */ ++ u32 queue_idx; ++ ++ /** @call_info: Information about the userspace command stream call. */ ++ struct { ++ /** @start: GPU address of the userspace command stream. */ ++ u64 start; ++ ++ /** @size: Size of the userspace command stream. */ ++ u32 size; ++ ++ /** ++ * @latest_flush: Flush ID at the time the userspace command ++ * stream was built. ++ * ++ * Needed for the flush reduction mechanism. ++ */ ++ u32 latest_flush; ++ } call_info; ++ ++ /** @ringbuf: Position of this job is in the ring buffer. */ ++ struct { ++ /** @start: Start offset. */ ++ u64 start; ++ ++ /** @end: End offset. */ ++ u64 end; ++ } ringbuf; ++ ++ /** ++ * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs ++ * list. ++ */ ++ struct list_head node; ++ ++ /** @done_fence: Fence signaled when the job is finished or cancelled. */ ++ struct dma_fence *done_fence; ++}; ++ ++static void ++panthor_queue_put_syncwait_obj(struct panthor_queue *queue) ++{ ++ if (queue->syncwait.kmap) { ++ struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap); ++ ++ drm_gem_vunmap_unlocked(queue->syncwait.obj, &map); ++ queue->syncwait.kmap = NULL; ++ } ++ ++ drm_gem_object_put(queue->syncwait.obj); ++ queue->syncwait.obj = NULL; ++} ++ ++static void * ++panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_gem_object *bo; ++ struct iosys_map map; ++ int ret; ++ ++ if (queue->syncwait.kmap) ++ return queue->syncwait.kmap + queue->syncwait.offset; ++ ++ bo = panthor_vm_get_bo_for_va(group->vm, ++ queue->syncwait.gpu_va, ++ &queue->syncwait.offset); ++ if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo))) ++ goto err_put_syncwait_obj; ++ ++ queue->syncwait.obj = &bo->base.base; ++ ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ goto err_put_syncwait_obj; ++ ++ queue->syncwait.kmap = map.vaddr; ++ if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) ++ goto err_put_syncwait_obj; ++ ++ return queue->syncwait.kmap + queue->syncwait.offset; ++ ++err_put_syncwait_obj: ++ panthor_queue_put_syncwait_obj(queue); ++ return NULL; ++} ++ ++static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue) ++{ ++ if (IS_ERR_OR_NULL(queue)) ++ return; ++ ++ if (queue->entity.fence_context) ++ drm_sched_entity_destroy(&queue->entity); ++ ++ if (queue->scheduler.ops) ++ drm_sched_fini(&queue->scheduler); ++ ++ panthor_queue_put_syncwait_obj(queue); ++ ++ panthor_kernel_bo_destroy(group->vm, queue->ringbuf); ++ panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem); ++ ++ kfree(queue); ++} ++ ++static void group_release_work(struct work_struct *work) ++{ ++ struct panthor_group *group = container_of(work, ++ struct panthor_group, ++ release_work); ++ struct panthor_device *ptdev = group->ptdev; ++ u32 i; ++ ++ for (i = 0; i < group->queue_count; i++) ++ group_free_queue(group, group->queues[i]); ++ ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf); ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf); ++ panthor_kernel_bo_destroy(group->vm, group->syncobjs); ++ ++ panthor_vm_put(group->vm); ++ kfree(group); ++} ++ ++static void group_release(struct kref *kref) ++{ ++ struct panthor_group *group = container_of(kref, ++ struct panthor_group, ++ refcount); ++ struct panthor_device *ptdev = 
group->ptdev; ++ ++ drm_WARN_ON(&ptdev->base, group->csg_id >= 0); ++ drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node)); ++ drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node)); ++ ++ queue_work(panthor_cleanup_wq, &group->release_work); ++} ++ ++static void group_put(struct panthor_group *group) ++{ ++ if (group) ++ kref_put(&group->refcount, group_release); ++} ++ ++static struct panthor_group * ++group_get(struct panthor_group *group) ++{ ++ if (group) ++ kref_get(&group->refcount); ++ ++ return group; ++} ++ ++/** ++ * group_bind_locked() - Bind a group to a group slot ++ * @group: Group. ++ * @csg_id: Slot. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++group_bind_locked(struct panthor_group *group, u32 csg_id) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_csg_slot *csg_slot; ++ int ret; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS || ++ ptdev->scheduler->csg_slots[csg_id].group)) ++ return -EINVAL; ++ ++ ret = panthor_vm_active(group->vm); ++ if (ret) ++ return ret; ++ ++ csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ group_get(group); ++ group->csg_id = csg_id; ++ ++ /* Dummy doorbell allocation: doorbell is assigned to the group and ++ * all queues use the same doorbell. ++ * ++ * TODO: Implement LRU-based doorbell assignment, so the most often ++ * updated queues get their own doorbell, thus avoiding useless checks ++ * on queues belonging to the same group that are rarely updated. ++ */ ++ for (u32 i = 0; i < group->queue_count; i++) ++ group->queues[i]->doorbell_id = csg_id + 1; ++ ++ csg_slot->group = group; ++ ++ return 0; ++} ++ ++/** ++ * group_unbind_locked() - Unbind a group from a slot. ++ * @group: Group to unbind. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++group_unbind_locked(struct panthor_group *group) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_csg_slot *slot; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS)) ++ return -EINVAL; ++ ++ if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE)) ++ return -EINVAL; ++ ++ slot = &ptdev->scheduler->csg_slots[group->csg_id]; ++ panthor_vm_idle(group->vm); ++ group->csg_id = -1; ++ ++ /* Tiler OOM events will be re-issued next time the group is scheduled. */ ++ atomic_set(&group->tiler_oom, 0); ++ cancel_work(&group->tiler_oom_work); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ group->queues[i]->doorbell_id = -1; ++ ++ slot->group = NULL; ++ ++ group_put(group); ++ return 0; ++} ++ ++/** ++ * cs_slot_prog_locked() - Program a queue slot ++ * @ptdev: Device. ++ * @csg_id: Group slot ID. ++ * @cs_id: Queue slot ID. ++ * ++ * Program a queue slot with the queue information so things can start being ++ * executed on this queue. ++ * ++ * The group slot must have a group bound to it already (group_bind_locked()). 
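++ *
++ * For illustration, the tick path programs a freshly bound group roughly
++ * like this (simplified, see tick_ctx_apply()):
++ *
++ *   group_bind_locked(group, csg_id);
++ *   csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
++ *
++ * where csg_slot_prog_locked() calls this helper for every queue that
++ * exists in the group before toggling the doorbell request bits.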
++ */
++static void
++cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
++{
++ struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];
++ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
++
++ lockdep_assert_held(&ptdev->scheduler->lock);
++
++ queue->iface.input->extract = queue->iface.output->extract;
++ drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
++
++ cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
++ cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
++ cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
++ cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
++ cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
++ CS_CONFIG_DOORBELL(queue->doorbell_id);
++ cs_iface->input->ack_irq_mask = ~0;
++ panthor_fw_update_reqs(cs_iface, req,
++ CS_IDLE_SYNC_WAIT |
++ CS_IDLE_EMPTY |
++ CS_STATE_START |
++ CS_EXTRACT_EVENT,
++ CS_IDLE_SYNC_WAIT |
++ CS_IDLE_EMPTY |
++ CS_STATE_MASK |
++ CS_EXTRACT_EVENT);
++ if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
++ drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
++ queue->timeout_suspended = false;
++ }
++}
++
++/**
++ * cs_slot_reset_locked() - Reset a queue slot
++ * @ptdev: Device.
++ * @csg_id: Group slot.
++ * @cs_id: Queue slot.
++ *
++ * Change the queue slot state to STOP and suspend the queue timeout if
++ * the queue is not blocked.
++ *
++ * The group slot must have a group bound to it (group_bind_locked()).
++ */
++static int
++cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
++{
++ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
++ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
++ struct panthor_queue *queue = group->queues[cs_id];
++
++ lockdep_assert_held(&ptdev->scheduler->lock);
++
++ panthor_fw_update_reqs(cs_iface, req,
++ CS_STATE_STOP,
++ CS_STATE_MASK);
++
++ /* If the queue is blocked, we want to keep the timeout running, so
++ * we can detect unbounded waits and kill the group when that happens.
++ */
++ if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
++ queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
++ queue->timeout_suspended = true;
++ WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
++ }
++
++ return 0;
++}
++
++/**
++ * csg_slot_sync_priority_locked() - Synchronize the group slot priority
++ * @ptdev: Device.
++ * @csg_id: Group slot ID.
++ *
++ * Group slot priority update happens asynchronously. When we receive a
++ * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can
++ * reflect it to our panthor_csg_slot object.
++ */
++static void
++csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
++{
++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
++ struct panthor_fw_csg_iface *csg_iface;
++
++ lockdep_assert_held(&ptdev->scheduler->lock);
++
++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
++ csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28;
++}
++
++/**
++ * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state
++ * @ptdev: Device.
++ * @csg_id: Group slot.
++ * @cs_id: Queue slot.
++ *
++ * Queue state is updated on group suspend or STATUS_UPDATE event.
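++ *
++ * In short: an unblocked queue whose ring buffer is empty
++ * (insert == extract) and which has no active scoreboard entries is
++ * flagged in group->idle_queues, while a queue blocked on a SYNC_WAIT
++ * records its wait condition in queue->syncwait and is flagged in
++ * group->blocked_queues, which sync_upd_work() re-evaluates later.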
++ */ ++static void ++cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) ++{ ++ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; ++ struct panthor_queue *queue = group->queues[cs_id]; ++ struct panthor_fw_cs_iface *cs_iface = ++ panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id); ++ ++ u32 status_wait_cond; ++ ++ switch (cs_iface->output->status_blocked_reason) { ++ case CS_STATUS_BLOCKED_REASON_UNBLOCKED: ++ if (queue->iface.input->insert == queue->iface.output->extract && ++ cs_iface->output->status_scoreboards == 0) ++ group->idle_queues |= BIT(cs_id); ++ break; ++ ++ case CS_STATUS_BLOCKED_REASON_SYNC_WAIT: ++ if (list_empty(&group->wait_node)) { ++ list_move_tail(&group->wait_node, ++ &group->ptdev->scheduler->groups.waiting); ++ } ++ group->blocked_queues |= BIT(cs_id); ++ queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr; ++ queue->syncwait.ref = cs_iface->output->status_wait_sync_value; ++ status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK; ++ queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT; ++ if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) { ++ u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi; ++ ++ queue->syncwait.sync64 = true; ++ queue->syncwait.ref |= sync_val_hi << 32; ++ } else { ++ queue->syncwait.sync64 = false; ++ } ++ break; ++ ++ default: ++ /* Other reasons are not blocking. Consider the queue as runnable ++ * in those cases. ++ */ ++ break; ++ } ++} ++ ++static void ++csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ u32 i; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ group->idle_queues = 0; ++ group->blocked_queues = 0; ++ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ cs_slot_sync_queue_state_locked(ptdev, csg_id, i); ++ } ++} ++ ++static void ++csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_group *group; ++ enum panthor_group_state new_state, old_state; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ group = csg_slot->group; ++ ++ if (!group) ++ return; ++ ++ old_state = group->state; ++ switch (csg_iface->output->ack & CSG_STATE_MASK) { ++ case CSG_STATE_START: ++ case CSG_STATE_RESUME: ++ new_state = PANTHOR_CS_GROUP_ACTIVE; ++ break; ++ case CSG_STATE_TERMINATE: ++ new_state = PANTHOR_CS_GROUP_TERMINATED; ++ break; ++ case CSG_STATE_SUSPEND: ++ new_state = PANTHOR_CS_GROUP_SUSPENDED; ++ break; ++ } ++ ++ if (old_state == new_state) ++ return; ++ ++ if (new_state == PANTHOR_CS_GROUP_SUSPENDED) ++ csg_slot_sync_queues_state_locked(ptdev, csg_id); ++ ++ if (old_state == PANTHOR_CS_GROUP_ACTIVE) { ++ u32 i; ++ ++ /* Reset the queue slots so we start from a clean ++ * state when starting/resuming a new group on this ++ * CSG slot. No wait needed here, and no ringbell ++ * either, since the CS slot will only be re-used ++ * on the next CSG start operation. 
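++ *
++ * cs_slot_reset_locked() also suspends the drm_sched job timeout for
++ * queues that are not blocked on a sync object, so the remaining time
++ * can be restored when the queue is programmed again.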
++ */ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ cs_slot_reset_locked(ptdev, csg_id, i); ++ } ++ } ++ ++ group->state = new_state; ++} ++ ++static int ++csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) ++{ ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_csg_slot *csg_slot; ++ struct panthor_group *group; ++ u32 queue_mask = 0, i; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (priority > MAX_CSG_PRIO) ++ return -EINVAL; ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS)) ++ return -EINVAL; ++ ++ csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ group = csg_slot->group; ++ if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE) ++ return 0; ++ ++ csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id); ++ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) { ++ cs_slot_prog_locked(ptdev, csg_id, i); ++ queue_mask |= BIT(i); ++ } ++ } ++ ++ csg_iface->input->allow_compute = group->compute_core_mask; ++ csg_iface->input->allow_fragment = group->fragment_core_mask; ++ csg_iface->input->allow_other = group->tiler_core_mask; ++ csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | ++ CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | ++ CSG_EP_REQ_TILER(group->max_tiler_cores) | ++ CSG_EP_REQ_PRIORITY(priority); ++ csg_iface->input->config = panthor_vm_as(group->vm); ++ ++ if (group->suspend_buf) ++ csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf); ++ else ++ csg_iface->input->suspend_buf = 0; ++ ++ if (group->protm_suspend_buf) { ++ csg_iface->input->protm_suspend_buf = ++ panthor_kernel_bo_gpuva(group->protm_suspend_buf); ++ } else { ++ csg_iface->input->protm_suspend_buf = 0; ++ } ++ ++ csg_iface->input->ack_irq_mask = ~0; ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask); ++ return 0; ++} ++ ++static void ++cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 fatal; ++ u64 info; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ fatal = cs_iface->output->fatal; ++ info = cs_iface->output->fatal_info; ++ ++ if (group) ++ group->fatal_queues |= BIT(cs_id); ++ ++ sched_queue_delayed_work(sched, tick, 0); ++ drm_warn(&ptdev->base, ++ "CSG slot %d CS slot: %d\n" ++ "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FATAL.EXCEPTION_DATA: 0x%x\n" ++ "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", ++ csg_id, cs_id, ++ (unsigned int)CS_EXCEPTION_TYPE(fatal), ++ panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)), ++ (unsigned int)CS_EXCEPTION_DATA(fatal), ++ info); ++} ++ ++static void ++cs_slot_process_fault_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_queue *queue = group && cs_id < group->queue_count ? 
++ group->queues[cs_id] : NULL; ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 fault; ++ u64 info; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ fault = cs_iface->output->fault; ++ info = cs_iface->output->fault_info; ++ ++ if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) { ++ u64 cs_extract = queue->iface.output->extract; ++ struct panthor_job *job; ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) { ++ if (cs_extract >= job->ringbuf.end) ++ continue; ++ ++ if (cs_extract < job->ringbuf.start) ++ break; ++ ++ dma_fence_set_error(job->done_fence, -EINVAL); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ } ++ ++ drm_warn(&ptdev->base, ++ "CSG slot %d CS slot: %d\n" ++ "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FAULT.EXCEPTION_DATA: 0x%x\n" ++ "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", ++ csg_id, cs_id, ++ (unsigned int)CS_EXCEPTION_TYPE(fault), ++ panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)), ++ (unsigned int)CS_EXCEPTION_DATA(fault), ++ info); ++} ++ ++static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 renderpasses_in_flight, pending_frag_count; ++ struct panthor_heap_pool *heaps = NULL; ++ u64 heap_address, new_chunk_va = 0; ++ u32 vt_start, vt_end, frag_end; ++ int ret, csg_id; ++ ++ mutex_lock(&sched->lock); ++ csg_id = group->csg_id; ++ if (csg_id >= 0) { ++ struct panthor_fw_cs_iface *cs_iface; ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ heaps = panthor_vm_get_heap_pool(group->vm, false); ++ heap_address = cs_iface->output->heap_address; ++ vt_start = cs_iface->output->heap_vt_start; ++ vt_end = cs_iface->output->heap_vt_end; ++ frag_end = cs_iface->output->heap_frag_end; ++ renderpasses_in_flight = vt_start - frag_end; ++ pending_frag_count = vt_end - frag_end; ++ } ++ mutex_unlock(&sched->lock); ++ ++ /* The group got scheduled out, we stop here. We will get a new tiler OOM event ++ * when it's scheduled again. ++ */ ++ if (unlikely(csg_id < 0)) ++ return 0; ++ ++ if (!heaps || frag_end > vt_end || vt_end >= vt_start) { ++ ret = -EINVAL; ++ } else { ++ /* We do the allocation without holding the scheduler lock to avoid ++ * blocking the scheduling. ++ */ ++ ret = panthor_heap_grow(heaps, heap_address, ++ renderpasses_in_flight, ++ pending_frag_count, &new_chunk_va); ++ } ++ ++ if (ret && ret != -EBUSY) { ++ drm_warn(&ptdev->base, "Failed to extend the tiler heap\n"); ++ group->fatal_queues |= BIT(cs_id); ++ sched_queue_delayed_work(sched, tick, 0); ++ goto out_put_heap_pool; ++ } ++ ++ mutex_lock(&sched->lock); ++ csg_id = group->csg_id; ++ if (csg_id >= 0) { ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_fw_cs_iface *cs_iface; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ ++ cs_iface->input->heap_start = new_chunk_va; ++ cs_iface->input->heap_end = new_chunk_va; ++ panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM); ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id)); ++ panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); ++ } ++ mutex_unlock(&sched->lock); ++ ++ /* We allocated a chunck, but couldn't link it to the heap ++ * context because the group was scheduled out while we were ++ * allocating memory. 
We need to return this chunk to the heap. ++ */ ++ if (unlikely(csg_id < 0 && new_chunk_va)) ++ panthor_heap_return_chunk(heaps, heap_address, new_chunk_va); ++ ++ ret = 0; ++ ++out_put_heap_pool: ++ panthor_heap_pool_put(heaps); ++ return ret; ++} ++ ++static void group_tiler_oom_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, tiler_oom_work); ++ u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0); ++ ++ while (tiler_oom) { ++ u32 cs_id = ffs(tiler_oom) - 1; ++ ++ group_process_tiler_oom(group, cs_id); ++ tiler_oom &= ~BIT(cs_id); ++ } ++ ++ group_put(group); ++} ++ ++static void ++cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, !group)) ++ return; ++ ++ atomic_or(BIT(cs_id), &group->tiler_oom); ++ ++ /* We don't use group_queue_work() here because we want to queue the ++ * work item to the heap_alloc_wq. ++ */ ++ group_get(group); ++ if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work)) ++ group_put(group); ++} ++ ++static bool cs_slot_process_irq_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 req, ack, events; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ req = cs_iface->input->req; ++ ack = cs_iface->output->ack; ++ events = (req ^ ack) & CS_EVT_MASK; ++ ++ if (events & CS_FATAL) ++ cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id); ++ ++ if (events & CS_FAULT) ++ cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id); ++ ++ if (events & CS_TILER_OOM) ++ cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id); ++ ++ /* We don't acknowledge the TILER_OOM event since its handling is ++ * deferred to a separate work. ++ */ ++ panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT); ++ ++ return (events & (CS_FAULT | CS_TILER_OOM)) != 0; ++} ++ ++static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE; ++} ++ ++static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ sched->might_have_idle_groups = true; ++ ++ /* Schedule a tick so we can evict idle groups and schedule non-idle ++ * ones. This will also update runtime PM and devfreq busy/idle states, ++ * so the device can lower its frequency or get suspended. 
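++ * Concretely, tick_work() calls panthor_devfreq_record_idle() and drops
++ * the scheduler's runtime PM reference (pm.has_ref) once every scheduled
++ * group is idle.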
++ */ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void csg_slot_sync_update_locked(struct panthor_device *ptdev, ++ u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (group) ++ group_queue_work(group, sync_upd); ++ ++ sched_queue_work(ptdev->scheduler, sync_upd); ++} ++ ++static void ++csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id); ++ ++ group = csg_slot->group; ++ if (!drm_WARN_ON(&ptdev->base, !group)) ++ group->timedout = true; ++ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events; ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 ring_cs_db_mask = 0; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) ++ return; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ req = READ_ONCE(csg_iface->input->req); ++ ack = READ_ONCE(csg_iface->output->ack); ++ cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req); ++ cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack); ++ csg_events = (req ^ ack) & CSG_EVT_MASK; ++ ++ /* There may not be any pending CSG/CS interrupts to process */ ++ if (req == ack && cs_irq_req == cs_irq_ack) ++ return; ++ ++ /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before ++ * examining the CS_ACK & CS_REQ bits. This would ensure that Host ++ * doesn't miss an interrupt for the CS in the race scenario where ++ * whilst Host is servicing an interrupt for the CS, firmware sends ++ * another interrupt for that CS. ++ */ ++ csg_iface->input->cs_irq_ack = cs_irq_req; ++ ++ panthor_fw_update_reqs(csg_iface, req, ack, ++ CSG_SYNC_UPDATE | ++ CSG_IDLE | ++ CSG_PROGRESS_TIMER_EVENT); ++ ++ if (csg_events & CSG_IDLE) ++ csg_slot_process_idle_event_locked(ptdev, csg_id); ++ ++ if (csg_events & CSG_PROGRESS_TIMER_EVENT) ++ csg_slot_process_progress_timer_event_locked(ptdev, csg_id); ++ ++ cs_irqs = cs_irq_req ^ cs_irq_ack; ++ while (cs_irqs) { ++ u32 cs_id = ffs(cs_irqs) - 1; ++ ++ if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id)) ++ ring_cs_db_mask |= BIT(cs_id); ++ ++ cs_irqs &= ~BIT(cs_id); ++ } ++ ++ if (csg_events & CSG_SYNC_UPDATE) ++ csg_slot_sync_update_locked(ptdev, csg_id); ++ ++ if (ring_cs_db_mask) ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask); ++ ++ panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); ++} ++ ++static void sched_process_idle_event_locked(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ /* Acknowledge the idle event and schedule a tick. */ ++ panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE); ++ sched_queue_delayed_work(ptdev->scheduler, tick, 0); ++} ++ ++/** ++ * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ ++ * @ptdev: Device. 
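++ *
++ * Only the GLB_IDLE event is handled here: it is acknowledged in
++ * sched_process_idle_event_locked(), which then schedules an immediate
++ * tick.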
++ */ ++static void sched_process_global_irq_locked(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 req, ack, evts; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ req = READ_ONCE(glb_iface->input->req); ++ ack = READ_ONCE(glb_iface->output->ack); ++ evts = (req ^ ack) & GLB_EVT_MASK; ++ ++ if (evts & GLB_IDLE) ++ sched_process_idle_event_locked(ptdev); ++} ++ ++static void process_fw_events_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, ++ fw_events_work); ++ u32 events = atomic_xchg(&sched->fw_events, 0); ++ struct panthor_device *ptdev = sched->ptdev; ++ ++ mutex_lock(&sched->lock); ++ ++ if (events & JOB_INT_GLOBAL_IF) { ++ sched_process_global_irq_locked(ptdev); ++ events &= ~JOB_INT_GLOBAL_IF; ++ } ++ ++ while (events) { ++ u32 csg_id = ffs(events) - 1; ++ ++ sched_process_csg_irq_locked(ptdev, csg_id); ++ events &= ~BIT(csg_id); ++ } ++ ++ mutex_unlock(&sched->lock); ++} ++ ++/** ++ * panthor_sched_report_fw_events() - Report FW events to the scheduler. ++ */ ++void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events) ++{ ++ if (!ptdev->scheduler) ++ return; ++ ++ atomic_or(events, &ptdev->scheduler->fw_events); ++ sched_queue_work(ptdev->scheduler, fw_events); ++} ++ ++static const char *fence_get_driver_name(struct dma_fence *fence) ++{ ++ return "panthor"; ++} ++ ++static const char *queue_fence_get_timeline_name(struct dma_fence *fence) ++{ ++ return "queue-fence"; ++} ++ ++static const struct dma_fence_ops panthor_queue_fence_ops = { ++ .get_driver_name = fence_get_driver_name, ++ .get_timeline_name = queue_fence_get_timeline_name, ++}; ++ ++/** ++ */ ++struct panthor_csg_slots_upd_ctx { ++ u32 update_mask; ++ u32 timedout_mask; ++ struct { ++ u32 value; ++ u32 mask; ++ } requests[MAX_CSGS]; ++}; ++ ++static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx) ++{ ++ memset(ctx, 0, sizeof(*ctx)); ++} ++ ++static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev, ++ struct panthor_csg_slots_upd_ctx *ctx, ++ u32 csg_id, u32 value, u32 mask) ++{ ++ if (drm_WARN_ON(&ptdev->base, !mask) || ++ drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) ++ return; ++ ++ ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask); ++ ctx->requests[csg_id].mask |= mask; ++ ctx->update_mask |= BIT(csg_id); ++} ++ ++static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev, ++ struct panthor_csg_slots_upd_ctx *ctx) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 update_slots = ctx->update_mask; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ if (!ctx->update_mask) ++ return 0; ++ ++ while (update_slots) { ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 csg_id = ffs(update_slots) - 1; ++ ++ update_slots &= ~BIT(csg_id); ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ panthor_fw_update_reqs(csg_iface, req, ++ ctx->requests[csg_id].value, ++ ctx->requests[csg_id].mask); ++ } ++ ++ panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask); ++ ++ update_slots = ctx->update_mask; ++ while (update_slots) { ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 csg_id = ffs(update_slots) - 1; ++ u32 req_mask = ctx->requests[csg_id].mask, acked; ++ int ret; ++ ++ update_slots &= ~BIT(csg_id); ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ ++ ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100); ++ ++ if (acked & 
CSG_ENDPOINT_CONFIG) ++ csg_slot_sync_priority_locked(ptdev, csg_id); ++ ++ if (acked & CSG_STATE_MASK) ++ csg_slot_sync_state_locked(ptdev, csg_id); ++ ++ if (acked & CSG_STATUS_UPDATE) { ++ csg_slot_sync_queues_state_locked(ptdev, csg_id); ++ csg_slot_sync_idle_state_locked(ptdev, csg_id); ++ } ++ ++ if (ret && acked != req_mask && ++ ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) { ++ drm_err(&ptdev->base, "CSG %d update request timedout", csg_id); ++ ctx->timedout_mask |= BIT(csg_id); ++ } ++ } ++ ++ if (ctx->timedout_mask) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++struct panthor_sched_tick_ctx { ++ struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT]; ++ struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT]; ++ u32 idle_group_count; ++ u32 group_count; ++ enum panthor_csg_priority min_priority; ++ struct panthor_vm *vms[MAX_CS_PER_CSG]; ++ u32 as_count; ++ bool immediate_tick; ++ u32 csg_upd_failed_mask; ++}; ++ ++static bool ++tick_ctx_is_full(const struct panthor_scheduler *sched, ++ const struct panthor_sched_tick_ctx *ctx) ++{ ++ return ctx->group_count == sched->csg_slot_count; ++} ++ ++static bool ++group_is_idle(struct panthor_group *group) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ u32 inactive_queues; ++ ++ if (group->csg_id >= 0) ++ return ptdev->scheduler->csg_slots[group->csg_id].idle; ++ ++ inactive_queues = group->idle_queues | group->blocked_queues; ++ return hweight32(inactive_queues) == group->queue_count; ++} ++ ++static bool ++group_can_run(struct panthor_group *group) ++{ ++ return group->state != PANTHOR_CS_GROUP_TERMINATED && ++ !group->destroyed && group->fatal_queues == 0 && ++ !group->timedout; ++} ++ ++static void ++tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ struct list_head *queue, ++ bool skip_idle_groups, ++ bool owned_by_tick_ctx) ++{ ++ struct panthor_group *group, *tmp; ++ ++ if (tick_ctx_is_full(sched, ctx)) ++ return; ++ ++ list_for_each_entry_safe(group, tmp, queue, run_node) { ++ u32 i; ++ ++ if (!group_can_run(group)) ++ continue; ++ ++ if (skip_idle_groups && group_is_idle(group)) ++ continue; ++ ++ for (i = 0; i < ctx->as_count; i++) { ++ if (ctx->vms[i] == group->vm) ++ break; ++ } ++ ++ if (i == ctx->as_count && ctx->as_count == sched->as_slot_count) ++ continue; ++ ++ if (!owned_by_tick_ctx) ++ group_get(group); ++ ++ list_move_tail(&group->run_node, &ctx->groups[group->priority]); ++ ctx->group_count++; ++ if (group_is_idle(group)) ++ ctx->idle_group_count++; ++ ++ if (i == ctx->as_count) ++ ctx->vms[ctx->as_count++] = group->vm; ++ ++ if (ctx->min_priority > group->priority) ++ ctx->min_priority = group->priority; ++ ++ if (tick_ctx_is_full(sched, ctx)) ++ return; ++ } ++} ++ ++static void ++tick_ctx_insert_old_group(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ struct panthor_group *group, ++ bool full_tick) ++{ ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id]; ++ struct panthor_group *other_group; ++ ++ if (!full_tick) { ++ list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); ++ return; ++ } ++ ++ /* Rotate to make sure groups with lower CSG slot ++ * priorities have a chance to get a higher CSG slot ++ * priority next time they get picked. This priority ++ * has an impact on resource request ordering, so it's ++ * important to make sure we don't let one group starve ++ * all other groups with the same group priority. 
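++ *
++ * For example, if groups A and B share the same group priority and A
++ * currently holds the higher-priority CSG slot, B ends up before A in
++ * the old_groups list, so the next tick_ctx_apply() pass will, all else
++ * being equal, hand the higher slot priority to B.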
++ */ ++ list_for_each_entry(other_group, ++ &ctx->old_groups[csg_slot->group->priority], ++ run_node) { ++ struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id]; ++ ++ if (other_csg_slot->priority > csg_slot->priority) { ++ list_add_tail(&csg_slot->group->run_node, &other_group->run_node); ++ return; ++ } ++ } ++ ++ list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); ++} ++ ++static void ++tick_ctx_init(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ bool full_tick) ++{ ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ int ret; ++ u32 i; ++ ++ memset(ctx, 0, sizeof(*ctx)); ++ csgs_upd_ctx_init(&upd_ctx); ++ ++ ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT; ++ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { ++ INIT_LIST_HEAD(&ctx->groups[i]); ++ INIT_LIST_HEAD(&ctx->old_groups[i]); ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ if (!group) ++ continue; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, i); ++ group_get(group); ++ ++ /* If there was unhandled faults on the VM, force processing of ++ * CSG IRQs, so we can flag the faulty queue. ++ */ ++ if (panthor_vm_has_unhandled_faults(group->vm)) { ++ sched_process_csg_irq_locked(ptdev, i); ++ ++ /* No fatal fault reported, flag all queues as faulty. */ ++ if (!group->fatal_queues) ++ group->fatal_queues |= GENMASK(group->queue_count - 1, 0); ++ } ++ ++ tick_ctx_insert_old_group(sched, ctx, group, full_tick); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, ++ csg_iface->output->ack ^ CSG_STATUS_UPDATE, ++ CSG_STATUS_UPDATE); ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ } ++} ++ ++#define NUM_INSTRS_PER_SLOT 16 ++ ++static void ++group_term_post_processing(struct panthor_group *group) ++{ ++ struct panthor_job *job, *tmp; ++ LIST_HEAD(faulty_jobs); ++ bool cookie; ++ u32 i = 0; ++ ++ if (drm_WARN_ON(&group->ptdev->base, group_can_run(group))) ++ return; ++ ++ cookie = dma_fence_begin_signalling(); ++ for (i = 0; i < group->queue_count; i++) { ++ struct panthor_queue *queue = group->queues[i]; ++ struct panthor_syncobj_64b *syncobj; ++ int err; ++ ++ if (group->fatal_queues & BIT(i)) ++ err = -EINVAL; ++ else if (group->timedout) ++ err = -ETIMEDOUT; ++ else ++ err = -ECANCELED; ++ ++ if (!queue) ++ continue; ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) { ++ list_move_tail(&job->node, &faulty_jobs); ++ dma_fence_set_error(job->done_fence, err); ++ dma_fence_signal_locked(job->done_fence); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ ++ /* Manually update the syncobj seqno to unblock waiters. 
*/ ++ syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj)); ++ syncobj->status = ~0; ++ syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno); ++ sched_queue_work(group->ptdev->scheduler, sync_upd); ++ } ++ dma_fence_end_signalling(cookie); ++ ++ list_for_each_entry_safe(job, tmp, &faulty_jobs, node) { ++ list_del_init(&job->node); ++ panthor_job_put(&job->base); ++ } ++} ++ ++static void group_term_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, term_work); ++ ++ group_term_post_processing(group); ++ group_put(group); ++} ++ ++static void ++tick_ctx_cleanup(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx) ++{ ++ struct panthor_group *group, *tmp; ++ u32 i; ++ ++ for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) { ++ list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) { ++ /* If everything went fine, we should only have groups ++ * to be terminated in the old_groups lists. ++ */ ++ drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask && ++ group_can_run(group)); ++ ++ if (!group_can_run(group)) { ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } else if (group->csg_id >= 0) { ++ list_del_init(&group->run_node); ++ } else { ++ list_move(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } ++ group_put(group); ++ } ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { ++ /* If everything went fine, the groups to schedule lists should ++ * be empty. ++ */ ++ drm_WARN_ON(&group->ptdev->base, ++ !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i])); ++ ++ list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) { ++ if (group->csg_id >= 0) { ++ list_del_init(&group->run_node); ++ } else { ++ list_move(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } ++ group_put(group); ++ } ++ } ++} ++ ++static void ++tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx) ++{ ++ struct panthor_group *group, *tmp; ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_csg_slot *csg_slot; ++ int prio, new_csg_prio = MAX_CSG_PRIO, i; ++ u32 csg_mod_mask = 0, free_csg_slots = 0; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ int ret; ++ ++ csgs_upd_ctx_init(&upd_ctx); ++ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ /* Suspend or terminate evicted groups. */ ++ list_for_each_entry(group, &ctx->old_groups[prio], run_node) { ++ bool term = !group_can_run(group); ++ int csg_id = group->csg_id; ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id < 0)) ++ continue; ++ ++ csg_slot = &sched->csg_slots[csg_id]; ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND, ++ CSG_STATE_MASK); ++ } ++ ++ /* Update priorities on already running groups. 
*/ ++ list_for_each_entry(group, &ctx->groups[prio], run_node) { ++ struct panthor_fw_csg_iface *csg_iface; ++ int csg_id = group->csg_id; ++ ++ if (csg_id < 0) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ csg_slot = &sched->csg_slots[csg_id]; ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ if (csg_slot->priority == new_csg_prio) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ panthor_fw_update_reqs(csg_iface, endpoint_req, ++ CSG_EP_REQ_PRIORITY(new_csg_prio), ++ CSG_EP_REQ_PRIORITY_MASK); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, ++ CSG_ENDPOINT_CONFIG); ++ new_csg_prio--; ++ } ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ return; ++ } ++ ++ /* Unbind evicted groups. */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry(group, &ctx->old_groups[prio], run_node) { ++ /* This group is gone. Process interrupts to clear ++ * any pending interrupts before we start the new ++ * group. ++ */ ++ if (group->csg_id >= 0) ++ sched_process_csg_irq_locked(ptdev, group->csg_id); ++ ++ group_unbind_locked(group); ++ } ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ if (!sched->csg_slots[i].group) ++ free_csg_slots |= BIT(i); ++ } ++ ++ csgs_upd_ctx_init(&upd_ctx); ++ new_csg_prio = MAX_CSG_PRIO; ++ ++ /* Start new groups. */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry(group, &ctx->groups[prio], run_node) { ++ int csg_id = group->csg_id; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ if (csg_id >= 0) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ csg_id = ffs(free_csg_slots) - 1; ++ if (drm_WARN_ON(&ptdev->base, csg_id < 0)) ++ break; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ csg_slot = &sched->csg_slots[csg_id]; ++ csg_mod_mask |= BIT(csg_id); ++ group_bind_locked(group, csg_id); ++ csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ group->state == PANTHOR_CS_GROUP_SUSPENDED ? ++ CSG_STATE_RESUME : CSG_STATE_START, ++ CSG_STATE_MASK); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, ++ CSG_ENDPOINT_CONFIG); ++ free_csg_slots &= ~BIT(csg_id); ++ } ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ return; ++ } ++ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) { ++ list_del_init(&group->run_node); ++ ++ /* If the group has been destroyed while we were ++ * scheduling, ask for an immediate tick to ++ * re-evaluate as soon as possible and get rid of ++ * this dangling group. ++ */ ++ if (group->destroyed) ++ ctx->immediate_tick = true; ++ group_put(group); ++ } ++ ++ /* Return evicted groups to the idle or run queues. Groups ++ * that can no longer be run (because they've been destroyed ++ * or experienced an unrecoverable error) will be scheduled ++ * for destruction in tick_ctx_cleanup(). 
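++ * (tick_ctx_cleanup() queues the term work for those groups, and
++ * group_term_post_processing() then signals their in-flight job fences
++ * with an error.)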
++ */ ++ list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) { ++ if (!group_can_run(group)) ++ continue; ++ ++ if (group_is_idle(group)) ++ list_move_tail(&group->run_node, &sched->groups.idle[prio]); ++ else ++ list_move_tail(&group->run_node, &sched->groups.runnable[prio]); ++ group_put(group); ++ } ++ } ++ ++ sched->used_csg_slot_count = ctx->group_count; ++ sched->might_have_idle_groups = ctx->idle_group_count > 0; ++} ++ ++static u64 ++tick_ctx_update_resched_target(struct panthor_scheduler *sched, ++ const struct panthor_sched_tick_ctx *ctx) ++{ ++ /* We had space left, no need to reschedule until some external event happens. */ ++ if (!tick_ctx_is_full(sched, ctx)) ++ goto no_tick; ++ ++ /* If idle groups were scheduled, no need to wake up until some external ++ * event happens (group unblocked, new job submitted, ...). ++ */ ++ if (ctx->idle_group_count) ++ goto no_tick; ++ ++ if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT)) ++ goto no_tick; ++ ++ /* If there are groups of the same priority waiting, we need to ++ * keep the scheduler ticking, otherwise, we'll just wait for ++ * new groups with higher priority to be queued. ++ */ ++ if (!list_empty(&sched->groups.runnable[ctx->min_priority])) { ++ u64 resched_target = sched->last_tick + sched->tick_period; ++ ++ if (time_before64(sched->resched_target, sched->last_tick) || ++ time_before64(resched_target, sched->resched_target)) ++ sched->resched_target = resched_target; ++ ++ return sched->resched_target - sched->last_tick; ++ } ++ ++no_tick: ++ sched->resched_target = U64_MAX; ++ return U64_MAX; ++} ++ ++static void tick_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, ++ tick_work.work); ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_sched_tick_ctx ctx; ++ u64 remaining_jiffies = 0, resched_delay; ++ u64 now = get_jiffies_64(); ++ int prio, ret, cookie; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ goto out_dev_exit; ++ ++ if (time_before64(now, sched->resched_target)) ++ remaining_jiffies = sched->resched_target - now; ++ ++ mutex_lock(&sched->lock); ++ if (panthor_device_reset_is_pending(sched->ptdev)) ++ goto out_unlock; ++ ++ tick_ctx_init(sched, &ctx, remaining_jiffies != 0); ++ if (ctx.csg_upd_failed_mask) ++ goto out_cleanup_ctx; ++ ++ if (remaining_jiffies) { ++ /* Scheduling forced in the middle of a tick. Only RT groups ++ * can preempt non-RT ones. Currently running RT groups can't be ++ * preempted. 
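++ *
++ * This is why group_schedule_locked() forces an immediate tick when an
++ * RT group gets new work: in this forced-tick path, only the RT
++ * runnable list is considered on top of the groups already picked from
++ * old_groups.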
++ */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], ++ true, true); ++ if (prio == PANTHOR_CSG_PRIORITY_RT) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, ++ &sched->groups.runnable[prio], ++ true, false); ++ } ++ } ++ } ++ ++ /* First pick non-idle groups */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio], ++ true, false); ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true); ++ } ++ ++ /* If we have free CSG slots left, pick idle groups */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ /* Check the old_group queue first to avoid reprogramming the slots */ ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true); ++ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio], ++ false, false); ++ } ++ ++ tick_ctx_apply(sched, &ctx); ++ if (ctx.csg_upd_failed_mask) ++ goto out_cleanup_ctx; ++ ++ if (ctx.idle_group_count == ctx.group_count) { ++ panthor_devfreq_record_idle(sched->ptdev); ++ if (sched->pm.has_ref) { ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ sched->pm.has_ref = false; ++ } ++ } else { ++ panthor_devfreq_record_busy(sched->ptdev); ++ if (!sched->pm.has_ref) { ++ pm_runtime_get(ptdev->base.dev); ++ sched->pm.has_ref = true; ++ } ++ } ++ ++ sched->last_tick = now; ++ resched_delay = tick_ctx_update_resched_target(sched, &ctx); ++ if (ctx.immediate_tick) ++ resched_delay = 0; ++ ++ if (resched_delay != U64_MAX) ++ sched_queue_delayed_work(sched, tick, resched_delay); ++ ++out_cleanup_ctx: ++ tick_ctx_cleanup(sched, &ctx); ++ ++out_unlock: ++ mutex_unlock(&sched->lock); ++ pm_runtime_mark_last_busy(ptdev->base.dev); ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++} ++ ++static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx) ++{ ++ struct panthor_queue *queue = group->queues[queue_idx]; ++ union { ++ struct panthor_syncobj_64b sync64; ++ struct panthor_syncobj_32b sync32; ++ } *syncobj; ++ bool result; ++ u64 value; ++ ++ syncobj = panthor_queue_get_syncwait_obj(group, queue); ++ if (!syncobj) ++ return -EINVAL; ++ ++ value = queue->syncwait.sync64 ? 
++ syncobj->sync64.seqno : ++ syncobj->sync32.seqno; ++ ++ if (queue->syncwait.gt) ++ result = value > queue->syncwait.ref; ++ else ++ result = value <= queue->syncwait.ref; ++ ++ if (result) ++ panthor_queue_put_syncwait_obj(queue); ++ ++ return result; ++} ++ ++static void sync_upd_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, ++ struct panthor_scheduler, ++ sync_upd_work); ++ struct panthor_group *group, *tmp; ++ bool immediate_tick = false; ++ ++ mutex_lock(&sched->lock); ++ list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) { ++ u32 tested_queues = group->blocked_queues; ++ u32 unblocked_queues = 0; ++ ++ while (tested_queues) { ++ u32 cs_id = ffs(tested_queues) - 1; ++ int ret; ++ ++ ret = panthor_queue_eval_syncwait(group, cs_id); ++ drm_WARN_ON(&group->ptdev->base, ret < 0); ++ if (ret) ++ unblocked_queues |= BIT(cs_id); ++ ++ tested_queues &= ~BIT(cs_id); ++ } ++ ++ if (unblocked_queues) { ++ group->blocked_queues &= ~unblocked_queues; ++ ++ if (group->csg_id < 0) { ++ list_move(&group->run_node, ++ &sched->groups.runnable[group->priority]); ++ if (group->priority == PANTHOR_CSG_PRIORITY_RT) ++ immediate_tick = true; ++ } ++ } ++ ++ if (!group->blocked_queues) ++ list_del_init(&group->wait_node); ++ } ++ mutex_unlock(&sched->lock); ++ ++ if (immediate_tick) ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct list_head *queue = &sched->groups.runnable[group->priority]; ++ u64 delay_jiffies = 0; ++ bool was_idle; ++ u64 now; ++ ++ if (!group_can_run(group)) ++ return; ++ ++ /* All updated queues are blocked, no need to wake up the scheduler. */ ++ if ((queue_mask & group->blocked_queues) == queue_mask) ++ return; ++ ++ was_idle = group_is_idle(group); ++ group->idle_queues &= ~queue_mask; ++ ++ /* Don't mess up with the lists if we're in a middle of a reset. */ ++ if (atomic_read(&sched->reset.in_progress)) ++ return; ++ ++ if (was_idle && !group_is_idle(group)) ++ list_move_tail(&group->run_node, queue); ++ ++ /* RT groups are preemptive. */ ++ if (group->priority == PANTHOR_CSG_PRIORITY_RT) { ++ sched_queue_delayed_work(sched, tick, 0); ++ return; ++ } ++ ++ /* Some groups might be idle, force an immediate tick to ++ * re-evaluate. ++ */ ++ if (sched->might_have_idle_groups) { ++ sched_queue_delayed_work(sched, tick, 0); ++ return; ++ } ++ ++ /* Scheduler is ticking, nothing to do. */ ++ if (sched->resched_target != U64_MAX) { ++ /* If there are free slots, force immediating ticking. */ ++ if (sched->used_csg_slot_count < sched->csg_slot_count) ++ sched_queue_delayed_work(sched, tick, 0); ++ ++ return; ++ } ++ ++ /* Scheduler tick was off, recalculate the resched_target based on the ++ * last tick event, and queue the scheduler work. ++ */ ++ now = get_jiffies_64(); ++ sched->resched_target = sched->last_tick + sched->tick_period; ++ if (sched->used_csg_slot_count == sched->csg_slot_count && ++ time_before64(now, sched->resched_target)) ++ delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX); ++ ++ sched_queue_delayed_work(sched, tick, delay_jiffies); ++} ++ ++static void queue_stop(struct panthor_queue *queue, ++ struct panthor_job *bad_job) ++{ ++ drm_sched_stop(&queue->scheduler, bad_job ? 
&bad_job->base : NULL); ++} ++ ++static void queue_start(struct panthor_queue *queue) ++{ ++ struct panthor_job *job; ++ ++ /* Re-assign the parent fences. */ ++ list_for_each_entry(job, &queue->scheduler.pending_list, base.list) ++ job->base.s_fence->parent = dma_fence_get(job->done_fence); ++ ++ drm_sched_start(&queue->scheduler, true); ++} ++ ++static void panthor_group_stop(struct panthor_group *group) ++{ ++ struct panthor_scheduler *sched = group->ptdev->scheduler; ++ ++ lockdep_assert_held(&sched->reset.lock); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ queue_stop(group->queues[i], NULL); ++ ++ group_get(group); ++ list_move_tail(&group->run_node, &sched->reset.stopped_groups); ++} ++ ++static void panthor_group_start(struct panthor_group *group) ++{ ++ struct panthor_scheduler *sched = group->ptdev->scheduler; ++ ++ lockdep_assert_held(&group->ptdev->scheduler->reset.lock); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ queue_start(group->queues[i]); ++ ++ if (group_can_run(group)) { ++ list_move_tail(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } else { ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ ++ group_put(group); ++} ++ ++static void panthor_sched_immediate_tick(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++/** ++ * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler. ++ */ ++void panthor_sched_report_mmu_fault(struct panthor_device *ptdev) ++{ ++ /* Force a tick to immediately kill faulty groups. */ ++ if (ptdev->scheduler) ++ panthor_sched_immediate_tick(ptdev); ++} ++ ++void panthor_sched_resume(struct panthor_device *ptdev) ++{ ++ /* Force a tick to re-evaluate after a resume. */ ++ panthor_sched_immediate_tick(ptdev); ++} ++ ++void panthor_sched_suspend(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ u64 suspended_slots, faulty_slots; ++ struct panthor_group *group; ++ u32 i; ++ ++ mutex_lock(&sched->lock); ++ csgs_upd_ctx_init(&upd_ctx); ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ ++ if (csg_slot->group) { ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, ++ CSG_STATE_SUSPEND, ++ CSG_STATE_MASK); ++ } ++ } ++ ++ suspended_slots = upd_ctx.update_mask; ++ ++ csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ suspended_slots &= ~upd_ctx.timedout_mask; ++ faulty_slots = upd_ctx.timedout_mask; ++ ++ if (faulty_slots) { ++ u32 slot_mask = faulty_slots; ++ ++ drm_err(&ptdev->base, "CSG suspend failed, escalating to termination"); ++ csgs_upd_ctx_init(&upd_ctx); ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ CSG_STATE_TERMINATE, ++ CSG_STATE_MASK); ++ slot_mask &= ~BIT(csg_id); ++ } ++ ++ csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ ++ slot_mask = upd_ctx.timedout_mask; ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ ++ /* Terminate command timedout, but the soft-reset will ++ * automatically terminate all active groups, so let's ++ * force the state to halted here. 
++ */ ++ if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED) ++ csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; ++ slot_mask &= ~BIT(csg_id); ++ } ++ } ++ ++ /* Flush L2 and LSC caches to make sure suspend state is up-to-date. ++ * If the flush fails, flag all queues for termination. ++ */ ++ if (suspended_slots) { ++ bool flush_caches_failed = false; ++ u32 slot_mask = suspended_slots; ++ ++ if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0)) ++ flush_caches_failed = true; ++ ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ ++ if (flush_caches_failed) ++ csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; ++ else ++ csg_slot_sync_update_locked(ptdev, csg_id); ++ ++ slot_mask &= ~BIT(csg_id); ++ } ++ ++ if (flush_caches_failed) ++ faulty_slots |= suspended_slots; ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ ++ group = csg_slot->group; ++ if (!group) ++ continue; ++ ++ group_get(group); ++ ++ if (group->csg_id >= 0) ++ sched_process_csg_irq_locked(ptdev, group->csg_id); ++ ++ group_unbind_locked(group); ++ ++ drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node)); ++ ++ if (group_can_run(group)) { ++ list_add(&group->run_node, ++ &sched->groups.idle[group->priority]); ++ } else { ++ /* We don't bother stopping the scheduler if the group is ++ * faulty, the group termination work will finish the job. ++ */ ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ group_put(group); ++ } ++ mutex_unlock(&sched->lock); ++} ++ ++void panthor_sched_pre_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group, *group_tmp; ++ u32 i; ++ ++ mutex_lock(&sched->reset.lock); ++ atomic_set(&sched->reset.in_progress, true); ++ ++ /* Cancel all scheduler works. Once this is done, these works can't be ++ * scheduled again until the reset operation is complete. ++ */ ++ cancel_work_sync(&sched->sync_upd_work); ++ cancel_delayed_work_sync(&sched->tick_work); ++ ++ panthor_sched_suspend(ptdev); ++ ++ /* Stop all groups that might still accept jobs, so we don't get passed ++ * new jobs while we're resetting. ++ */ ++ for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) { ++ /* All groups should be in the idle lists. */ ++ drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i])); ++ list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node) ++ panthor_group_stop(group); ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) { ++ list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node) ++ panthor_group_stop(group); ++ } ++ ++ mutex_unlock(&sched->reset.lock); ++} ++ ++void panthor_sched_post_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group, *group_tmp; ++ ++ mutex_lock(&sched->reset.lock); ++ ++ list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) ++ panthor_group_start(group); ++ ++ /* We're done resetting the GPU, clear the reset.in_progress bit so we can ++ * kick the scheduler. 
++ */ ++ atomic_set(&sched->reset.in_progress, false); ++ mutex_unlock(&sched->reset.lock); ++ ++ sched_queue_delayed_work(sched, tick, 0); ++ ++ sched_queue_work(sched, sync_upd); ++} ++ ++static void group_sync_upd_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, sync_upd_work); ++ struct panthor_job *job, *job_tmp; ++ LIST_HEAD(done_jobs); ++ u32 queue_idx; ++ bool cookie; ++ ++ cookie = dma_fence_begin_signalling(); ++ for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { ++ struct panthor_queue *queue = group->queues[queue_idx]; ++ struct panthor_syncobj_64b *syncobj; ++ ++ if (!queue) ++ continue; ++ ++ syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj)); ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { ++ if (!job->call_info.size) ++ continue; ++ ++ if (syncobj->seqno < job->done_fence->seqno) ++ break; ++ ++ list_move_tail(&job->node, &done_jobs); ++ dma_fence_signal_locked(job->done_fence); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ } ++ dma_fence_end_signalling(cookie); ++ ++ list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { ++ list_del_init(&job->node); ++ panthor_job_put(&job->base); ++ } ++ ++ group_put(group); ++} ++ ++static struct dma_fence * ++queue_run_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ struct panthor_group *group = job->group; ++ struct panthor_queue *queue = group->queues[job->queue_idx]; ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); ++ u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1); ++ u64 addr_reg = ptdev->csif_info.cs_reg_count - ++ ptdev->csif_info.unpreserved_cs_reg_count; ++ u64 val_reg = addr_reg + 2; ++ u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) + ++ job->queue_idx * sizeof(struct panthor_syncobj_64b); ++ u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); ++ struct dma_fence *done_fence; ++ int ret; ++ ++ u64 call_instrs[NUM_INSTRS_PER_SLOT] = { ++ /* MOV32 rX+2, cs.latest_flush */ ++ (2ull << 56) | (val_reg << 48) | job->call_info.latest_flush, ++ ++ /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */ ++ (36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233, ++ ++ /* MOV48 rX:rX+1, cs.start */ ++ (1ull << 56) | (addr_reg << 48) | job->call_info.start, ++ ++ /* MOV32 rX+2, cs.size */ ++ (2ull << 56) | (val_reg << 48) | job->call_info.size, ++ ++ /* WAIT(0) => waits for FLUSH_CACHE2 instruction */ ++ (3ull << 56) | (1 << 16), ++ ++ /* CALL rX:rX+1, rX+2 */ ++ (32ull << 56) | (addr_reg << 40) | (val_reg << 32), ++ ++ /* MOV48 rX:rX+1, sync_addr */ ++ (1ull << 56) | (addr_reg << 48) | sync_addr, ++ ++ /* MOV48 rX+2, #1 */ ++ (1ull << 56) | (val_reg << 48) | 1, ++ ++ /* WAIT(all) */ ++ (3ull << 56) | (waitall_mask << 16), ++ ++ /* SYNC_ADD64.system_scope.propage_err.nowait rX:rX+1, rX+2*/ ++ (51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1, ++ ++ /* ERROR_BARRIER, so we can recover from faults at job ++ * boundaries. ++ */ ++ (47ull << 56), ++ }; ++ ++ /* Need to be cacheline aligned to please the prefetcher. 
*/ ++ static_assert(sizeof(call_instrs) % 64 == 0, ++ "call_instrs is not aligned on a cacheline"); ++ ++ /* Stream size is zero, nothing to do => return a NULL fence and let ++ * drm_sched signal the parent. ++ */ ++ if (!job->call_info.size) ++ return NULL; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ return ERR_PTR(ret); ++ ++ mutex_lock(&sched->lock); ++ if (!group_can_run(group)) { ++ done_fence = ERR_PTR(-ECANCELED); ++ goto out_unlock; ++ } ++ ++ dma_fence_init(job->done_fence, ++ &panthor_queue_fence_ops, ++ &queue->fence_ctx.lock, ++ queue->fence_ctx.id, ++ atomic64_inc_return(&queue->fence_ctx.seqno)); ++ ++ memcpy(queue->ringbuf->kmap + ringbuf_insert, ++ call_instrs, sizeof(call_instrs)); ++ ++ panthor_job_get(&job->base); ++ spin_lock(&queue->fence_ctx.lock); ++ list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs); ++ spin_unlock(&queue->fence_ctx.lock); ++ ++ job->ringbuf.start = queue->iface.input->insert; ++ job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs); ++ ++ /* Make sure the ring buffer is updated before the INSERT ++ * register. ++ */ ++ wmb(); ++ ++ queue->iface.input->extract = queue->iface.output->extract; ++ queue->iface.input->insert = job->ringbuf.end; ++ ++ if (group->csg_id < 0) { ++ /* If the queue is blocked, we want to keep the timeout running, so we ++ * can detect unbounded waits and kill the group when that happens. ++ * Otherwise, we suspend the timeout so the time we spend waiting for ++ * a CSG slot is not counted. ++ */ ++ if (!(group->blocked_queues & BIT(job->queue_idx)) && ++ !queue->timeout_suspended) { ++ queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); ++ queue->timeout_suspended = true; ++ } ++ ++ group_schedule_locked(group, BIT(job->queue_idx)); ++ } else { ++ gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1); ++ if (!sched->pm.has_ref && ++ !(group->blocked_queues & BIT(job->queue_idx))) { ++ pm_runtime_get(ptdev->base.dev); ++ sched->pm.has_ref = true; ++ } ++ } ++ ++ done_fence = dma_fence_get(job->done_fence); ++ ++out_unlock: ++ mutex_unlock(&sched->lock); ++ pm_runtime_mark_last_busy(ptdev->base.dev); ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ ++ return done_fence; ++} ++ ++static enum drm_gpu_sched_stat ++queue_timedout_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ struct panthor_group *group = job->group; ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_queue *queue = group->queues[job->queue_idx]; ++ ++ drm_warn(&ptdev->base, "job timeout\n"); ++ ++ drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress)); ++ ++ queue_stop(queue, job); ++ ++ mutex_lock(&sched->lock); ++ group->timedout = true; ++ if (group->csg_id >= 0) { ++ sched_queue_delayed_work(ptdev->scheduler, tick, 0); ++ } else { ++ /* Remove from the run queues, so the scheduler can't ++ * pick the group on the next tick. 
++ */ ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ ++ group_queue_work(group, term); ++ } ++ mutex_unlock(&sched->lock); ++ ++ queue_start(queue); ++ ++ return DRM_GPU_SCHED_STAT_NOMINAL; ++} ++ ++static void queue_free_job(struct drm_sched_job *sched_job) ++{ ++ drm_sched_job_cleanup(sched_job); ++ panthor_job_put(sched_job); ++} ++ ++static const struct drm_sched_backend_ops panthor_queue_sched_ops = { ++ .run_job = queue_run_job, ++ .timedout_job = queue_timedout_job, ++ .free_job = queue_free_job, ++}; ++ ++static struct panthor_queue * ++group_create_queue(struct panthor_group *group, ++ const struct drm_panthor_queue_create *args) ++{ ++ struct drm_gpu_scheduler *drm_sched; ++ struct panthor_queue *queue; ++ int ret; ++ ++ if (args->pad[0] || args->pad[1] || args->pad[2]) ++ return ERR_PTR(-EINVAL); ++ ++ if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K || ++ !is_power_of_2(args->ringbuf_size)) ++ return ERR_PTR(-EINVAL); ++ ++ if (args->priority > CSF_MAX_QUEUE_PRIO) ++ return ERR_PTR(-EINVAL); ++ ++ queue = kzalloc(sizeof(*queue), GFP_KERNEL); ++ if (!queue) ++ return ERR_PTR(-ENOMEM); ++ ++ queue->fence_ctx.id = dma_fence_context_alloc(1); ++ spin_lock_init(&queue->fence_ctx.lock); ++ INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs); ++ ++ queue->priority = args->priority; ++ ++ queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm, ++ args->ringbuf_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(queue->ringbuf)) { ++ ret = PTR_ERR(queue->ringbuf); ++ goto err_free_queue; ++ } ++ ++ ret = panthor_kernel_bo_vmap(queue->ringbuf); ++ if (ret) ++ goto err_free_queue; ++ ++ queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev, ++ &queue->iface.input, ++ &queue->iface.output, ++ &queue->iface.input_fw_va, ++ &queue->iface.output_fw_va); ++ if (IS_ERR(queue->iface.mem)) { ++ ret = PTR_ERR(queue->iface.mem); ++ goto err_free_queue; ++ } ++ ++ ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops, ++ group->ptdev->scheduler->wq, 1, ++ args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)), ++ 0, msecs_to_jiffies(JOB_TIMEOUT_MS), ++ group->ptdev->reset.wq, ++ NULL, "panthor-queue", group->ptdev->base.dev); ++ if (ret) ++ goto err_free_queue; ++ ++ drm_sched = &queue->scheduler; ++ ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL); ++ ++ return queue; ++ ++err_free_queue: ++ group_free_queue(group, queue); ++ return ERR_PTR(ret); ++} ++ ++#define MAX_GROUPS_PER_POOL 128 ++ ++int panthor_group_create(struct panthor_file *pfile, ++ const struct drm_panthor_group_create *group_args, ++ const struct drm_panthor_queue_create *queue_args) ++{ ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0); ++ struct panthor_group *group = NULL; ++ u32 gid, i, suspend_size; ++ int ret; ++ ++ if (group_args->pad) ++ return -EINVAL; ++ ++ if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH) ++ return -EINVAL; ++ ++ if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) || ++ (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) || ++ (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present)) ++ return -EINVAL; ++ ++ if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores || 
++ hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores || ++ hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores) ++ return -EINVAL; ++ ++ group = kzalloc(sizeof(*group), GFP_KERNEL); ++ if (!group) ++ return -ENOMEM; ++ ++ spin_lock_init(&group->fatal_lock); ++ kref_init(&group->refcount); ++ group->state = PANTHOR_CS_GROUP_CREATED; ++ group->csg_id = -1; ++ ++ group->ptdev = ptdev; ++ group->max_compute_cores = group_args->max_compute_cores; ++ group->compute_core_mask = group_args->compute_core_mask; ++ group->max_fragment_cores = group_args->max_fragment_cores; ++ group->fragment_core_mask = group_args->fragment_core_mask; ++ group->max_tiler_cores = group_args->max_tiler_cores; ++ group->tiler_core_mask = group_args->tiler_core_mask; ++ group->priority = group_args->priority; ++ ++ INIT_LIST_HEAD(&group->wait_node); ++ INIT_LIST_HEAD(&group->run_node); ++ INIT_WORK(&group->term_work, group_term_work); ++ INIT_WORK(&group->sync_upd_work, group_sync_upd_work); ++ INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work); ++ INIT_WORK(&group->release_work, group_release_work); ++ ++ group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id); ++ if (!group->vm) { ++ ret = -EINVAL; ++ goto err_put_group; ++ } ++ ++ suspend_size = csg_iface->control->suspend_size; ++ group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); ++ if (IS_ERR(group->suspend_buf)) { ++ ret = PTR_ERR(group->suspend_buf); ++ group->suspend_buf = NULL; ++ goto err_put_group; ++ } ++ ++ suspend_size = csg_iface->control->protm_suspend_size; ++ group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); ++ if (IS_ERR(group->protm_suspend_buf)) { ++ ret = PTR_ERR(group->protm_suspend_buf); ++ group->protm_suspend_buf = NULL; ++ goto err_put_group; ++ } ++ ++ group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm, ++ group_args->queues.count * ++ sizeof(struct panthor_syncobj_64b), ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(group->syncobjs)) { ++ ret = PTR_ERR(group->syncobjs); ++ goto err_put_group; ++ } ++ ++ ret = panthor_kernel_bo_vmap(group->syncobjs); ++ if (ret) ++ goto err_put_group; ++ ++ memset(group->syncobjs->kmap, 0, ++ group_args->queues.count * sizeof(struct panthor_syncobj_64b)); ++ ++ for (i = 0; i < group_args->queues.count; i++) { ++ group->queues[i] = group_create_queue(group, &queue_args[i]); ++ if (IS_ERR(group->queues[i])) { ++ ret = PTR_ERR(group->queues[i]); ++ group->queues[i] = NULL; ++ goto err_put_group; ++ } ++ ++ group->queue_count++; ++ } ++ ++ group->idle_queues = GENMASK(group->queue_count - 1, 0); ++ ++ ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL); ++ if (ret) ++ goto err_put_group; ++ ++ mutex_lock(&sched->reset.lock); ++ if (atomic_read(&sched->reset.in_progress)) { ++ panthor_group_stop(group); ++ } else { ++ mutex_lock(&sched->lock); ++ list_add_tail(&group->run_node, ++ &sched->groups.idle[group->priority]); ++ mutex_unlock(&sched->lock); ++ } ++ mutex_unlock(&sched->reset.lock); ++ ++ return gid; ++ ++err_put_group: ++ group_put(group); ++ return ret; ++} ++ ++int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group; ++ ++ group = 
xa_erase(&gpool->xa, group_handle); ++ if (!group) ++ return -EINVAL; ++ ++ for (u32 i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ drm_sched_entity_destroy(&group->queues[i]->entity); ++ } ++ ++ mutex_lock(&sched->reset.lock); ++ mutex_lock(&sched->lock); ++ group->destroyed = true; ++ if (group->csg_id >= 0) { ++ sched_queue_delayed_work(sched, tick, 0); ++ } else if (!atomic_read(&sched->reset.in_progress)) { ++ /* Remove from the run queues, so the scheduler can't ++ * pick the group on the next tick. ++ */ ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ mutex_unlock(&sched->lock); ++ mutex_unlock(&sched->reset.lock); ++ ++ group_put(group); ++ return 0; ++} ++ ++int panthor_group_get_state(struct panthor_file *pfile, ++ struct drm_panthor_group_get_state *get_state) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group; ++ ++ if (get_state->pad) ++ return -EINVAL; ++ ++ group = group_get(xa_load(&gpool->xa, get_state->group_handle)); ++ if (!group) ++ return -EINVAL; ++ ++ memset(get_state, 0, sizeof(*get_state)); ++ ++ mutex_lock(&sched->lock); ++ if (group->timedout) ++ get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT; ++ if (group->fatal_queues) { ++ get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT; ++ get_state->fatal_queues = group->fatal_queues; ++ } ++ mutex_unlock(&sched->lock); ++ ++ group_put(group); ++ return 0; ++} ++ ++int panthor_group_pool_create(struct panthor_file *pfile) ++{ ++ struct panthor_group_pool *gpool; ++ ++ gpool = kzalloc(sizeof(*gpool), GFP_KERNEL); ++ if (!gpool) ++ return -ENOMEM; ++ ++ xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1); ++ pfile->groups = gpool; ++ return 0; ++} ++ ++void panthor_group_pool_destroy(struct panthor_file *pfile) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_group *group; ++ unsigned long i; ++ ++ if (IS_ERR_OR_NULL(gpool)) ++ return; ++ ++ xa_for_each(&gpool->xa, i, group) ++ panthor_group_destroy(pfile, i); ++ ++ xa_destroy(&gpool->xa); ++ kfree(gpool); ++ pfile->groups = NULL; ++} ++ ++static void job_release(struct kref *ref) ++{ ++ struct panthor_job *job = container_of(ref, struct panthor_job, refcount); ++ ++ drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node)); ++ ++ if (job->base.s_fence) ++ drm_sched_job_cleanup(&job->base); ++ ++ if (job->done_fence && job->done_fence->ops) ++ dma_fence_put(job->done_fence); ++ else ++ dma_fence_free(job->done_fence); ++ ++ group_put(job->group); ++ ++ kfree(job); ++} ++ ++struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job) ++{ ++ if (sched_job) { ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ kref_get(&job->refcount); ++ } ++ ++ return sched_job; ++} ++ ++void panthor_job_put(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ if (sched_job) ++ kref_put(&job->refcount, job_release); ++} ++ ++struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ return job->group->vm; ++} ++ ++struct drm_sched_job * ++panthor_job_create(struct panthor_file *pfile, ++ u16 group_handle, ++ const struct drm_panthor_queue_submit *qsubmit) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct 
panthor_job *job; ++ int ret; ++ ++ if (qsubmit->pad) ++ return ERR_PTR(-EINVAL); ++ ++ /* If stream_addr is zero, so stream_size should be. */ ++ if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0)) ++ return ERR_PTR(-EINVAL); ++ ++ /* Make sure the address is aligned on 64-byte (cacheline) and the size is ++ * aligned on 8-byte (instruction size). ++ */ ++ if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7)) ++ return ERR_PTR(-EINVAL); ++ ++ /* bits 24:30 must be zero. */ ++ if (qsubmit->latest_flush & GENMASK(30, 24)) ++ return ERR_PTR(-EINVAL); ++ ++ job = kzalloc(sizeof(*job), GFP_KERNEL); ++ if (!job) ++ return ERR_PTR(-ENOMEM); ++ ++ kref_init(&job->refcount); ++ job->queue_idx = qsubmit->queue_index; ++ job->call_info.size = qsubmit->stream_size; ++ job->call_info.start = qsubmit->stream_addr; ++ job->call_info.latest_flush = qsubmit->latest_flush; ++ INIT_LIST_HEAD(&job->node); ++ ++ job->group = group_get(xa_load(&gpool->xa, group_handle)); ++ if (!job->group) { ++ ret = -EINVAL; ++ goto err_put_job; ++ } ++ ++ if (job->queue_idx >= job->group->queue_count || ++ !job->group->queues[job->queue_idx]) { ++ ret = -EINVAL; ++ goto err_put_job; ++ } ++ ++ job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); ++ if (!job->done_fence) { ++ ret = -ENOMEM; ++ goto err_put_job; ++ } ++ ++ ret = drm_sched_job_init(&job->base, ++ &job->group->queues[job->queue_idx]->entity, ++ 1, job->group); ++ if (ret) ++ goto err_put_job; ++ ++ return &job->base; ++ ++err_put_job: ++ panthor_job_put(&job->base); ++ return ERR_PTR(ret); ++} ++ ++void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ /* Still not sure why we want USAGE_WRITE for external objects, since I ++ * was assuming this would be handled through explicit syncs being imported ++ * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers ++ * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason. 
++	 */
++	panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
++				DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
++}
++
++void panthor_sched_unplug(struct panthor_device *ptdev)
++{
++	struct panthor_scheduler *sched = ptdev->scheduler;
++
++	cancel_delayed_work_sync(&sched->tick_work);
++
++	mutex_lock(&sched->lock);
++	if (sched->pm.has_ref) {
++		pm_runtime_put(ptdev->base.dev);
++		sched->pm.has_ref = false;
++	}
++	mutex_unlock(&sched->lock);
++}
++
++static void panthor_sched_fini(struct drm_device *ddev, void *res)
++{
++	struct panthor_scheduler *sched = res;
++	int prio;
++
++	if (!sched || !sched->csg_slot_count)
++		return;
++
++	cancel_delayed_work_sync(&sched->tick_work);
++
++	if (sched->wq)
++		destroy_workqueue(sched->wq);
++
++	if (sched->heap_alloc_wq)
++		destroy_workqueue(sched->heap_alloc_wq);
++
++	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
++		drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
++		drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
++	}
++
++	drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
++}
++
++int panthor_sched_init(struct panthor_device *ptdev)
++{
++	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
++	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
++	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
++	struct panthor_scheduler *sched;
++	u32 gpu_as_count, num_groups;
++	int prio, ret;
++
++	sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
++	if (!sched)
++		return -ENOMEM;
++
++	/* The highest bit in JOB_INT_* is reserved for global IRQs. That
++	 * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here.
++	 */
++	num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num);
++
++	/* The FW-side scheduler might deadlock if two groups with the same
++	 * priority try to access a set of resources that overlaps, with part
++	 * of the resources being allocated to one group and the other part to
++	 * the other group, both groups waiting for the remaining resources to
++	 * be allocated. To avoid that, it is recommended to assign each CSG a
++	 * different priority. In theory we could allow several groups to have
++	 * the same CSG priority if they don't request the same resources, but
++	 * that makes the scheduling logic more complicated, so let's clamp
++	 * the number of CSG slots to MAX_CSG_PRIO + 1 for now.
++	 */
++	num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups);
++
++	/* We need at least one AS for the MCU and one for the GPU contexts. */
++	gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1));
++	if (!gpu_as_count) {
++		drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
++			gpu_as_count + 1);
++		return -EINVAL;
++	}
++
++	sched->ptdev = ptdev;
++	sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features);
++	sched->csg_slot_count = num_groups;
++	sched->cs_slot_count = csg_iface->control->stream_num;
++	sched->as_slot_count = gpu_as_count;
++	ptdev->csif_info.csg_slot_count = sched->csg_slot_count;
++	ptdev->csif_info.cs_slot_count = sched->cs_slot_count;
++	ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count;
++
++	sched->last_tick = 0;
++	sched->resched_target = U64_MAX;
++	sched->tick_period = msecs_to_jiffies(10);
++	INIT_DELAYED_WORK(&sched->tick_work, tick_work);
++	INIT_WORK(&sched->sync_upd_work, sync_upd_work);
++	INIT_WORK(&sched->fw_events_work, process_fw_events_work);
++
++	ret = drmm_mutex_init(&ptdev->base, &sched->lock);
++	if (ret)
++		return ret;
++
++	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
++		INIT_LIST_HEAD(&sched->groups.runnable[prio]);
++		INIT_LIST_HEAD(&sched->groups.idle[prio]);
++	}
++	INIT_LIST_HEAD(&sched->groups.waiting);
++
++	ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock);
++	if (ret)
++		return ret;
++
++	INIT_LIST_HEAD(&sched->reset.stopped_groups);
++
++	/* sched->heap_alloc_wq will be used for heap chunk allocation on
++	 * tiler OOM events, which means we can't use the same workqueue for
++	 * the scheduler because works queued by the scheduler are in
++	 * the dma-signalling path. Allocate a dedicated heap_alloc_wq to
++	 * work around this limitation.
++	 *
++	 * FIXME: Ultimately, what we need is a failable/non-blocking GEM
++	 * allocation path that we can call when a heap OOM is reported. The
++	 * FW is smart enough to fall back on other methods if the kernel can't
++	 * allocate memory, and fail the tiling job if none of these
++	 * countermeasures worked.
++	 *
++	 * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the
++	 * system is running out of memory.
++	 */
++	sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
++	sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
++	if (!sched->wq || !sched->heap_alloc_wq) {
++		panthor_sched_fini(&ptdev->base, sched);
++		drm_err(&ptdev->base, "Failed to allocate the workqueues");
++		return -ENOMEM;
++	}
++
++	ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
++	if (ret)
++		return ret;
++
++	ptdev->scheduler = sched;
++	return 0;
++}
+--- /dev/null
++++ b/drivers/gpu/drm/panthor/panthor_sched.h
+@@ -0,0 +1,50 @@
++/* SPDX-License-Identifier: GPL-2.0 or MIT */
++/* Copyright 2023 Collabora ltd.
*/ ++ ++#ifndef __PANTHOR_SCHED_H__ ++#define __PANTHOR_SCHED_H__ ++ ++struct drm_exec; ++struct dma_fence; ++struct drm_file; ++struct drm_gem_object; ++struct drm_sched_job; ++struct drm_panthor_group_create; ++struct drm_panthor_queue_create; ++struct drm_panthor_group_get_state; ++struct drm_panthor_queue_submit; ++struct panthor_device; ++struct panthor_file; ++struct panthor_group_pool; ++struct panthor_job; ++ ++int panthor_group_create(struct panthor_file *pfile, ++ const struct drm_panthor_group_create *group_args, ++ const struct drm_panthor_queue_create *queue_args); ++int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle); ++int panthor_group_get_state(struct panthor_file *pfile, ++ struct drm_panthor_group_get_state *get_state); ++ ++struct drm_sched_job * ++panthor_job_create(struct panthor_file *pfile, ++ u16 group_handle, ++ const struct drm_panthor_queue_submit *qsubmit); ++struct drm_sched_job *panthor_job_get(struct drm_sched_job *job); ++struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job); ++void panthor_job_put(struct drm_sched_job *job); ++void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); ++ ++int panthor_group_pool_create(struct panthor_file *pfile); ++void panthor_group_pool_destroy(struct panthor_file *pfile); ++ ++int panthor_sched_init(struct panthor_device *ptdev); ++void panthor_sched_unplug(struct panthor_device *ptdev); ++void panthor_sched_pre_reset(struct panthor_device *ptdev); ++void panthor_sched_post_reset(struct panthor_device *ptdev); ++void panthor_sched_suspend(struct panthor_device *ptdev); ++void panthor_sched_resume(struct panthor_device *ptdev); ++ ++void panthor_sched_report_mmu_fault(struct panthor_device *ptdev); ++void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events); ++ ++#endif diff --git a/patches-6.6/034-40-v6.10-drm-panthor-Add-the-driver-frontend-block.patch b/patches-6.6/034-40-v6.10-drm-panthor-Add-the-driver-frontend-block.patch new file mode 100644 index 0000000..7a798a5 --- /dev/null +++ b/patches-6.6/034-40-v6.10-drm-panthor-Add-the-driver-frontend-block.patch @@ -0,0 +1,1534 @@ +From 4bdca11507928a4c9174e9b7240e9d058c12a71d Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:25 +0100 +Subject: [PATCH] drm/panthor: Add the driver frontend block + +This is the last piece missing to expose the driver to the outside +world. + +This is basically a wrapper between the ioctls and the other logical +blocks. + +v6: +- Add Maxime's and Heiko's acks +- Return a page-aligned BO size to userspace +- Keep header inclusion alphabetically ordered + +v5: +- Account for the drm_exec_init() prototype change +- Include platform_device.h + +v4: +- Add an ioctl to let the UMD query the VM state +- Fix kernel doc +- Let panthor_device_init() call panthor_device_init() +- Fix cleanup ordering in the panthor_init() error path +- Add Steve's and Liviu's R-b + +v3: +- Add acks for the MIT/GPL2 relicensing +- Fix 32-bit support +- Account for panthor_vm and panthor_sched changes +- Simplify the resv preparation/update logic +- Use a linked list rather than xarray for list of signals. +- Simplify panthor_get_uobj_array by returning the newly allocated + array. +- Drop the "DOC" for job submission helpers and move the relevant + comments to panthor_ioctl_group_submit(). +- Add helpers sync_op_is_signal()/sync_op_is_wait(). +- Simplify return type of panthor_submit_ctx_add_sync_signal() and + panthor_submit_ctx_get_sync_signal(). 
+- Drop WARN_ON from panthor_submit_ctx_add_job(). +- Fix typos in comments. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-12-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_drv.c | 1473 +++++++++++++++++++++++++ + 1 file changed, 1473 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_drv.c + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -0,0 +1,1472 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd., Rob Herring */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++/** ++ * DOC: user <-> kernel object copy helpers. ++ */ ++ ++/** ++ * panthor_set_uobj() - Copy kernel object to user object. ++ * @usr_ptr: Users pointer. ++ * @usr_size: Size of the user object. ++ * @min_size: Minimum size for this object. ++ * @kern_size: Size of the kernel object. ++ * @in: Address of the kernel object to copy. ++ * ++ * Helper automating kernel -> user object copies. ++ * ++ * Don't use this function directly, use PANTHOR_UOBJ_SET() instead. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const void *in) ++{ ++ /* User size shouldn't be smaller than the minimal object size. */ ++ if (usr_size < min_size) ++ return -EINVAL; ++ ++ if (copy_to_user(u64_to_user_ptr(usr_ptr), in, min_t(u32, usr_size, kern_size))) ++ return -EFAULT; ++ ++ /* When the kernel object is smaller than the user object, we fill the gap with ++ * zeros. ++ */ ++ if (usr_size > kern_size && ++ clear_user(u64_to_user_ptr(usr_ptr + kern_size), usr_size - kern_size)) { ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array. ++ * @in: The object array to copy. ++ * @min_stride: Minimum array stride. ++ * @obj_size: Kernel object size. ++ * ++ * Helper automating user -> kernel object copies. ++ * ++ * Don't use this function directly, use PANTHOR_UOBJ_GET_ARRAY() instead. ++ * ++ * Return: newly allocated object array or an ERR_PTR on error. ++ */ ++static void * ++panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, ++ u32 obj_size) ++{ ++ int ret = 0; ++ void *out_alloc; ++ ++ /* User stride must be at least the minimum object size, otherwise it might ++ * lack useful information. ++ */ ++ if (in->stride < min_stride) ++ return ERR_PTR(-EINVAL); ++ ++ if (!in->count) ++ return NULL; ++ ++ out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); ++ if (!out_alloc) ++ return ERR_PTR(-ENOMEM); ++ ++ if (obj_size == in->stride) { ++ /* Fast path when user/kernel have the same uAPI header version. 
*/ ++ if (copy_from_user(out_alloc, u64_to_user_ptr(in->array), ++ (unsigned long)obj_size * in->count)) ++ ret = -EFAULT; ++ } else { ++ void __user *in_ptr = u64_to_user_ptr(in->array); ++ void *out_ptr = out_alloc; ++ ++ /* If the sizes differ, we need to copy elements one by one. */ ++ for (u32 i = 0; i < in->count; i++) { ++ ret = copy_struct_from_user(out_ptr, obj_size, in_ptr, in->stride); ++ if (ret) ++ break; ++ ++ out_ptr += obj_size; ++ in_ptr += in->stride; ++ } ++ } ++ ++ if (ret) { ++ kvfree(out_alloc); ++ return ERR_PTR(ret); ++ } ++ ++ return out_alloc; ++} ++ ++/** ++ * PANTHOR_UOBJ_MIN_SIZE_INTERNAL() - Get the minimum user object size ++ * @_typename: Object type. ++ * @_last_mandatory_field: Last mandatory field. ++ * ++ * Get the minimum user object size based on the last mandatory field name, ++ * A.K.A, the name of the last field of the structure at the time this ++ * structure was added to the uAPI. ++ * ++ * Don't use directly, use PANTHOR_UOBJ_DECL() instead. ++ */ ++#define PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) \ ++ (offsetof(_typename, _last_mandatory_field) + \ ++ sizeof(((_typename *)NULL)->_last_mandatory_field)) ++ ++/** ++ * PANTHOR_UOBJ_DECL() - Declare a new uAPI object whose subject to ++ * evolutions. ++ * @_typename: Object type. ++ * @_last_mandatory_field: Last mandatory field. ++ * ++ * Should be used to extend the PANTHOR_UOBJ_MIN_SIZE() list. ++ */ ++#define PANTHOR_UOBJ_DECL(_typename, _last_mandatory_field) \ ++ _typename : PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) ++ ++/** ++ * PANTHOR_UOBJ_MIN_SIZE() - Get the minimum size of a given uAPI object ++ * @_obj_name: Object to get the minimum size of. ++ * ++ * Don't use this macro directly, it's automatically called by ++ * PANTHOR_UOBJ_{SET,GET_ARRAY}(). ++ */ ++#define PANTHOR_UOBJ_MIN_SIZE(_obj_name) \ ++ _Generic(_obj_name, \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs)) ++ ++/** ++ * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object. ++ * @_dest_usr_ptr: User pointer to copy to. ++ * @_usr_size: Size of the user object. ++ * @_src_obj: Kernel object to copy (not a pointer). ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define PANTHOR_UOBJ_SET(_dest_usr_ptr, _usr_size, _src_obj) \ ++ panthor_set_uobj(_dest_usr_ptr, _usr_size, \ ++ PANTHOR_UOBJ_MIN_SIZE(_src_obj), \ ++ sizeof(_src_obj), &(_src_obj)) ++ ++/** ++ * PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible ++ * object array. ++ * @_dest_array: Local variable that will hold the newly allocated kernel ++ * object array. ++ * @_uobj_array: The drm_panthor_obj_array object describing the user object ++ * array. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++#define PANTHOR_UOBJ_GET_ARRAY(_dest_array, _uobj_array) \ ++ ({ \ ++ typeof(_dest_array) _tmp; \ ++ _tmp = panthor_get_uobj_array(_uobj_array, \ ++ PANTHOR_UOBJ_MIN_SIZE((_dest_array)[0]), \ ++ sizeof((_dest_array)[0])); \ ++ if (!IS_ERR(_tmp)) \ ++ _dest_array = _tmp; \ ++ PTR_ERR_OR_ZERO(_tmp); \ ++ }) ++ ++/** ++ * struct panthor_sync_signal - Represent a synchronization object point to attach ++ * our job fence to. ++ * ++ * This structure is here to keep track of fences that are currently bound to ++ * a specific syncobj point. ++ * ++ * At the beginning of a job submission, the fence ++ * is retrieved from the syncobj itself, and can be NULL if no fence was attached ++ * to this point. ++ * ++ * At the end, it points to the fence of the last job that had a ++ * %DRM_PANTHOR_SYNC_OP_SIGNAL on this syncobj. ++ * ++ * With jobs being submitted in batches, the fence might change several times during ++ * the process, allowing one job to wait on a job that's part of the same submission ++ * but appears earlier in the drm_panthor_group_submit::queue_submits array. ++ */ ++struct panthor_sync_signal { ++ /** @node: list_head to track signal ops within a submit operation */ ++ struct list_head node; ++ ++ /** @handle: The syncobj handle. */ ++ u32 handle; ++ ++ /** ++ * @point: The syncobj point. ++ * ++ * Zero for regular syncobjs, and non-zero for timeline syncobjs. ++ */ ++ u64 point; ++ ++ /** ++ * @syncobj: The sync object pointed by @handle. ++ */ ++ struct drm_syncobj *syncobj; ++ ++ /** ++ * @chain: Chain object used to link the new fence to an existing ++ * timeline syncobj. ++ * ++ * NULL for regular syncobj, non-NULL for timeline syncobjs. ++ */ ++ struct dma_fence_chain *chain; ++ ++ /** ++ * @fence: The fence to assign to the syncobj or syncobj-point. ++ */ ++ struct dma_fence *fence; ++}; ++ ++/** ++ * struct panthor_job_ctx - Job context ++ */ ++struct panthor_job_ctx { ++ /** @job: The job that is about to be submitted to drm_sched. */ ++ struct drm_sched_job *job; ++ ++ /** @syncops: Array of sync operations. */ ++ struct drm_panthor_sync_op *syncops; ++ ++ /** @syncop_count: Number of sync operations. */ ++ u32 syncop_count; ++}; ++ ++/** ++ * struct panthor_submit_ctx - Submission context ++ * ++ * Anything that's related to a submission (%DRM_IOCTL_PANTHOR_VM_BIND or ++ * %DRM_IOCTL_PANTHOR_GROUP_SUBMIT) is kept here, so we can automate the ++ * initialization and cleanup steps. ++ */ ++struct panthor_submit_ctx { ++ /** @file: DRM file this submission happens on. */ ++ struct drm_file *file; ++ ++ /** ++ * @signals: List of struct panthor_sync_signal. ++ * ++ * %DRM_PANTHOR_SYNC_OP_SIGNAL operations will be recorded here, ++ * and %DRM_PANTHOR_SYNC_OP_WAIT will first check if an entry ++ * matching the syncobj+point exists before calling ++ * drm_syncobj_find_fence(). This allows us to describe dependencies ++ * existing between jobs that are part of the same batch. ++ */ ++ struct list_head signals; ++ ++ /** @jobs: Array of jobs. */ ++ struct panthor_job_ctx *jobs; ++ ++ /** @job_count: Number of entries in the @jobs array. */ ++ u32 job_count; ++ ++ /** @exec: drm_exec context used to acquire and prepare resv objects. 
*/ ++ struct drm_exec exec; ++}; ++ ++#define PANTHOR_SYNC_OP_FLAGS_MASK \ ++ (DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK | DRM_PANTHOR_SYNC_OP_SIGNAL) ++ ++static bool sync_op_is_signal(const struct drm_panthor_sync_op *sync_op) ++{ ++ return !!(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); ++} ++ ++static bool sync_op_is_wait(const struct drm_panthor_sync_op *sync_op) ++{ ++ /* Note that DRM_PANTHOR_SYNC_OP_WAIT == 0 */ ++ return !(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); ++} ++ ++/** ++ * panthor_check_sync_op() - Check drm_panthor_sync_op fields ++ * @sync_op: The sync operation to check. ++ * ++ * Return: 0 on success, -EINVAL otherwise. ++ */ ++static int ++panthor_check_sync_op(const struct drm_panthor_sync_op *sync_op) ++{ ++ u8 handle_type; ++ ++ if (sync_op->flags & ~PANTHOR_SYNC_OP_FLAGS_MASK) ++ return -EINVAL; ++ ++ handle_type = sync_op->flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK; ++ if (handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && ++ handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ) ++ return -EINVAL; ++ ++ if (handle_type == DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && ++ sync_op->timeline_value != 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++/** ++ * panthor_sync_signal_free() - Release resources and free a panthor_sync_signal object ++ * @sig_sync: Signal object to free. ++ */ ++static void ++panthor_sync_signal_free(struct panthor_sync_signal *sig_sync) ++{ ++ if (!sig_sync) ++ return; ++ ++ drm_syncobj_put(sig_sync->syncobj); ++ dma_fence_chain_free(sig_sync->chain); ++ dma_fence_put(sig_sync->fence); ++ kfree(sig_sync); ++} ++ ++/** ++ * panthor_submit_ctx_add_sync_signal() - Add a signal operation to a submit context ++ * @ctx: Context to add the signal operation to. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: 0 on success, otherwise negative error value. ++ */ ++static int ++panthor_submit_ctx_add_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ struct dma_fence *cur_fence; ++ int ret; ++ ++ sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL); ++ if (!sig_sync) ++ return -ENOMEM; ++ ++ sig_sync->handle = handle; ++ sig_sync->point = point; ++ ++ if (point > 0) { ++ sig_sync->chain = dma_fence_chain_alloc(); ++ if (!sig_sync->chain) { ++ ret = -ENOMEM; ++ goto err_free_sig_sync; ++ } ++ } ++ ++ sig_sync->syncobj = drm_syncobj_find(ctx->file, handle); ++ if (!sig_sync->syncobj) { ++ ret = -EINVAL; ++ goto err_free_sig_sync; ++ } ++ ++ /* Retrieve the current fence attached to that point. It's ++ * perfectly fine to get a NULL fence here, it just means there's ++ * no fence attached to that point yet. ++ */ ++ if (!drm_syncobj_find_fence(ctx->file, handle, point, 0, &cur_fence)) ++ sig_sync->fence = cur_fence; ++ ++ list_add_tail(&sig_sync->node, &ctx->signals); ++ ++ return 0; ++ ++err_free_sig_sync: ++ panthor_sync_signal_free(sig_sync); ++ return ret; ++} ++ ++/** ++ * panthor_submit_ctx_search_sync_signal() - Search an existing signal operation in a ++ * submit context. ++ * @ctx: Context to search the signal operation in. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: A valid panthor_sync_signal object if found, NULL otherwise. 
++ */ ++static struct panthor_sync_signal * ++panthor_submit_ctx_search_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ list_for_each_entry(sig_sync, &ctx->signals, node) { ++ if (handle == sig_sync->handle && point == sig_sync->point) ++ return sig_sync; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * panthor_submit_ctx_add_job() - Add a job to a submit context ++ * @ctx: Context to search the signal operation in. ++ * @idx: Index of the job in the context. ++ * @job: Job to add. ++ * @syncs: Sync operations provided by userspace. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_add_job(struct panthor_submit_ctx *ctx, u32 idx, ++ struct drm_sched_job *job, ++ const struct drm_panthor_obj_array *syncs) ++{ ++ int ret; ++ ++ ctx->jobs[idx].job = job; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(ctx->jobs[idx].syncops, syncs); ++ if (ret) ++ return ret; ++ ++ ctx->jobs[idx].syncop_count = syncs->count; ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_get_sync_signal() - Search signal operation and add one if none was found. ++ * @ctx: Context to search the signal operation in. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_get_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, handle, point); ++ if (sig_sync) ++ return 0; ++ ++ return panthor_submit_ctx_add_sync_signal(ctx, handle, point); ++} ++ ++/** ++ * panthor_submit_ctx_update_job_sync_signal_fences() - Update fences ++ * on the signal operations specified by a job. ++ * @ctx: Context to search the signal operation in. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_update_job_sync_signal_fences(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ struct panthor_device *ptdev = container_of(ctx->file->minor->dev, ++ struct panthor_device, ++ base); ++ struct dma_fence *done_fence = &ctx->jobs[job_idx].job->s_fence->finished; ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ struct dma_fence *old_fence; ++ struct panthor_sync_signal *sig_sync; ++ ++ if (!sync_op_is_signal(&sync_ops[i])) ++ continue; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (drm_WARN_ON(&ptdev->base, !sig_sync)) ++ return -EINVAL; ++ ++ old_fence = sig_sync->fence; ++ sig_sync->fence = dma_fence_get(done_fence); ++ dma_fence_put(old_fence); ++ ++ if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_collect_job_signal_ops() - Iterate over all job signal operations ++ * and add them to the context. ++ * @ctx: Context to search the signal operation in. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++static int ++panthor_submit_ctx_collect_job_signal_ops(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ int ret; ++ ++ if (!sync_op_is_signal(&sync_ops[i])) ++ continue; ++ ++ ret = panthor_check_sync_op(&sync_ops[i]); ++ if (ret) ++ return ret; ++ ++ ret = panthor_submit_ctx_get_sync_signal(ctx, ++ sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_push_fences() - Iterate over the signal array, and for each entry, push ++ * the currently assigned fence to the associated syncobj. ++ * @ctx: Context to push fences on. ++ * ++ * This is the last step of a submission procedure, and is done once we know the submission ++ * is effective and job fences are guaranteed to be signaled in finite time. ++ */ ++static void ++panthor_submit_ctx_push_fences(struct panthor_submit_ctx *ctx) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ list_for_each_entry(sig_sync, &ctx->signals, node) { ++ if (sig_sync->chain) { ++ drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain, ++ sig_sync->fence, sig_sync->point); ++ sig_sync->chain = NULL; ++ } else { ++ drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence); ++ } ++ } ++} ++ ++/** ++ * panthor_submit_ctx_add_sync_deps_to_job() - Add sync wait operations as ++ * job dependencies. ++ * @ctx: Submit context. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_add_sync_deps_to_job(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ struct panthor_device *ptdev = container_of(ctx->file->minor->dev, ++ struct panthor_device, ++ base); ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ struct drm_sched_job *job = ctx->jobs[job_idx].job; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ int ret = 0; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ struct panthor_sync_signal *sig_sync; ++ struct dma_fence *fence; ++ ++ if (!sync_op_is_wait(&sync_ops[i])) ++ continue; ++ ++ ret = panthor_check_sync_op(&sync_ops[i]); ++ if (ret) ++ return ret; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (sig_sync) { ++ if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) ++ return -EINVAL; ++ ++ fence = dma_fence_get(sig_sync->fence); ++ } else { ++ ret = drm_syncobj_find_fence(ctx->file, sync_ops[i].handle, ++ sync_ops[i].timeline_value, ++ 0, &fence); ++ if (ret) ++ return ret; ++ } ++ ++ ret = drm_sched_job_add_dependency(job, fence); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_collect_jobs_signal_ops() - Collect all signal operations ++ * and add them to the submit context. ++ * @ctx: Submit context. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_collect_jobs_signal_ops(struct panthor_submit_ctx *ctx) ++{ ++ for (u32 i = 0; i < ctx->job_count; i++) { ++ int ret; ++ ++ ret = panthor_submit_ctx_collect_job_signal_ops(ctx, i); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_add_deps_and_arm_jobs() - Add jobs dependencies and arm jobs ++ * @ctx: Submit context. ++ * ++ * Must be called after the resv preparation has been taken care of. 
++ *
++ * Return: 0 on success, a negative error code otherwise.
++ */
++static int
++panthor_submit_ctx_add_deps_and_arm_jobs(struct panthor_submit_ctx *ctx)
++{
++	for (u32 i = 0; i < ctx->job_count; i++) {
++		int ret;
++
++		ret = panthor_submit_ctx_add_sync_deps_to_job(ctx, i);
++		if (ret)
++			return ret;
++
++		drm_sched_job_arm(ctx->jobs[i].job);
++
++		ret = panthor_submit_ctx_update_job_sync_signal_fences(ctx, i);
++		if (ret)
++			return ret;
++	}
++
++	return 0;
++}
++
++/**
++ * panthor_submit_ctx_push_jobs() - Push jobs to their scheduling entities.
++ * @ctx: Submit context.
++ * @upd_resvs: Callback used to update reservation objects that were previously
++ * prepared.
++ */
++static void
++panthor_submit_ctx_push_jobs(struct panthor_submit_ctx *ctx,
++			     void (*upd_resvs)(struct drm_exec *, struct drm_sched_job *))
++{
++	for (u32 i = 0; i < ctx->job_count; i++) {
++		upd_resvs(&ctx->exec, ctx->jobs[i].job);
++		drm_sched_entity_push_job(ctx->jobs[i].job);
++
++		/* Job is owned by the scheduler now. */
++		ctx->jobs[i].job = NULL;
++	}
++
++	panthor_submit_ctx_push_fences(ctx);
++}
++
++/**
++ * panthor_submit_ctx_init() - Initializes a submission context
++ * @ctx: Submit context to initialize.
++ * @file: drm_file this submission happens on.
++ * @job_count: Number of jobs that will be submitted.
++ *
++ * Return: 0 on success, a negative error code otherwise.
++ */
++static int panthor_submit_ctx_init(struct panthor_submit_ctx *ctx,
++				   struct drm_file *file, u32 job_count)
++{
++	ctx->jobs = kvmalloc_array(job_count, sizeof(*ctx->jobs),
++				   GFP_KERNEL | __GFP_ZERO);
++	if (!ctx->jobs)
++		return -ENOMEM;
++
++	ctx->file = file;
++	ctx->job_count = job_count;
++	INIT_LIST_HEAD(&ctx->signals);
++	drm_exec_init(&ctx->exec,
++		      DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
++	return 0;
++}
++
++/**
++ * panthor_submit_ctx_cleanup() - Cleanup a submission context
++ * @ctx: Submit context to cleanup.
++ * @job_put: Job put callback.
++ */ ++static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx, ++ void (*job_put)(struct drm_sched_job *)) ++{ ++ struct panthor_sync_signal *sig_sync, *tmp; ++ unsigned long i; ++ ++ drm_exec_fini(&ctx->exec); ++ ++ list_for_each_entry_safe(sig_sync, tmp, &ctx->signals, node) ++ panthor_sync_signal_free(sig_sync); ++ ++ for (i = 0; i < ctx->job_count; i++) { ++ job_put(ctx->jobs[i].job); ++ kvfree(ctx->jobs[i].syncops); ++ } ++ ++ kvfree(ctx->jobs); ++} ++ ++static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct drm_panthor_dev_query *args = data; ++ ++ if (!args->pointer) { ++ switch (args->type) { ++ case DRM_PANTHOR_DEV_QUERY_GPU_INFO: ++ args->size = sizeof(ptdev->gpu_info); ++ return 0; ++ ++ case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: ++ args->size = sizeof(ptdev->csif_info); ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++ } ++ ++ switch (args->type) { ++ case DRM_PANTHOR_DEV_QUERY_GPU_INFO: ++ return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->gpu_info); ++ ++ case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: ++ return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info); ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++#define PANTHOR_VM_CREATE_FLAGS 0 ++ ++static int panthor_ioctl_vm_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_create *args = data; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ ret = panthor_vm_pool_create_vm(ptdev, pfile->vms, args); ++ if (ret >= 0) { ++ args->id = ret; ++ ret = 0; ++ } ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_destroy *args = data; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ return panthor_vm_pool_destroy_vm(pfile->vms, args->id); ++} ++ ++#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP ++ ++static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_bo_create *args = data; ++ struct panthor_vm *vm = NULL; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ if (!args->size || args->pad || ++ (args->flags & ~PANTHOR_BO_FLAGS)) { ++ ret = -EINVAL; ++ goto out_dev_exit; ++ } ++ ++ if (args->exclusive_vm_id) { ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); ++ if (!vm) { ++ ret = -EINVAL; ++ goto out_dev_exit; ++ } ++ } ++ ++ ret = panthor_gem_create_with_handle(file, ddev, vm, &args->size, ++ args->flags, &args->handle); ++ ++ panthor_vm_put(vm); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct drm_panthor_bo_mmap_offset *args = data; ++ struct drm_gem_object *obj; ++ int ret; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ obj = drm_gem_object_lookup(file, args->handle); ++ if (!obj) ++ return -ENOENT; ++ ++ ret = drm_gem_create_mmap_offset(obj); ++ if (ret) ++ goto out; ++ ++ args->offset = drm_vma_node_offset_addr(&obj->vma_node); ++ ++out: ++ drm_gem_object_put(obj); ++ return ret; 
++} ++ ++static int panthor_ioctl_group_submit(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_submit *args = data; ++ struct drm_panthor_queue_submit *jobs_args; ++ struct panthor_submit_ctx ctx; ++ int ret = 0, cookie; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->queue_submits); ++ if (ret) ++ goto out_dev_exit; ++ ++ ret = panthor_submit_ctx_init(&ctx, file, args->queue_submits.count); ++ if (ret) ++ goto out_free_jobs_args; ++ ++ /* Create jobs and attach sync operations */ ++ for (u32 i = 0; i < args->queue_submits.count; i++) { ++ const struct drm_panthor_queue_submit *qsubmit = &jobs_args[i]; ++ struct drm_sched_job *job; ++ ++ job = panthor_job_create(pfile, args->group_handle, qsubmit); ++ if (IS_ERR(job)) { ++ ret = PTR_ERR(job); ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_add_job(&ctx, i, job, &qsubmit->syncs); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ /* ++ * Collect signal operations on all jobs, such that each job can pick ++ * from it for its dependencies and update the fence to signal when the ++ * job is submitted. ++ */ ++ ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* ++ * We acquire/prepare revs on all jobs before proceeding with the ++ * dependency registration. ++ * ++ * This is solving two problems: ++ * 1. drm_sched_job_arm() and drm_sched_entity_push_job() must be ++ * protected by a lock to make sure no concurrent access to the same ++ * entity get interleaved, which would mess up with the fence seqno ++ * ordering. Luckily, one of the resv being acquired is the VM resv, ++ * and a scheduling entity is only bound to a single VM. As soon as ++ * we acquire the VM resv, we should be safe. ++ * 2. Jobs might depend on fences that were issued by previous jobs in ++ * the same batch, so we can't add dependencies on all jobs before ++ * arming previous jobs and registering the fence to the signal ++ * array, otherwise we might miss dependencies, or point to an ++ * outdated fence. ++ */ ++ if (args->queue_submits.count > 0) { ++ /* All jobs target the same group, so they also point to the same VM. */ ++ struct panthor_vm *vm = panthor_job_vm(ctx.jobs[0].job); ++ ++ drm_exec_until_all_locked(&ctx.exec) { ++ ret = panthor_vm_prepare_mapped_bos_resvs(&ctx.exec, vm, ++ args->queue_submits.count); ++ } ++ ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ /* ++ * Now that resvs are locked/prepared, we can iterate over each job to ++ * add the dependencies, arm the job fence, register the job fence to ++ * the signal array. ++ */ ++ ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Nothing can fail after that point, so we can make our job fences ++ * visible to the outside world. Push jobs and set the job fences to ++ * the resv slots we reserved. This also pushes the fences to the ++ * syncobjs that are part of the signal array. 
++ */ ++ panthor_submit_ctx_push_jobs(&ctx, panthor_job_update_resvs); ++ ++out_cleanup_submit_ctx: ++ panthor_submit_ctx_cleanup(&ctx, panthor_job_put); ++ ++out_free_jobs_args: ++ kvfree(jobs_args); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_destroy *args = data; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ return panthor_group_destroy(pfile, args->group_handle); ++} ++ ++static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_create *args = data; ++ struct drm_panthor_queue_create *queue_args; ++ int ret; ++ ++ if (!args->queues.count) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues); ++ if (ret) ++ return ret; ++ ++ ret = panthor_group_create(pfile, args, queue_args); ++ if (ret >= 0) { ++ args->group_handle = ret; ++ ret = 0; ++ } ++ ++ kvfree(queue_args); ++ return ret; ++} ++ ++static int panthor_ioctl_group_get_state(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_get_state *args = data; ++ ++ return panthor_group_get_state(pfile, args); ++} ++ ++static int panthor_ioctl_tiler_heap_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_tiler_heap_create *args = data; ++ struct panthor_heap_pool *pool; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ pool = panthor_vm_get_heap_pool(vm, true); ++ if (IS_ERR(pool)) { ++ ret = PTR_ERR(pool); ++ goto out_put_vm; ++ } ++ ++ ret = panthor_heap_create(pool, ++ args->initial_chunk_count, ++ args->chunk_size, ++ args->max_chunks, ++ args->target_in_flight, ++ &args->tiler_heap_ctx_gpu_va, ++ &args->first_heap_chunk_gpu_va); ++ if (ret < 0) ++ goto out_put_heap_pool; ++ ++ /* Heap pools are per-VM. We combine the VM and HEAP id to make ++ * a unique heap handle. 
++ */ ++ args->handle = (args->vm_id << 16) | ret; ++ ret = 0; ++ ++out_put_heap_pool: ++ panthor_heap_pool_put(pool); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_tiler_heap_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_tiler_heap_destroy *args = data; ++ struct panthor_heap_pool *pool; ++ struct panthor_vm *vm; ++ int ret; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->handle >> 16); ++ if (!vm) ++ return -EINVAL; ++ ++ pool = panthor_vm_get_heap_pool(vm, false); ++ if (!pool) { ++ ret = -EINVAL; ++ goto out_put_vm; ++ } ++ ++ ret = panthor_heap_destroy(pool, args->handle & GENMASK(15, 0)); ++ panthor_heap_pool_put(pool); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_bind_async(struct drm_device *ddev, ++ struct drm_panthor_vm_bind *args, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_bind_op *jobs_args; ++ struct panthor_submit_ctx ctx; ++ struct panthor_vm *vm; ++ int ret = 0; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); ++ if (ret) ++ goto out_put_vm; ++ ++ ret = panthor_submit_ctx_init(&ctx, file, args->ops.count); ++ if (ret) ++ goto out_free_jobs_args; ++ ++ for (u32 i = 0; i < args->ops.count; i++) { ++ struct drm_panthor_vm_bind_op *op = &jobs_args[i]; ++ struct drm_sched_job *job; ++ ++ job = panthor_vm_bind_job_create(file, vm, op); ++ if (IS_ERR(job)) { ++ ret = PTR_ERR(job); ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_add_job(&ctx, i, job, &op->syncs); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Prepare reservation objects for each VM_BIND job. */ ++ drm_exec_until_all_locked(&ctx.exec) { ++ for (u32 i = 0; i < ctx.job_count; i++) { ++ ret = panthor_vm_bind_job_prepare_resvs(&ctx.exec, ctx.jobs[i].job); ++ drm_exec_retry_on_contention(&ctx.exec); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ } ++ ++ ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Nothing can fail after that point. */ ++ panthor_submit_ctx_push_jobs(&ctx, panthor_vm_bind_job_update_resvs); ++ ++out_cleanup_submit_ctx: ++ panthor_submit_ctx_cleanup(&ctx, panthor_vm_bind_job_put); ++ ++out_free_jobs_args: ++ kvfree(jobs_args); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_bind_sync(struct drm_device *ddev, ++ struct drm_panthor_vm_bind *args, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_bind_op *jobs_args; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); ++ if (ret) ++ goto out_put_vm; ++ ++ for (u32 i = 0; i < args->ops.count; i++) { ++ ret = panthor_vm_bind_exec_sync_op(file, vm, &jobs_args[i]); ++ if (ret) { ++ /* Update ops.count so the user knows where things failed. 
*/ ++ args->ops.count = i; ++ break; ++ } ++ } ++ ++ kvfree(jobs_args); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++#define PANTHOR_VM_BIND_FLAGS DRM_PANTHOR_VM_BIND_ASYNC ++ ++static int panthor_ioctl_vm_bind(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct drm_panthor_vm_bind *args = data; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ if (args->flags & DRM_PANTHOR_VM_BIND_ASYNC) ++ ret = panthor_ioctl_vm_bind_async(ddev, args, file); ++ else ++ ret = panthor_ioctl_vm_bind_sync(ddev, args, file); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_get_state *args = data; ++ struct panthor_vm *vm; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ if (panthor_vm_is_unusable(vm)) ++ args->state = DRM_PANTHOR_VM_STATE_UNUSABLE; ++ else ++ args->state = DRM_PANTHOR_VM_STATE_USABLE; ++ ++ panthor_vm_put(vm); ++ return 0; ++} ++ ++static int ++panthor_open(struct drm_device *ddev, struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_file *pfile; ++ int ret; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return -EINVAL; ++ ++ pfile = kzalloc(sizeof(*pfile), GFP_KERNEL); ++ if (!pfile) { ++ ret = -ENOMEM; ++ goto err_put_mod; ++ } ++ ++ pfile->ptdev = ptdev; ++ ++ ret = panthor_vm_pool_create(pfile); ++ if (ret) ++ goto err_free_file; ++ ++ ret = panthor_group_pool_create(pfile); ++ if (ret) ++ goto err_destroy_vm_pool; ++ ++ file->driver_priv = pfile; ++ return 0; ++ ++err_destroy_vm_pool: ++ panthor_vm_pool_destroy(pfile); ++ ++err_free_file: ++ kfree(pfile); ++ ++err_put_mod: ++ module_put(THIS_MODULE); ++ return ret; ++} ++ ++static void ++panthor_postclose(struct drm_device *ddev, struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ ++ panthor_group_pool_destroy(pfile); ++ panthor_vm_pool_destroy(pfile); ++ ++ kfree(pfile); ++ module_put(THIS_MODULE); ++} ++ ++static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { ++#define PANTHOR_IOCTL(n, func, flags) \ ++ DRM_IOCTL_DEF_DRV(PANTHOR_##n, panthor_ioctl_##func, flags) ++ ++ PANTHOR_IOCTL(DEV_QUERY, dev_query, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_CREATE, vm_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_DESTROY, vm_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_BIND, vm_bind, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_GET_STATE, vm_get_state, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(BO_CREATE, bo_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_CREATE, group_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_DESTROY, group_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_GET_STATE, group_get_state, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), ++}; ++ ++static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) ++{ ++ struct drm_file *file = filp->private_data; ++ struct panthor_file *pfile = file->driver_priv; ++ struct panthor_device *ptdev = pfile->ptdev; ++ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ int ret, cookie; ++ ++ if 
(!drm_dev_enter(file->minor->dev, &cookie)) ++ return -ENODEV; ++ ++ if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET) ++ ret = panthor_device_mmap_io(ptdev, vma); ++ else ++ ret = drm_gem_mmap(filp, vma); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static const struct file_operations panthor_drm_driver_fops = { ++ .open = drm_open, ++ .release = drm_release, ++ .unlocked_ioctl = drm_ioctl, ++ .compat_ioctl = drm_compat_ioctl, ++ .poll = drm_poll, ++ .read = drm_read, ++ .llseek = noop_llseek, ++ .mmap = panthor_mmap, ++}; ++ ++#ifdef CONFIG_DEBUG_FS ++static void panthor_debugfs_init(struct drm_minor *minor) ++{ ++ panthor_mmu_debugfs_init(minor); ++} ++#endif ++ ++/* ++ * PanCSF driver version: ++ * - 1.0 - initial interface ++ */ ++static const struct drm_driver panthor_drm_driver = { ++ .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | ++ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, ++ .open = panthor_open, ++ .postclose = panthor_postclose, ++ .ioctls = panthor_drm_driver_ioctls, ++ .num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls), ++ .fops = &panthor_drm_driver_fops, ++ .name = "panthor", ++ .desc = "Panthor DRM driver", ++ .date = "20230801", ++ .major = 1, ++ .minor = 0, ++ ++ .gem_create_object = panthor_gem_create_object, ++ .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, ++#ifdef CONFIG_DEBUG_FS ++ .debugfs_init = panthor_debugfs_init, ++#endif ++}; ++ ++static int panthor_probe(struct platform_device *pdev) ++{ ++ struct panthor_device *ptdev; ++ ++ ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver, ++ struct panthor_device, base); ++ if (!ptdev) ++ return -ENOMEM; ++ ++ platform_set_drvdata(pdev, ptdev); ++ ++ return panthor_device_init(ptdev); ++} ++ ++static void panthor_remove(struct platform_device *pdev) ++{ ++ struct panthor_device *ptdev = platform_get_drvdata(pdev); ++ ++ panthor_device_unplug(ptdev); ++} ++ ++static const struct of_device_id dt_match[] = { ++ { .compatible = "rockchip,rk3588-mali" }, ++ { .compatible = "arm,mali-valhall-csf" }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, dt_match); ++ ++static DEFINE_RUNTIME_DEV_PM_OPS(panthor_pm_ops, ++ panthor_device_suspend, ++ panthor_device_resume, ++ NULL); ++ ++static struct platform_driver panthor_driver = { ++ .probe = panthor_probe, ++ .remove_new = panthor_remove, ++ .driver = { ++ .name = "panthor", ++ .pm = &panthor_pm_ops, ++ .of_match_table = dt_match, ++ }, ++}; ++ ++/* ++ * Workqueue used to cleanup stuff. ++ * ++ * We create a dedicated workqueue so we can drain on unplug and ++ * make sure all resources are freed before the module is unloaded. 
++ */ ++struct workqueue_struct *panthor_cleanup_wq; ++ ++static int __init panthor_init(void) ++{ ++ int ret; ++ ++ ret = panthor_mmu_pt_cache_init(); ++ if (ret) ++ return ret; ++ ++ panthor_cleanup_wq = alloc_workqueue("panthor-cleanup", WQ_UNBOUND, 0); ++ if (!panthor_cleanup_wq) { ++ pr_err("panthor: Failed to allocate the workqueues"); ++ ret = -ENOMEM; ++ goto err_mmu_pt_cache_fini; ++ } ++ ++ ret = platform_driver_register(&panthor_driver); ++ if (ret) ++ goto err_destroy_cleanup_wq; ++ ++ return 0; ++ ++err_destroy_cleanup_wq: ++ destroy_workqueue(panthor_cleanup_wq); ++ ++err_mmu_pt_cache_fini: ++ panthor_mmu_pt_cache_fini(); ++ return ret; ++} ++module_init(panthor_init); ++ ++static void __exit panthor_exit(void) ++{ ++ platform_driver_unregister(&panthor_driver); ++ destroy_workqueue(panthor_cleanup_wq); ++ panthor_mmu_pt_cache_fini(); ++} ++module_exit(panthor_exit); ++ ++MODULE_AUTHOR("Panthor Project Developers"); ++MODULE_DESCRIPTION("Panthor DRM Driver"); ++MODULE_LICENSE("Dual MIT/GPL"); diff --git a/patches-6.6/034-41-v6.10-drm-panthor-Allow-driver-compilation.patch b/patches-6.6/034-41-v6.10-drm-panthor-Allow-driver-compilation.patch new file mode 100644 index 0000000..733733b --- /dev/null +++ b/patches-6.6/034-41-v6.10-drm-panthor-Allow-driver-compilation.patch @@ -0,0 +1,104 @@ +From d72f049087d4f973f6332b599c92177e718107de Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:26 +0100 +Subject: [PATCH] drm/panthor: Allow driver compilation + +Now that all blocks are available, we can add/update Kconfig/Makefile +files to allow compilation. + +v6: +- Add Maxime's and Heiko's acks +- Keep source files alphabetically ordered in the Makefile + +v4: +- Add Steve's R-b + +v3: +- Add a dep on DRM_GPUVM +- Fix dependencies in Kconfig +- Expand help text to (hopefully) describe which GPUs are to be + supported by this driver and which are for panfrost. 
+ +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-13-boris.brezillon@collabora.com +--- + drivers/gpu/drm/Kconfig | 2 ++ + drivers/gpu/drm/Makefile | 1 + + drivers/gpu/drm/panthor/Kconfig | 23 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/Makefile | 14 ++++++++++++++ + 4 files changed, 40 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/Kconfig + create mode 100644 drivers/gpu/drm/panthor/Makefile + +--- a/drivers/gpu/drm/Kconfig ++++ b/drivers/gpu/drm/Kconfig +@@ -381,6 +381,8 @@ source "drivers/gpu/drm/lima/Kconfig" + + source "drivers/gpu/drm/panfrost/Kconfig" + ++source "drivers/gpu/drm/panthor/Kconfig" ++ + source "drivers/gpu/drm/aspeed/Kconfig" + + source "drivers/gpu/drm/mcde/Kconfig" +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -189,6 +189,7 @@ obj-$(CONFIG_DRM_XEN) += xen/ + obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ + obj-$(CONFIG_DRM_LIMA) += lima/ + obj-$(CONFIG_DRM_PANFROST) += panfrost/ ++obj-$(CONFIG_DRM_PANTHOR) += panthor/ + obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ + obj-$(CONFIG_DRM_MCDE) += mcde/ + obj-$(CONFIG_DRM_TIDSS) += tidss/ +--- /dev/null ++++ b/drivers/gpu/drm/panthor/Kconfig +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 or MIT ++ ++config DRM_PANTHOR ++ tristate "Panthor (DRM support for ARM Mali CSF-based GPUs)" ++ depends on DRM ++ depends on ARM || ARM64 || COMPILE_TEST ++ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE ++ depends on MMU ++ select DEVFREQ_GOV_SIMPLE_ONDEMAND ++ select DRM_EXEC ++ select DRM_GEM_SHMEM_HELPER ++ select DRM_GPUVM ++ select DRM_SCHED ++ select IOMMU_IO_PGTABLE_LPAE ++ select IOMMU_SUPPORT ++ select PM_DEVFREQ ++ help ++ DRM driver for ARM Mali CSF-based GPUs. ++ ++ This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. ++ ++ Note that the Mali-G68 and Mali-G78, while Valhall architecture, will ++ be supported with the panfrost driver as they are not CSF GPUs. +--- /dev/null ++++ b/drivers/gpu/drm/panthor/Makefile +@@ -0,0 +1,14 @@ ++# SPDX-License-Identifier: GPL-2.0 or MIT ++ ++panthor-y := \ ++ panthor_devfreq.o \ ++ panthor_device.o \ ++ panthor_drv.o \ ++ panthor_fw.o \ ++ panthor_gem.o \ ++ panthor_gpu.o \ ++ panthor_heap.o \ ++ panthor_mmu.o \ ++ panthor_sched.o ++ ++obj-$(CONFIG_DRM_PANTHOR) += panthor.o diff --git a/patches-6.6/034-42-v6.10-drm-panthor-Fix-panthor_devfreq-kerneldoc.patch b/patches-6.6/034-42-v6.10-drm-panthor-Fix-panthor_devfreq-kerneldoc.patch new file mode 100644 index 0000000..63605d3 --- /dev/null +++ b/patches-6.6/034-42-v6.10-drm-panthor-Fix-panthor_devfreq-kerneldoc.patch @@ -0,0 +1,27 @@ +From 9c86b03863844ce69f99aa66404c79492ec9e208 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Mon, 4 Mar 2024 10:08:10 +0100 +Subject: [PATCH] drm/panthor: Fix panthor_devfreq kerneldoc + +Missing '*' to have a valid kerneldoc prefix. 
+ +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202403031019.6jvrOqGT-lkp@intel.com/ +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/20240304090812.3941084-2-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_devfreq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_devfreq.c ++++ b/drivers/gpu/drm/panthor/panthor_devfreq.c +@@ -34,7 +34,7 @@ struct panthor_devfreq { + /** @last_busy_state: True if the GPU was busy last time we updated the state. */ + bool last_busy_state; + +- /* ++ /** + * @lock: Lock used to protect busy_time, idle_time, time_last_update and + * last_busy_state. + * diff --git a/patches-6.6/034-43-v6.10-drm-panthor-Explicitly-include-mm-h-for-the-virt-.patch b/patches-6.6/034-43-v6.10-drm-panthor-Explicitly-include-mm-h-for-the-virt-.patch new file mode 100644 index 0000000..ddf8613 --- /dev/null +++ b/patches-6.6/034-43-v6.10-drm-panthor-Explicitly-include-mm-h-for-the-virt-.patch @@ -0,0 +1,31 @@ +From eb1dc10a6ee3559310436ab62db93b72310a2a18 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Mon, 4 Mar 2024 10:08:11 +0100 +Subject: [PATCH] drm/panthor: Explicitly include mm.h for the {virt, + __phys)_to_pfn() defs + +Something on arm[64] must be including , but things fail +to compile on sparc64. Make sure this header is included (through +linux/mm.h) so this driver can be compile-tested on all supported +architectures. + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202403031142.Vl4pW7X6-lkp@intel.com/ +Signed-off-by: Boris Brezillon +Reviewed-by: Liviu Dudau +Reviewed-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/20240304090812.3941084-3-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/panthor/panthor_device.c ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -4,6 +4,7 @@ + /* Copyright 2023 Collabora ltd. */ + + #include ++#include + #include + #include + #include diff --git a/patches-6.6/034-44-v6.10-drm-panthor-Fix-the-CONFIG_PM_n-case.patch b/patches-6.6/034-44-v6.10-drm-panthor-Fix-the-CONFIG_PM_n-case.patch new file mode 100644 index 0000000..c8c18e2 --- /dev/null +++ b/patches-6.6/034-44-v6.10-drm-panthor-Fix-the-CONFIG_PM_n-case.patch @@ -0,0 +1,75 @@ +From 0cd8363ed802922e39446d783f767b3e09335ddc Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Mon, 18 Mar 2024 16:31:17 +0100 +Subject: [PATCH] drm/panthor: Fix the CONFIG_PM=n case + +Putting a hard dependency on CONFIG_PM is not possible because of a +circular dependency issue, and it's actually not desirable either. In +order to support this use case, we forcibly resume at init time, and +suspend at unplug time. 
+ +v2: +- Drop the #ifdef CONFIG_PM section around panthor_pm_ops's definition + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202403031944.EOimQ8WK-lkp@intel.com/ +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: AngeloGioacchino Del Regno +Link: https://patchwork.freedesktop.org/patch/msgid/20240318153117.1321544-1-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 13 +++++++++++-- + drivers/gpu/drm/panthor/panthor_drv.c | 2 +- + 2 files changed, 12 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.c ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -87,6 +87,10 @@ void panthor_device_unplug(struct pantho + pm_runtime_dont_use_autosuspend(ptdev->base.dev); + pm_runtime_put_sync_suspend(ptdev->base.dev); + ++ /* If PM is disabled, we need to call the suspend handler manually. */ ++ if (!IS_ENABLED(CONFIG_PM)) ++ panthor_device_suspend(ptdev->base.dev); ++ + /* Report the unplug operation as done to unblock concurrent + * panthor_device_unplug() callers. + */ +@@ -218,6 +222,13 @@ int panthor_device_init(struct panthor_d + if (ret) + return ret; + ++ /* If PM is disabled, we need to call panthor_device_resume() manually. */ ++ if (!IS_ENABLED(CONFIG_PM)) { ++ ret = panthor_device_resume(ptdev->base.dev); ++ if (ret) ++ return ret; ++ } ++ + ret = panthor_gpu_init(ptdev); + if (ret) + goto err_rpm_put; +@@ -402,7 +413,6 @@ int panthor_device_mmap_io(struct pantho + return 0; + } + +-#ifdef CONFIG_PM + int panthor_device_resume(struct device *dev) + { + struct panthor_device *ptdev = dev_get_drvdata(dev); +@@ -547,4 +557,3 @@ err_set_active: + mutex_unlock(&ptdev->pm.mmio_lock); + return ret; + } +-#endif +--- a/drivers/gpu/drm/panthor/panthor_drv.c ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -1416,7 +1416,7 @@ static struct platform_driver panthor_dr + .remove_new = panthor_remove, + .driver = { + .name = "panthor", +- .pm = &panthor_pm_ops, ++ .pm = pm_ptr(&panthor_pm_ops), + .of_match_table = dt_match, + }, + }; diff --git a/patches-6.6/034-45-v6.10-drm-panthor-Don-t-use-virt_to_pfn.patch b/patches-6.6/034-45-v6.10-drm-panthor-Don-t-use-virt_to_pfn.patch new file mode 100644 index 0000000..c02405b --- /dev/null +++ b/patches-6.6/034-45-v6.10-drm-panthor-Don-t-use-virt_to_pfn.patch @@ -0,0 +1,74 @@ +From 0b45921c2a8831834a5f8a52ddd0b25b5b1c6faf Mon Sep 17 00:00:00 2001 +From: Steven Price +Date: Mon, 18 Mar 2024 14:51:19 +0000 +Subject: [PATCH] drm/panthor: Don't use virt_to_pfn() + +virt_to_pfn() isn't available on x86 (except to xen) so breaks +COMPILE_TEST builds. Avoid its use completely by instead storing the +struct page pointer allocated in panthor_device_init() and using +page_to_pfn() instead. 
+ +Signed-off-by: Steven Price +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240318145119.368582-1-steven.price@arm.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 10 ++++++---- + drivers/gpu/drm/panthor/panthor_device.h | 2 +- + 2 files changed, 7 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.c ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -151,11 +151,12 @@ static bool panthor_device_is_initialize + + static void panthor_device_free_page(struct drm_device *ddev, void *data) + { +- free_page((unsigned long)data); ++ __free_page(data); + } + + int panthor_device_init(struct panthor_device *ptdev) + { ++ u32 *dummy_page_virt; + struct resource *res; + struct page *p; + int ret; +@@ -176,7 +177,8 @@ int panthor_device_init(struct panthor_d + if (!p) + return -ENOMEM; + +- ptdev->pm.dummy_latest_flush = page_address(p); ++ ptdev->pm.dummy_latest_flush = p; ++ dummy_page_virt = page_address(p); + ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page, + ptdev->pm.dummy_latest_flush); + if (ret) +@@ -188,7 +190,7 @@ int panthor_device_init(struct panthor_d + * happens while the dummy page is mapped. Zero cannot be used because + * that means 'always flush'. + */ +- *ptdev->pm.dummy_latest_flush = 1; ++ *dummy_page_virt = 1; + + INIT_WORK(&ptdev->reset.work, panthor_device_reset_work); + ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0); +@@ -364,7 +366,7 @@ static vm_fault_t panthor_mmio_vm_fault( + if (active) + pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); + else +- pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush); ++ pfn = page_to_pfn(ptdev->pm.dummy_latest_flush); + break; + + default: +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -160,7 +160,7 @@ struct panthor_device { + * Used to replace the real LATEST_FLUSH page when the GPU + * is suspended. + */ +- u32 *dummy_latest_flush; ++ struct page *dummy_latest_flush; + } pm; + }; + diff --git a/patches-6.6/034-46-v6.10-drm-panthor-Fix-spelling-mistake-readyness-readiness.patch b/patches-6.6/034-46-v6.10-drm-panthor-Fix-spelling-mistake-readyness-readiness.patch new file mode 100644 index 0000000..d2c5b50 --- /dev/null +++ b/patches-6.6/034-46-v6.10-drm-panthor-Fix-spelling-mistake-readyness-readiness.patch @@ -0,0 +1,26 @@ +From 9d1848778e56fb565db041e4237a2f27f9277f63 Mon Sep 17 00:00:00 2001 +From: Colin Ian King +Date: Tue, 26 Mar 2024 10:02:19 +0000 +Subject: [PATCH] drm/panthor: Fix spelling mistake "readyness" -> "readiness" + +There is a spelling mistake in a drm_err message. Fix it. 
+ +Signed-off-by: Colin Ian King +Acked-by: Liviu Dudau +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240326100219.43989-1-colin.i.king@gmail.com +--- + drivers/gpu/drm/panthor/panthor_gpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_gpu.c ++++ b/drivers/gpu/drm/panthor/panthor_gpu.c +@@ -333,7 +333,7 @@ int panthor_gpu_block_power_on(struct pa + val, (mask32 & val) == mask32, + 100, timeout_us); + if (ret) { +- drm_err(&ptdev->base, "timeout waiting on %s:%llx readyness", ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness", + blk_name, mask); + return ret; + } diff --git a/patches-6.6/034-47-v6.10-drm-panthor-Fix-IO-page-mmap-for-32-bit-userspace-on.patch b/patches-6.6/034-47-v6.10-drm-panthor-Fix-IO-page-mmap-for-32-bit-userspace-on.patch new file mode 100644 index 0000000..dff8430 --- /dev/null +++ b/patches-6.6/034-47-v6.10-drm-panthor-Fix-IO-page-mmap-for-32-bit-userspace-on.patch @@ -0,0 +1,142 @@ +From 11f0275cc1b90b4b9bf37a5ebc27c0a9b2451b4e Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Tue, 26 Mar 2024 12:12:03 +0100 +Subject: [PATCH] drm/panthor: Fix IO-page mmap() for 32-bit userspace on + 64-bit kernel +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When mapping an IO region, the pseudo-file offset is dependent on the +userspace architecture. panthor_device_mmio_offset() abstracts that +away for us by turning a userspace MMIO offset into its kernel +equivalent, but we were not updating vm_area_struct::vm_pgoff +accordingly, leading us to attach the MMIO region to the wrong file +offset. + +This has implications when we start mixing 64 bit and 32 bit apps, but +that's only really a problem when we start having more that 2^43 bytes of +memory allocated, which is very unlikely to happen. + +What's more problematic is the fact this turns our +unmap_mapping_range(DRM_PANTHOR_USER_MMIO_OFFSET) calls, which are +supposed to kill the MMIO mapping when entering suspend, into NOPs. +Which means we either keep the dummy flush_id mapping active at all +times, or we risk a BUS_FAULT if the MMIO region was mapped, and the +GPU is suspended after that. + +Solve that by patching vm_pgoff early in panthor_mmap(). With +this in place, we no longer need the panthor_device_mmio_offset() +helper. + +v3: +- No changes + +v2: +- Kill panthor_device_mmio_offset() + +Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block") +Reported-by: Adrián Larumbe +Reported-by: Lukas F. 
Hartmann +Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10835 +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-1-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 8 ++++---- + drivers/gpu/drm/panthor/panthor_device.h | 24 ------------------------ + drivers/gpu/drm/panthor/panthor_drv.c | 17 ++++++++++++++++- + 3 files changed, 20 insertions(+), 29 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.c ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -348,7 +348,7 @@ static vm_fault_t panthor_mmio_vm_fault( + { + struct vm_area_struct *vma = vmf->vma; + struct panthor_device *ptdev = vma->vm_private_data; +- u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; + unsigned long pfn; + pgprot_t pgprot; + vm_fault_t ret; +@@ -361,7 +361,7 @@ static vm_fault_t panthor_mmio_vm_fault( + mutex_lock(&ptdev->pm.mmio_lock); + active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE; + +- switch (panthor_device_mmio_offset(id)) { ++ switch (offset) { + case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: + if (active) + pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); +@@ -392,9 +392,9 @@ static const struct vm_operations_struct + + int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma) + { +- u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; + +- switch (panthor_device_mmio_offset(id)) { ++ switch (offset) { + case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: + if (vma->vm_end - vma->vm_start != PAGE_SIZE || + (vma->vm_flags & (VM_WRITE | VM_EXEC))) +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -365,30 +365,6 @@ static int panthor_request_ ## __name ## + pirq); \ + } + +-/** +- * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one +- * @offset: Offset to convert. +- * +- * With 32-bit systems being limited by the 32-bit representation of mmap2's +- * pgoffset field, we need to make the MMIO offset arch specific. This function +- * converts a user MMIO offset into something the kernel driver understands. +- * +- * If the kernel and userspace architecture match, the offset is unchanged. If +- * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match +- * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible. +- * +- * Return: Adjusted offset. +- */ +-static inline u64 panthor_device_mmio_offset(u64 offset) +-{ +-#ifdef CONFIG_ARM64 +- if (test_tsk_thread_flag(current, TIF_32BIT)) +- offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; +-#endif +- +- return offset; +-} +- + extern struct workqueue_struct *panthor_cleanup_wq; + + #endif +--- a/drivers/gpu/drm/panthor/panthor_drv.c ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -1326,7 +1326,22 @@ static int panthor_mmap(struct file *fil + if (!drm_dev_enter(file->minor->dev, &cookie)) + return -ENODEV; + +- if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET) ++#ifdef CONFIG_ARM64 ++ /* ++ * With 32-bit systems being limited by the 32-bit representation of ++ * mmap2's pgoffset field, we need to make the MMIO offset arch ++ * specific. This converts a user MMIO offset into something the kernel ++ * driver understands. 
++ */ ++ if (test_tsk_thread_flag(current, TIF_32BIT) && ++ offset >= DRM_PANTHOR_USER_MMIO_OFFSET_32BIT) { ++ offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - ++ DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; ++ vma->vm_pgoff = offset >> PAGE_SHIFT; ++ } ++#endif ++ ++ if (offset >= DRM_PANTHOR_USER_MMIO_OFFSET) + ret = panthor_device_mmap_io(ptdev, vma); + else + ret = drm_gem_mmap(filp, vma); diff --git a/patches-6.6/034-48-v6.10-drm-panthor-Fix-ordering-in-_irq_suspend.patch b/patches-6.6/034-48-v6.10-drm-panthor-Fix-ordering-in-_irq_suspend.patch new file mode 100644 index 0000000..4832bf0 --- /dev/null +++ b/patches-6.6/034-48-v6.10-drm-panthor-Fix-ordering-in-_irq_suspend.patch @@ -0,0 +1,55 @@ +From 1de434e0b2757061b09b347264f1ff5bdf996e58 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Tue, 26 Mar 2024 12:12:04 +0100 +Subject: [PATCH] drm/panthor: Fix ordering in _irq_suspend() + +Make sure we set suspended=true last to avoid generating an irq storm +in the unlikely case where an IRQ happens between the suspended=true +assignment and the _INT_MASK update. + +We also move the mask=0 assignment before writing to the _INT_MASK +register to prevent the thread handler from unmasking the interrupt +behind our back. This means we might lose events if there were some +pending when we get to suspend the IRQ, but that's fine. +The synchronize_irq() we have in the _irq_suspend() path was not +there to make sure all IRQs are processed, just to make sure we don't +have registers accesses coming from the irq handlers after +_irq_suspend() has been called. If there's a need to have all pending +IRQs processed, it should happen before _irq_suspend() is called. + +v3: +- Add Steve's R-b + +v2: +- New patch + +Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block") +Reported-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Liviu Dudau +Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-2-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -325,7 +325,7 @@ static inline void panthor_ ## __name ## + { \ + int cookie; \ + \ +- atomic_set(&pirq->suspended, true); \ ++ pirq->mask = 0; \ + \ + if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ +@@ -333,7 +333,7 @@ static inline void panthor_ ## __name ## + drm_dev_exit(cookie); \ + } \ + \ +- pirq->mask = 0; \ ++ atomic_set(&pirq->suspended, true); \ + } \ + \ + static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ diff --git a/patches-6.6/034-49-v6.10-drm-panthor-Drop-the-dev_enter-exit-sections-in.patch b/patches-6.6/034-49-v6.10-drm-panthor-Drop-the-dev_enter-exit-sections-in.patch new file mode 100644 index 0000000..6e09c3d --- /dev/null +++ b/patches-6.6/034-49-v6.10-drm-panthor-Drop-the-dev_enter-exit-sections-in.patch @@ -0,0 +1,56 @@ +From 962f88b9c91647f3ff4a0d3709662641baed5164 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Tue, 26 Mar 2024 12:12:05 +0100 +Subject: [PATCH] drm/panthor: Drop the dev_enter/exit() sections in + _irq_suspend/resume() + +There's no reason for _irq_suspend/resume() to be called after the +device has been unplugged, and keeping this dev_enter/exit() +section in _irq_suspend() is turns _irq_suspend() into a NOP +when called from the _unplug() functions, which we don't 
want. + +v3: +- New patch + +Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block") +Signed-off-by: Boris Brezillon +Reviewed-by: Liviu Dudau +Reviewed-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-3-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.h | 17 ++++------------- + 1 file changed, 4 insertions(+), 13 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -326,13 +326,8 @@ static inline void panthor_ ## __name ## + int cookie; \ + \ + pirq->mask = 0; \ +- \ +- if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ +- gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ +- synchronize_irq(pirq->irq); \ +- drm_dev_exit(cookie); \ +- } \ +- \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ ++ synchronize_irq(pirq->irq); \ + atomic_set(&pirq->suspended, true); \ + } \ + \ +@@ -342,12 +337,8 @@ static inline void panthor_ ## __name ## + \ + atomic_set(&pirq->suspended, false); \ + pirq->mask = mask; \ +- \ +- if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ +- gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ +- gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ +- drm_dev_exit(cookie); \ +- } \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ + } \ + \ + static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ diff --git a/patches-6.6/034-50-v6.10-drm-panthor-Fix-clang-Wunused-but-set-variable-in.patch b/patches-6.6/034-50-v6.10-drm-panthor-Fix-clang-Wunused-but-set-variable-in.patch new file mode 100644 index 0000000..11cde86 --- /dev/null +++ b/patches-6.6/034-50-v6.10-drm-panthor-Fix-clang-Wunused-but-set-variable-in.patch @@ -0,0 +1,46 @@ +From d76653c32dd16d78e56208b4819134e766257c06 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Thu, 28 Mar 2024 09:22:07 -0700 +Subject: [PATCH] drm/panthor: Fix clang -Wunused-but-set-variable in + tick_ctx_apply() + +Clang warns (or errors with CONFIG_WERROR): + + drivers/gpu/drm/panthor/panthor_sched.c:2048:6: error: variable 'csg_mod_mask' set but not used [-Werror,-Wunused-but-set-variable] + 2048 | u32 csg_mod_mask = 0, free_csg_slots = 0; + | ^ + 1 error generated. + +The variable is an artifact left over from refactoring that occurred +during the development of the initial series for this driver. Remove it +to resolve the warning. 
+ +Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") +Signed-off-by: Nathan Chancellor +Reviewed-by: Steven Price +Reviewed-by: Justin Stitt +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240328-panthor-drop-csg_mod_mask-v1-1-5a80be3df581@kernel.org +--- + drivers/gpu/drm/panthor/panthor_sched.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_sched.c ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -2045,7 +2045,7 @@ tick_ctx_apply(struct panthor_scheduler + struct panthor_device *ptdev = sched->ptdev; + struct panthor_csg_slot *csg_slot; + int prio, new_csg_prio = MAX_CSG_PRIO, i; +- u32 csg_mod_mask = 0, free_csg_slots = 0; ++ u32 free_csg_slots = 0; + struct panthor_csg_slots_upd_ctx upd_ctx; + int ret; + +@@ -2139,7 +2139,6 @@ tick_ctx_apply(struct panthor_scheduler + + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + csg_slot = &sched->csg_slots[csg_id]; +- csg_mod_mask |= BIT(csg_id); + group_bind_locked(group, csg_id); + csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--); + csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, diff --git a/patches-6.6/034-51-v6.10-drm-panthor-Cleanup-unused-variable-cookie-.patch b/patches-6.6/034-51-v6.10-drm-panthor-Cleanup-unused-variable-cookie-.patch new file mode 100644 index 0000000..90c0ed2 --- /dev/null +++ b/patches-6.6/034-51-v6.10-drm-panthor-Cleanup-unused-variable-cookie-.patch @@ -0,0 +1,40 @@ +From 00044169de061dac8d9da2cf930757c53006adff Mon Sep 17 00:00:00 2001 +From: Liviu Dudau +Date: Tue, 2 Apr 2024 22:54:22 +0100 +Subject: [PATCH] drm/panthor: Cleanup unused variable 'cookie' + +Commit 962f88b9c916 ("drm/panthor: Drop the dev_enter/exit() sections in +_irq_suspend/resume()") removed the code that used the 'cookie' variable +but left the declaration in place. Remove it. 
+ +Fixes: 962f88b9c916 ("drm/panthor: Drop the dev_enter/exit() sections in _irq_suspend/resume()") +Cc: Boris Brezillon +Cc: Steven Price +Signed-off-by: Liviu Dudau +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240402215423.360341-1-liviu.dudau@arm.com +--- + drivers/gpu/drm/panthor/panthor_device.h | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -323,8 +323,6 @@ static irqreturn_t panthor_ ## __name ## + \ + static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ + { \ +- int cookie; \ +- \ + pirq->mask = 0; \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ + synchronize_irq(pirq->irq); \ +@@ -333,8 +331,6 @@ static inline void panthor_ ## __name ## + \ + static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ + { \ +- int cookie; \ +- \ + atomic_set(&pirq->suspended, false); \ + pirq->mask = mask; \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ diff --git a/patches-6.6/034-52-v6.10-drm-panthor-Fix-some-kerneldoc-warnings.patch b/patches-6.6/034-52-v6.10-drm-panthor-Fix-some-kerneldoc-warnings.patch new file mode 100644 index 0000000..2f853e4 --- /dev/null +++ b/patches-6.6/034-52-v6.10-drm-panthor-Fix-some-kerneldoc-warnings.patch @@ -0,0 +1,58 @@ +From be7ffc821f5fc2eb30944562a04901c10892cc7c Mon Sep 17 00:00:00 2001 +From: Liviu Dudau +Date: Tue, 2 Apr 2024 22:54:23 +0100 +Subject: [PATCH] drm/panthor: Fix some kerneldoc warnings + +When compiling with W=1 the build process will flag empty comments, +misnamed documented variables and incorrect tagging of functions. +Fix them in one go. + +Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") +Cc: Boris Brezillon +Cc: Steven Price +Signed-off-by: Liviu Dudau +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240402215423.360341-2-liviu.dudau@arm.com +--- + drivers/gpu/drm/panthor/panthor_sched.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_sched.c ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -517,7 +517,7 @@ struct panthor_group { + /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */ + u8 max_compute_cores; + +- /** @max_compute_cores: Maximum number of shader cores used for fragment jobs. */ ++ /** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */ + u8 max_fragment_cores; + + /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */ +@@ -993,7 +993,7 @@ cs_slot_prog_locked(struct panthor_devic + } + + /** +- * @cs_slot_reset_locked() - Reset a queue slot ++ * cs_slot_reset_locked() - Reset a queue slot + * @ptdev: Device. + * @csg_id: Group slot. + * @cs_id: Queue slot. +@@ -1591,7 +1591,7 @@ static void sched_process_idle_event_loc + } + + /** +- * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ ++ * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ + * @ptdev: Device. 
+ */ + static void sched_process_global_irq_locked(struct panthor_device *ptdev) +@@ -1660,8 +1660,6 @@ static const struct dma_fence_ops pantho + .get_timeline_name = queue_fence_get_timeline_name, + }; + +-/** +- */ + struct panthor_csg_slots_upd_ctx { + u32 update_mask; + u32 timedout_mask; diff --git a/patches-6.6/034-53-v6.10-drm-panthor-Fix-a-couple-ENOMEM-error-codes.patch b/patches-6.6/034-53-v6.10-drm-panthor-Fix-a-couple-ENOMEM-error-codes.patch new file mode 100644 index 0000000..43ac629 --- /dev/null +++ b/patches-6.6/034-53-v6.10-drm-panthor-Fix-a-couple-ENOMEM-error-codes.patch @@ -0,0 +1,42 @@ +From d33733263a550775c7574169f62bf144f74d8f9a Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 2 Apr 2024 12:58:09 +0300 +Subject: [PATCH] drm/panthor: Fix a couple -ENOMEM error codes + +These error paths forgot to set the error code to -ENOMEM. + +Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block") +Signed-off-by: Dan Carpenter +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/cf5bbba5-427e-4940-b91e-925f9fa71f8d@moroto.mountain +--- + drivers/gpu/drm/panthor/panthor_mmu.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_mmu.c ++++ b/drivers/gpu/drm/panthor/panthor_mmu.c +@@ -1264,8 +1264,10 @@ static int panthor_vm_prepare_map_op_ctx + op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, + sizeof(*op_ctx->rsvd_page_tables.pages), + GFP_KERNEL); +- if (!op_ctx->rsvd_page_tables.pages) ++ if (!op_ctx->rsvd_page_tables.pages) { ++ ret = -ENOMEM; + goto err_cleanup; ++ } + + ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, + op_ctx->rsvd_page_tables.pages); +@@ -1318,8 +1320,10 @@ static int panthor_vm_prepare_unmap_op_c + op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, + sizeof(*op_ctx->rsvd_page_tables.pages), + GFP_KERNEL); +- if (!op_ctx->rsvd_page_tables.pages) ++ if (!op_ctx->rsvd_page_tables.pages) { ++ ret = -ENOMEM; + goto err_cleanup; ++ } + + ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, + op_ctx->rsvd_page_tables.pages); diff --git a/patches-6.6/034-54-v6.10-drm-panthor-Fix-error-code-in-panthor_gpu_init.patch b/patches-6.6/034-54-v6.10-drm-panthor-Fix-error-code-in-panthor_gpu_init.patch new file mode 100644 index 0000000..4c72843 --- /dev/null +++ b/patches-6.6/034-54-v6.10-drm-panthor-Fix-error-code-in-panthor_gpu_init.patch @@ -0,0 +1,33 @@ +From 99b74db1e27145bdf0afb85559aa70d951569ac3 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 2 Apr 2024 12:56:19 +0300 +Subject: [PATCH] drm/panthor: Fix error code in panthor_gpu_init() + +This code accidentally returns zero/success on error because of a typo. +It should be "irq" instead of "ret". The other thing is that if +platform_get_irq_byname() were to return zero then the error code would +be cmplicated. Fortunately, it does not so we can just change <= to +< 0. 
+ +Fixes: 5cd894e258c4 ("drm/panthor: Add the GPU logical block") +Signed-off-by: Dan Carpenter +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/d753e684-43ee-45c2-a1fd-86222da204e1@moroto.mountain +--- + drivers/gpu/drm/panthor/panthor_gpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_gpu.c ++++ b/drivers/gpu/drm/panthor/panthor_gpu.c +@@ -211,8 +211,8 @@ int panthor_gpu_init(struct panthor_devi + return ret; + + irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); +- if (irq <= 0) +- return ret; ++ if (irq < 0) ++ return irq; + + ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK); + if (ret) diff --git a/patches-6.6/034-55-v6.10-drm-panthor-Fix-off-by-one-in-panthor_fw_get_cs_iface.patch b/patches-6.6/034-55-v6.10-drm-panthor-Fix-off-by-one-in-panthor_fw_get_cs_iface.patch new file mode 100644 index 0000000..9b10ef9 --- /dev/null +++ b/patches-6.6/034-55-v6.10-drm-panthor-Fix-off-by-one-in-panthor_fw_get_cs_iface.patch @@ -0,0 +1,28 @@ +From 2b5890786014b926f845402ae80ebc71c4bd6d5c Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 2 Apr 2024 12:56:42 +0300 +Subject: [PATCH] drm/panthor: Fix off by one in panthor_fw_get_cs_iface() + +The ->iface.streams[csg_slot][] array has MAX_CS_PER_CSG elements so +this > comparison needs to be >= to prevent an out of bounds access. + +Fixes: 2718d91816ee ("drm/panthor: Add the FW logical block") +Signed-off-by: Dan Carpenter +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/62835c16-c85c-483d-a8fe-63be78d49d15@moroto.mountain +--- + drivers/gpu/drm/panthor/panthor_fw.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_fw.c ++++ b/drivers/gpu/drm/panthor/panthor_fw.c +@@ -308,7 +308,7 @@ panthor_fw_get_csg_iface(struct panthor_ + struct panthor_fw_cs_iface * + panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) + { +- if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG)) ++ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG)) + return NULL; + + return &ptdev->fw->iface.streams[csg_slot][cs_slot]; diff --git a/patches-6.6/034-56-v6.10-drm-panthor-Don-t-return-NULL-from.patch b/patches-6.6/034-56-v6.10-drm-panthor-Don-t-return-NULL-from.patch new file mode 100644 index 0000000..e9d7eba --- /dev/null +++ b/patches-6.6/034-56-v6.10-drm-panthor-Don-t-return-NULL-from.patch @@ -0,0 +1,58 @@ +From 45c734fdd43db14444025910b4c59dd2b8be714a Mon Sep 17 00:00:00 2001 +From: Harshit Mogalapalli +Date: Tue, 2 Apr 2024 07:14:11 -0700 +Subject: [PATCH] drm/panthor: Don't return NULL from + panthor_vm_get_heap_pool() + +The kernel doc says this function returns either a valid pointer +or an ERR_PTR(), but in practice this function can return NULL if +create=false. Fix the function to match the doc (return +ERR_PTR(-ENOENT) instead of NULL) and adjust all call-sites +accordingly. 
+ +Fixes: 4bdca1150792 ("drm/panthor: Add the driver frontend block") +Signed-off-by: Harshit Mogalapalli +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240402141412.1707949-1-harshit.m.mogalapalli@oracle.com +--- + drivers/gpu/drm/panthor/panthor_drv.c | 4 ++-- + drivers/gpu/drm/panthor/panthor_mmu.c | 2 ++ + drivers/gpu/drm/panthor/panthor_sched.c | 2 +- + 3 files changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_drv.c ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -1089,8 +1089,8 @@ static int panthor_ioctl_tiler_heap_dest + return -EINVAL; + + pool = panthor_vm_get_heap_pool(vm, false); +- if (!pool) { +- ret = -EINVAL; ++ if (IS_ERR(pool)) { ++ ret = PTR_ERR(pool); + goto out_put_vm; + } + +--- a/drivers/gpu/drm/panthor/panthor_mmu.c ++++ b/drivers/gpu/drm/panthor/panthor_mmu.c +@@ -1897,6 +1897,8 @@ struct panthor_heap_pool *panthor_vm_get + vm->heaps.pool = panthor_heap_pool_get(pool); + } else { + pool = panthor_heap_pool_get(vm->heaps.pool); ++ if (!pool) ++ pool = ERR_PTR(-ENOENT); + } + mutex_unlock(&vm->heaps.lock); + +--- a/drivers/gpu/drm/panthor/panthor_sched.c ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -1343,7 +1343,7 @@ static int group_process_tiler_oom(struc + if (unlikely(csg_id < 0)) + return 0; + +- if (!heaps || frag_end > vt_end || vt_end >= vt_start) { ++ if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) { + ret = -EINVAL; + } else { + /* We do the allocation without holding the scheduler lock to avoid diff --git a/patches-6.6/034-57-v6.10-drm-panthor-Fix-NULL-vs-IS_ERR-bug-in-panthor_probe.patch b/patches-6.6/034-57-v6.10-drm-panthor-Fix-NULL-vs-IS_ERR-bug-in-panthor_probe.patch new file mode 100644 index 0000000..6bb8516 --- /dev/null +++ b/patches-6.6/034-57-v6.10-drm-panthor-Fix-NULL-vs-IS_ERR-bug-in-panthor_probe.patch @@ -0,0 +1,28 @@ +From 6e0718f21feda0ed97f932cee39b676817e457f2 Mon Sep 17 00:00:00 2001 +From: Harshit Mogalapalli +Date: Tue, 2 Apr 2024 03:40:40 -0700 +Subject: [PATCH] drm/panthor: Fix NULL vs IS_ERR() bug in panthor_probe() + +The devm_drm_dev_alloc() function returns error pointers. +Update the error handling to check for error pointers instead of NULL. 
+ +Fixes: 4bdca1150792 ("drm/panthor: Add the driver frontend block") +Signed-off-by: Harshit Mogalapalli +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240402104041.1689951-1-harshit.m.mogalapalli@oracle.com +--- + drivers/gpu/drm/panthor/panthor_drv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_drv.c ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -1399,7 +1399,7 @@ static int panthor_probe(struct platform + + ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver, + struct panthor_device, base); +- if (!ptdev) ++ if (IS_ERR(ptdev)) + return -ENOMEM; + + platform_set_drvdata(pdev, ptdev); diff --git a/patches-6.6/034-58-v6.10-drm-panthor-clean-up-some-types-in-panthor_sched_suspend.patch b/patches-6.6/034-58-v6.10-drm-panthor-clean-up-some-types-in-panthor_sched_suspend.patch new file mode 100644 index 0000000..b90fcef --- /dev/null +++ b/patches-6.6/034-58-v6.10-drm-panthor-clean-up-some-types-in-panthor_sched_suspend.patch @@ -0,0 +1,34 @@ +From a9b7dfd1d1f96be3a3f92128e9d78719a8d65939 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 8 Apr 2024 10:36:35 +0300 +Subject: [PATCH] drm/panthor: clean up some types in panthor_sched_suspend() + +These variables should be u32 instead of u64 because they're only +storing u32 values. Also static checkers complain when we do: + + suspended_slots &= ~upd_ctx.timedout_mask; + +In this code "suspended_slots" is a u64 and "upd_ctx.timedout_mask". The +mask clears out the top 32 bits which would likely be a bug if anything +were stored there. + +Signed-off-by: Dan Carpenter +Reviewed-by: Steven Price +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/85356b15-4840-4e64-8c75-922cdd6a5fef@moroto.mountain +--- + drivers/gpu/drm/panthor/panthor_sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_sched.c ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -2546,7 +2546,7 @@ void panthor_sched_suspend(struct pantho + { + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_csg_slots_upd_ctx upd_ctx; +- u64 suspended_slots, faulty_slots; ++ u32 suspended_slots, faulty_slots; + struct panthor_group *group; + u32 i; + diff --git a/patches-6.6/111-irqchip-gic-v3-add-hackaround-for-rk3568-its.patch b/patches-6.6/111-irqchip-gic-v3-add-hackaround-for-rk3568-its.patch index b1e7cd5..cf17e26 100644 --- a/patches-6.6/111-irqchip-gic-v3-add-hackaround-for-rk3568-its.patch +++ b/patches-6.6/111-irqchip-gic-v3-add-hackaround-for-rk3568-its.patch @@ -17,7 +17,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its #define ITS_FLAGS_FORCE_NON_SHAREABLE (1ULL << 3) #define RD_LOCAL_LPI_ENABLED BIT(0) -@@ -2206,6 +2207,11 @@ static struct page *its_allocate_prop_ta +@@ -2186,6 +2187,11 @@ static struct page *its_allocate_prop_ta { struct page *prop_page; @@ -29,7 +29,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); if (!prop_page) return NULL; -@@ -2329,6 +2335,7 @@ static int its_setup_baser(struct its_no +@@ -2309,6 +2315,7 @@ static int its_setup_baser(struct its_no u32 alloc_pages, psz; struct page *page; void *base; @@ -37,7 +37,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its psz = baser->psz; alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); -@@ -2340,7 +2347,10 @@ static int 
its_setup_baser(struct its_no +@@ -2320,7 +2327,10 @@ static int its_setup_baser(struct its_no order = get_order(GITS_BASER_PAGES_MAX * psz); } @@ -49,7 +49,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (!page) return -ENOMEM; -@@ -2390,6 +2400,13 @@ retry_baser: +@@ -2370,6 +2380,13 @@ retry_baser: its_write_baser(its, baser, val); tmp = baser->val; @@ -63,7 +63,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) { /* * Shareability didn't stick. Just use -@@ -2980,6 +2997,10 @@ static struct page *its_allocate_pending +@@ -2960,6 +2977,10 @@ static struct page *its_allocate_pending { struct page *pend_page; @@ -74,7 +74,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its pend_page = alloc_pages(gfp_flags | __GFP_ZERO, get_order(LPI_PENDBASE_SZ)); if (!pend_page) -@@ -3135,6 +3156,9 @@ static void its_cpu_init_lpis(void) +@@ -3115,6 +3136,9 @@ static void its_cpu_init_lpis(void) gicr_write_propbaser(val, rbase + GICR_PROPBASER); tmp = gicr_read_propbaser(rbase + GICR_PROPBASER); @@ -84,7 +84,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (!rdists_support_shareable()) tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK; -@@ -3162,6 +3186,9 @@ static void its_cpu_init_lpis(void) +@@ -3142,6 +3166,9 @@ static void its_cpu_init_lpis(void) gicr_write_pendbaser(val, rbase + GICR_PENDBASER); tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER); @@ -94,7 +94,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (!rdists_support_shareable()) tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK; -@@ -3328,7 +3355,12 @@ static bool its_alloc_table_entry(struct +@@ -3308,7 +3335,12 @@ static bool its_alloc_table_entry(struct /* Allocate memory for 2nd level table */ if (!table[idx]) { @@ -108,7 +108,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its get_order(baser->psz)); if (!page) return false; -@@ -3417,6 +3449,7 @@ static struct its_device *its_create_dev +@@ -3397,6 +3429,7 @@ static struct its_device *its_create_dev int nr_lpis; int nr_ites; int sz; @@ -116,7 +116,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (!its_alloc_device_table(its, dev_id)) return NULL; -@@ -3424,7 +3457,11 @@ static struct its_device *its_create_dev +@@ -3404,7 +3437,11 @@ static struct its_device *its_create_dev if (WARN_ON(!is_power_of_2(nvecs))) nvecs = roundup_pow_of_two(nvecs); @@ -129,7 +129,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its /* * Even if the device wants a single LPI, the ITT must be * sized as a power of two (and you need at least one bit...). 
-@@ -3432,7 +3469,7 @@ static struct its_device *its_create_dev +@@ -3412,7 +3449,7 @@ static struct its_device *its_create_dev nr_ites = max(2, nvecs); sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; @@ -138,7 +138,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (alloc_lpis) { lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis); if (lpi_map) -@@ -4774,6 +4811,13 @@ static bool its_set_non_coherent(void *d +@@ -4754,6 +4791,13 @@ static bool its_set_non_coherent(void *d return true; } @@ -152,7 +152,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { -@@ -4828,6 +4872,14 @@ static const struct gic_quirk its_quirks +@@ -4808,6 +4852,14 @@ static const struct gic_quirk its_quirks .init = its_enable_rk3588001, }, #endif @@ -167,7 +167,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its { .desc = "ITS: non-coherent attribute", .property = "dma-noncoherent", -@@ -5089,6 +5141,7 @@ static int __init its_probe_one(struct i +@@ -5069,6 +5121,7 @@ static int __init its_probe_one(struct i struct page *page; u32 ctlr; int err; @@ -175,7 +175,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its its_enable_quirks(its); -@@ -5122,7 +5175,9 @@ static int __init its_probe_one(struct i +@@ -5102,7 +5155,9 @@ static int __init its_probe_one(struct i } } @@ -186,7 +186,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its get_order(ITS_CMD_QUEUE_SZ)); if (!page) { err = -ENOMEM; -@@ -5148,6 +5203,9 @@ static int __init its_probe_one(struct i +@@ -5128,6 +5183,9 @@ static int __init its_probe_one(struct i gits_write_cbaser(baser, its->base + GITS_CBASER); tmp = gits_read_cbaser(its->base + GITS_CBASER);