From ae3c1c2368ba07f621c3e60984f6d61f2800b2e1 Mon Sep 17 00:00:00 2001
From: sbwml
Date: Sun, 7 Jul 2024 21:39:51 +0800
Subject: [PATCH] rockchip: backport panthor gpu driver

* rk3588

Signed-off-by: sbwml
---
 armv8/config-6.6 | 1 -
 image/nanopi-r4s.bootscript | 2 +-
 image/nanopi-r5s.bootscript | 2 +-
 modules.mk | 24 +-
 .../003-rk3568-general-patch-set.patch | 423 --
 .../005-friendlyelec-nanopi-series.patch | 196 -
 ...2-rk356x-add-dwc3-xhci-usb-trb-quirk.patch | 14 +-
 ...dd-Samsung-HDMI-eDP-Combo-PHY-driver.patch | 1084 ++++
 ...chip-vop-Add-rv1126-vop_lite-support.patch | 88 +
 ...chip-dsi-Add-rv1126-MIPI-DSI-support.patch | 60 +
 ...e-cleanup-helper-directly-as-destroy.patch | 71 +
 ...2-Demote-message-in-mod_supported-to.patch | 35 +
 ...ckchip-remove-redundant-of_match_ptr.patch | 53 +
 ...e-devm_platform_get_and_ioremap_reso.patch | 29 +
 ...ockchip-remove-unused-struct-in-vop2.patch | 54 +
 ...kchip-remove-NR_LAYERS-macro-on-vop2.patch | 36 +
 ...kchip-vop-fix-format-bpp-calculation.patch | 57 +
 ...ove-the-unsupported-format-of-cluste.patch | 89 +
 ...op2-Add-more-supported-10bit-formats.patch | 162 +
 ...ame-window-formats-to-show-window-ty.patch | 116 +
 ...fourcc-Add-NV20-and-NV30-YUV-formats.patch | 57 +
 ...p-vop-Add-NV15-NV20-and-NV30-support.patch | 231 +
 ...kchip-vop2-Add-NV20-and-NV30-support.patch | 67 +
 ...k3066_hdmi-Remove-useless-mode_fixup.patch | 40 +
 ..._hdmi-Switch-encoder-hooks-to-atomic.patch | 88 +
 ...rk3066_hdmi-include-drm-drm_atomic.h.patch | 43 +
 ...tput-interface-related-definition-to.patch | 189 +
 ...ckchip-vop2-Use-regcache_sync-to-fix.patch | 60 +
 ...p2-set-half_block_en-bit-in-all-mode.patch | 83 +
 ...-clear-afbc-en-and-transform-bit-for.patch | 36 +
 ...p2-Add-write-mask-for-VP-config-done.patch | 50 +
 ...ckchip-vop2-Set-YUV-RGB-overlay-mode.patch | 95 +
 ...p-vop2-set-bg-dly-and-prescan-dly-at.patch | 70 +
 ...-rockchip-vop2-rename-grf-to-sys_grf.patch | 50 +
 ...ip-vop2-Add-more-endpoint-definition.patch | 28 +
 ...rockchip-vop2-Add-support-for-rk3588.patch | 997 ++++
 ...2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch | 80 +
 ...id-use-regmap_reinit_cache-at-runtim.patch | 59 +
 ...clean-up-some-inconsistent-indenting.patch | 31 +
 ...ckchip-vop2-Drop-superfluous-include.patch | 25 +
 ...p2-Drop-unused-if_dclk_rate-variable.patch | 47 +
 ...ockchip-vop2-add-a-missing-unlock-in.patch | 31 +
 ...e-struct-drm_gpuva_manager-to-struct.patch | 5137 +++++++++++++++++
 ...7-drm-gpuvm-allow-building-as-module.patch | 78 +
 ...rm-nouveau-uvmm-rename-umgr-to-base-.patch | 208 +
 ...ce-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch | 45 +
 ...vm-convert-WARN-to-drm_WARN-variants.patch | 165 +
 ...m-gpuvm-export-drm_gpuvm_range_valid.patch | 61 +
 ...au-make-use-of-drm_gpuvm_range_valid.patch | 66 +
 ...common-dma-resv-per-struct-drm_gpuvm.patch | 205 +
 ...e-use-of-the-GPUVM-s-shared-dma-resv.patch | 140 +
 ...uvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch | 98 +
 ...arately-allocate-struct-nouveau_uvmm.patch | 219 +
 ...reference-count-drm_gpuvm-structures.patch | 221 +
 ...-abstraction-for-a-VM-BO-combination.patch | 1036 ++++
 ...ck-validate-external-evicted-objects.patch | 1052 ++++
 ...veau-use-GPUVM-common-infrastructure.patch | 448 ++
 ...-get-range-of-unmap-from-a-remap-op..patch | 60 +
 ...vm-Fix-deprecated-license-identifier.patch | 41 +
 ...gpuvm-fall-back-to-drm_exec_lock_obj.patch | 142 +
 ...m_gpuvm_bo_put-report-when-the-vm_bo.patch | 59 +
 ...-GPU-scheduler-to-variable-number-of.patch | 405 ++
 ...-sched-Add-drm_sched_wqueue_-helpers.patch | 241 +
 ...-scheduler-to-use-a-work-queue-rathe.patch | 507 ++
 ...ed-Split-free_job-into-own-work-item.patch | 275 +
 ...dd-a-helper-to-queue-TDR-immediately.patch | 70 +
 ...x-from-drm_sched_wakeup_if_can_queue.patch | 70 +
 ...rm-sched-Qualify-drm_sched_wakeup-by.patch | 69 +
 ...d-implement-dynamic-job-flow-control.patch | 612 ++
 ...custom-allocators-to-pgtable-drivers.patch | 129 +
 ...c-drm-gpuvm-Prefer-u32-over-uint32_t.patch | 68 +
 .../034-30-v6.10-drm-panthor-Add-uAPI.patch | 1024 ++++
 ...panthor-Add-GPU-register-definitions.patch | 280 +
 ...panthor-Add-the-device-logical-block.patch | 1013 ++++
 ...rm-panthor-Add-the-GPU-logical-block.patch | 593 ++
 ...10-drm-panthor-Add-GEM-logical-block.patch | 426 ++
 ...anthor-Add-the-devfreq-logical-block.patch | 356 ++
 ...panthor-Add-the-MMU-VM-logical-block.patch | 2975 ++++++++++
 ...drm-panthor-Add-the-FW-logical-block.patch | 1929 +++++++
 ...m-panthor-Add-the-heap-logical-block.patch | 696 +++
 ...thor-Add-the-scheduler-logical-block.patch | 3647 ++++++++++++
 ...anthor-Add-the-driver-frontend-block.patch | 1534 +++++
 ...drm-panthor-Allow-driver-compilation.patch | 104 +
 ...anthor-Fix-panthor_devfreq-kerneldoc.patch | 27 +
 ...xplicitly-include-mm-h-for-the-virt-.patch | 31 +
 ...drm-panthor-Fix-the-CONFIG_PM_n-case.patch | 75 +
 ...10-drm-panthor-Don-t-use-virt_to_pfn.patch | 74 +
 ...spelling-mistake-readyness-readiness.patch | 26 +
 ...IO-page-mmap-for-32-bit-userspace-on.patch | 142 +
 ...panthor-Fix-ordering-in-_irq_suspend.patch | 55 +
 ...-Drop-the-dev_enter-exit-sections-in.patch | 56 +
 ...ix-clang-Wunused-but-set-variable-in.patch | 46 +
 ...thor-Cleanup-unused-variable-cookie-.patch | 40 +
 ...-panthor-Fix-some-kerneldoc-warnings.patch | 58 +
 ...thor-Fix-a-couple-ENOMEM-error-codes.patch | 42 +
 ...r-Fix-error-code-in-panthor_gpu_init.patch | 33 +
 ...ff-by-one-in-panthor_fw_get_cs_iface.patch | 28 +
 ...0-drm-panthor-Don-t-return-NULL-from.patch | 58 +
 ...-NULL-vs-IS_ERR-bug-in-panthor_probe.patch | 28 +
 ...-some-types-in-panthor_sched_suspend.patch | 34 +
 ...gic-v3-add-hackaround-for-rk3568-its.patch | 32 +-
 101 files changed, 31785 insertions(+), 657 deletions(-)
 delete mode 100644 patches-6.6/003-rk3568-general-patch-set.patch
 delete mode 100644 patches-6.6/005-friendlyelec-nanopi-series.patch
 create mode 100644 patches-6.6/032-01-v6.9-phy-rockchip-Add-Samsung-HDMI-eDP-Combo-PHY-driver.patch
 create mode 100644 patches-6.6/033-01-v6.7-drm-rockchip-vop-Add-rv1126-vop_lite-support.patch
 create mode 100644 patches-6.6/033-02-v6.7-drm-rockchip-dsi-Add-rv1126-MIPI-DSI-support.patch
 create mode 100644 patches-6.6/033-03-v6.7-drm-rockchip-vop-Use-cleanup-helper-directly-as-destroy.patch
 create mode 100644 patches-6.6/033-04-v6.7-drm-rockchip-vop2-Demote-message-in-mod_supported-to.patch
 create mode 100644 patches-6.6/033-05-v6.7-drm-rockchip-remove-redundant-of_match_ptr.patch
 create mode 100644 patches-6.6/033-06-v6.7-drm-rockchip-dsi-Use-devm_platform_get_and_ioremap_reso.patch
 create mode 100644 patches-6.6/033-07-v6.7-drm-rockchip-remove-unused-struct-in-vop2.patch
 create mode 100644 patches-6.6/033-08-v6.7-drm-rockchip-remove-NR_LAYERS-macro-on-vop2.patch
 create mode 100644 patches-6.6/033-09-v6.7-drm-rockchip-vop-fix-format-bpp-calculation.patch
 create mode 100644 patches-6.6/033-10-v6.7-drm-rockchip-vop2-remove-the-unsupported-format-of-cluste.patch
 create mode 100644 patches-6.6/033-11-v6.7-drm-rockchip-vop2-Add-more-supported-10bit-formats.patch
 create mode 100644 patches-6.6/033-12-v6.7-drm-rockchip-vop2-rename-window-formats-to-show-window-ty.patch
 create mode 100644
patches-6.6/033-13-v6.7-drm-fourcc-Add-NV20-and-NV30-YUV-formats.patch create mode 100644 patches-6.6/033-14-v6.7-drm-rockchip-vop-Add-NV15-NV20-and-NV30-support.patch create mode 100644 patches-6.6/033-15-v6.8-drm-rockchip-vop2-Add-NV20-and-NV30-support.patch create mode 100644 patches-6.6/033-16-v6.8-drm-rockchip-rk3066_hdmi-Remove-useless-mode_fixup.patch create mode 100644 patches-6.6/033-17-v6.8-drm-rockchip-rk3066_hdmi-Switch-encoder-hooks-to-atomic.patch create mode 100644 patches-6.6/033-18-v6.8-drm-rockchip-rk3066_hdmi-include-drm-drm_atomic.h.patch create mode 100644 patches-6.6/033-19-v6.8-drm-rockchip-move-output-interface-related-definition-to.patch create mode 100644 patches-6.6/033-20-v6.8-Revert-drm-rockchip-vop2-Use-regcache_sync-to-fix.patch create mode 100644 patches-6.6/033-21-v6.8-drm-rockchip-vop2-set-half_block_en-bit-in-all-mode.patch create mode 100644 patches-6.6/033-22-v6.8-drm-rockchip-vop2-clear-afbc-en-and-transform-bit-for.patch create mode 100644 patches-6.6/033-23-v6.8-drm-rockchip-vop2-Add-write-mask-for-VP-config-done.patch create mode 100644 patches-6.6/033-24-v6.8-drm-rockchip-vop2-Set-YUV-RGB-overlay-mode.patch create mode 100644 patches-6.6/033-25-v6.8-drm-rockchip-vop2-set-bg-dly-and-prescan-dly-at.patch create mode 100644 patches-6.6/033-26-v6.8-drm-rockchip-vop2-rename-grf-to-sys_grf.patch create mode 100644 patches-6.6/033-27-v6.8-dt-bindings-rockchip-vop2-Add-more-endpoint-definition.patch create mode 100644 patches-6.6/033-28-v6.8-drm-rockchip-vop2-Add-support-for-rk3588.patch create mode 100644 patches-6.6/033-29-v6.8-drm-rockchip-vop2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch create mode 100644 patches-6.6/033-30-v6.8-drm-rockchip-vop2-Avoid-use-regmap_reinit_cache-at-runtim.patch create mode 100644 patches-6.6/033-31-v6.8-drm-rockchip-vop2-clean-up-some-inconsistent-indenting.patch create mode 100644 patches-6.6/033-32-v6.8-drm-rockchip-vop2-Drop-superfluous-include.patch create mode 100644 patches-6.6/033-33-v6.8-drm-rockchip-vop2-Drop-unused-if_dclk_rate-variable.patch create mode 100644 patches-6.6/033-34-v6.8-drm-rockchip-vop2-add-a-missing-unlock-in.patch create mode 100644 patches-6.6/034-01-v6.7-drm-gpuvm-rename-struct-drm_gpuva_manager-to-struct.patch create mode 100644 patches-6.6/034-02-v6.7-drm-gpuvm-allow-building-as-module.patch create mode 100644 patches-6.6/034-03-v6.7-drm-nouveau-uvmm-rename-umgr-to-base-.patch create mode 100644 patches-6.6/034-04-v6.7-drm-gpuvm-Dual-licence-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch create mode 100644 patches-6.6/034-05-v6.8-drm-gpuvm-convert-WARN-to-drm_WARN-variants.patch create mode 100644 patches-6.6/034-06-v6.8-drm-gpuvm-export-drm_gpuvm_range_valid.patch create mode 100644 patches-6.6/034-07-v6.8-drm-nouveau-make-use-of-drm_gpuvm_range_valid.patch create mode 100644 patches-6.6/034-08-v6.8-drm-gpuvm-add-common-dma-resv-per-struct-drm_gpuvm.patch create mode 100644 patches-6.6/034-09-v6.8-drm-nouveau-make-use-of-the-GPUVM-s-shared-dma-resv.patch create mode 100644 patches-6.6/034-10-v6.8-drm-gpuvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch create mode 100644 patches-6.6/034-11-v6.8-drm-nouveau-separately-allocate-struct-nouveau_uvmm.patch create mode 100644 patches-6.6/034-12-v6.8-drm-gpuvm-reference-count-drm_gpuvm-structures.patch create mode 100644 patches-6.6/034-13-v6.8-drm-gpuvm-add-an-abstraction-for-a-VM-BO-combination.patch create mode 100644 patches-6.6/034-14-v6.8-drm-gpuvm-track-lock-validate-external-evicted-objects.patch create mode 100644 
patches-6.6/034-15-v6.8-drm-nouveau-use-GPUVM-common-infrastructure.patch create mode 100644 patches-6.6/034-16-v6.8-drm-gpuvm-Helper-to-get-range-of-unmap-from-a-remap-op..patch create mode 100644 patches-6.6/034-17-v6.8-drm-gpuvm-Fix-deprecated-license-identifier.patch create mode 100644 patches-6.6/034-18-v6.8-drm-gpuvm-fall-back-to-drm_exec_lock_obj.patch create mode 100644 patches-6.6/034-19-v6.8-drm-gpuvm-Let-drm_gpuvm_bo_put-report-when-the-vm_bo.patch create mode 100644 patches-6.6/034-20-v6.7-drm-sched-Convert-the-GPU-scheduler-to-variable-number-of.patch create mode 100644 patches-6.6/034-21-v6.8-drm-sched-Add-drm_sched_wqueue_-helpers.patch create mode 100644 patches-6.6/034-22-v6.8-drm-sched-Convert-drm-scheduler-to-use-a-work-queue-rathe.patch create mode 100644 patches-6.6/034-23-v6.8-drm-sched-Split-free_job-into-own-work-item.patch create mode 100644 patches-6.6/034-24-v6.8-drm-sched-Add-a-helper-to-queue-TDR-immediately.patch create mode 100644 patches-6.6/034-25-v6.8-drm-sched-Drop-suffix-from-drm_sched_wakeup_if_can_queue.patch create mode 100644 patches-6.6/034-26-v6.8-drm-sched-Qualify-drm_sched_wakeup-by.patch create mode 100644 patches-6.6/034-27-v6.8-drm-sched-implement-dynamic-job-flow-control.patch create mode 100644 patches-6.6/034-28-v6.8-iommu-Allow-passing-custom-allocators-to-pgtable-drivers.patch create mode 100644 patches-6.6/034-29-v6.9-drm-exec-drm-gpuvm-Prefer-u32-over-uint32_t.patch create mode 100644 patches-6.6/034-30-v6.10-drm-panthor-Add-uAPI.patch create mode 100644 patches-6.6/034-31-v6.10-drm-panthor-Add-GPU-register-definitions.patch create mode 100644 patches-6.6/034-32-v6.10-drm-panthor-Add-the-device-logical-block.patch create mode 100644 patches-6.6/034-33-v6.10-drm-panthor-Add-the-GPU-logical-block.patch create mode 100644 patches-6.6/034-34-v6.10-drm-panthor-Add-GEM-logical-block.patch create mode 100644 patches-6.6/034-35-v6.10-drm-panthor-Add-the-devfreq-logical-block.patch create mode 100644 patches-6.6/034-36-v6.10-drm-panthor-Add-the-MMU-VM-logical-block.patch create mode 100644 patches-6.6/034-37-v6.10-drm-panthor-Add-the-FW-logical-block.patch create mode 100644 patches-6.6/034-38-v6.10-drm-panthor-Add-the-heap-logical-block.patch create mode 100644 patches-6.6/034-39-v6.10-drm-panthor-Add-the-scheduler-logical-block.patch create mode 100644 patches-6.6/034-40-v6.10-drm-panthor-Add-the-driver-frontend-block.patch create mode 100644 patches-6.6/034-41-v6.10-drm-panthor-Allow-driver-compilation.patch create mode 100644 patches-6.6/034-42-v6.10-drm-panthor-Fix-panthor_devfreq-kerneldoc.patch create mode 100644 patches-6.6/034-43-v6.10-drm-panthor-Explicitly-include-mm-h-for-the-virt-.patch create mode 100644 patches-6.6/034-44-v6.10-drm-panthor-Fix-the-CONFIG_PM_n-case.patch create mode 100644 patches-6.6/034-45-v6.10-drm-panthor-Don-t-use-virt_to_pfn.patch create mode 100644 patches-6.6/034-46-v6.10-drm-panthor-Fix-spelling-mistake-readyness-readiness.patch create mode 100644 patches-6.6/034-47-v6.10-drm-panthor-Fix-IO-page-mmap-for-32-bit-userspace-on.patch create mode 100644 patches-6.6/034-48-v6.10-drm-panthor-Fix-ordering-in-_irq_suspend.patch create mode 100644 patches-6.6/034-49-v6.10-drm-panthor-Drop-the-dev_enter-exit-sections-in.patch create mode 100644 patches-6.6/034-50-v6.10-drm-panthor-Fix-clang-Wunused-but-set-variable-in.patch create mode 100644 patches-6.6/034-51-v6.10-drm-panthor-Cleanup-unused-variable-cookie-.patch create mode 100644 patches-6.6/034-52-v6.10-drm-panthor-Fix-some-kerneldoc-warnings.patch create mode 
100644 patches-6.6/034-53-v6.10-drm-panthor-Fix-a-couple-ENOMEM-error-codes.patch create mode 100644 patches-6.6/034-54-v6.10-drm-panthor-Fix-error-code-in-panthor_gpu_init.patch create mode 100644 patches-6.6/034-55-v6.10-drm-panthor-Fix-off-by-one-in-panthor_fw_get_cs_iface.patch create mode 100644 patches-6.6/034-56-v6.10-drm-panthor-Don-t-return-NULL-from.patch create mode 100644 patches-6.6/034-57-v6.10-drm-panthor-Fix-NULL-vs-IS_ERR-bug-in-panthor_probe.patch create mode 100644 patches-6.6/034-58-v6.10-drm-panthor-clean-up-some-types-in-panthor_sched_suspend.patch diff --git a/armv8/config-6.6 b/armv8/config-6.6 index 7fd040c..ec72bb0 100644 --- a/armv8/config-6.6 +++ b/armv8/config-6.6 @@ -765,7 +765,6 @@ CONFIG_USB_ULPI_BUS=y CONFIG_USB_ULPI_VIEWPORT=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_PLATFORM=y -CONFIG_VENDOR_FRIENDLYELEC=y # CONFIG_VIRTIO_MENU is not set CONFIG_VIRTUALIZATION=y CONFIG_VMAP_STACK=y diff --git a/image/nanopi-r4s.bootscript b/image/nanopi-r4s.bootscript index 2909aee..fc67b29 100644 --- a/image/nanopi-r4s.bootscript +++ b/image/nanopi-r4s.bootscript @@ -1,6 +1,6 @@ part uuid mmc ${devnum}:2 uuid -setenv bootargs "console=ttyS2,1500000 earlycon=uart8250,mmio32,0xff1a0000 root=PARTUUID=${uuid} rw rootwait mitigations=off" +setenv bootargs "coherent_pool=2M console=ttyS2,1500000 earlycon=uart8250,mmio32,0xff1a0000 root=PARTUUID=${uuid} rw rootwait mitigations=off" load mmc ${devnum}:1 ${kernel_addr_r} kernel.img diff --git a/image/nanopi-r5s.bootscript b/image/nanopi-r5s.bootscript index 0cea211..27b55c5 100644 --- a/image/nanopi-r5s.bootscript +++ b/image/nanopi-r5s.bootscript @@ -1,6 +1,6 @@ part uuid mmc ${devnum}:2 uuid -setenv bootargs "console=tty1 console=ttyS2,1500000 earlycon=uart8250,mmio32,0xfe660000 root=PARTUUID=${uuid} rw rootwait mitigations=off pcie_aspm=off" +setenv bootargs "coherent_pool=2M console=tty1 console=ttyS2,1500000 earlycon=uart8250,mmio32,0xfe660000 root=PARTUUID=${uuid} rw rootwait mitigations=off pcie_aspm=off" load mmc ${devnum}:1 ${kernel_addr_r} kernel.img diff --git a/modules.mk b/modules.mk index af5de3d..d69241f 100644 --- a/modules.mk +++ b/modules.mk @@ -5,7 +5,8 @@ define KernelPackage/drm-rockchip SUBMENU:=$(VIDEO_MENU) TITLE:=Rockchip DRM support - DEPENDS:=@TARGET_rockchip +kmod-backlight +kmod-drm-kms-helper +kmod-multimedia-input + DEPENDS:=@TARGET_rockchip +kmod-backlight +kmod-drm-kms-helper \ + +kmod-multimedia-input +kmod-drm-display-helper KCONFIG:= \ CONFIG_DRM_ROCKCHIP \ CONFIG_DRM_LOAD_EDID_FIRMWARE=y \ @@ -14,36 +15,35 @@ define KernelPackage/drm-rockchip CONFIG_DRM_BRIDGE=y \ CONFIG_HDMI=y \ CONFIG_PHY_ROCKCHIP_INNO_HDMI \ + CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX \ CONFIG_DRM_DW_HDMI \ CONFIG_DRM_DW_HDMI_CEC \ - CONFIG_ROCKCHIP_VOP=y \ - CONFIG_ROCKCHIP_VOP2=y \ CONFIG_ROCKCHIP_ANALOGIX_DP=n \ CONFIG_ROCKCHIP_CDN_DP=n \ CONFIG_ROCKCHIP_DW_HDMI=y \ - CONFIG_ROCKCHIP_INNO_HDMI=y \ CONFIG_ROCKCHIP_DW_MIPI_DSI=y \ + CONFIG_ROCKCHIP_INNO_HDMI=y \ CONFIG_ROCKCHIP_LVDS=y \ CONFIG_ROCKCHIP_RGB=n \ CONFIG_ROCKCHIP_RK3066_HDMI=n \ + CONFIG_ROCKCHIP_VOP=y \ + CONFIG_ROCKCHIP_VOP2=y \ CONFIG_DRM_DP_AUX_BUS \ - CONFIG_DRM_DW_HDMI_GP_AUDIO=n \ + CONFIG_DRM_GEM_DMA_HELPER \ CONFIG_DRM_PANEL=y \ CONFIG_DRM_PANEL_BRIDGE=y \ - CONFIG_DRM_PANEL_SIMPLE \ - CONFIG_MEDIA_CEC_RC=y + CONFIG_DRM_PANEL_SIMPLE FILES:= \ $(LINUX_DIR)/drivers/gpu/drm/bridge/synopsys/dw-hdmi.ko \ $(LINUX_DIR)/drivers/gpu/drm/bridge/synopsys/dw-hdmi-cec.ko \ $(LINUX_DIR)/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.ko \ - 
$(LINUX_DIR)/drivers/media/cec/core/cec.ko \ $(LINUX_DIR)/drivers/phy/rockchip/phy-rockchip-inno-hdmi.ko \ - $(LINUX_DIR)/drivers/gpu/drm/display/drm_dp_aux_bus.ko@le5.15 \ + $(LINUX_DIR)/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.ko \ $(LINUX_DIR)/drivers/gpu/drm/drm_dma_helper.ko \ - $(LINUX_DIR)/drivers/gpu/drm/display/drm_display_helper.ko \ $(LINUX_DIR)/drivers/gpu/drm/panel/panel-simple.ko \ - $(LINUX_DIR)/drivers/gpu/drm/rockchip/rockchipdrm.ko - AUTOLOAD:=$(call AutoProbe,rockchipdrm phy-rockchip-inno-hdmi dw-hdmi-cec) + $(LINUX_DIR)/drivers/gpu/drm/rockchip/rockchipdrm.ko \ + $(LINUX_DIR)/drivers/media/cec/core/cec.ko + AUTOLOAD:=$(call AutoProbe,rockchipdrm phy-rockchip-inno-hdmi phy-rockchip-samsung-hdptx dw-hdmi-cec) endef define KernelPackage/drm-rockchip/description diff --git a/patches-6.6/003-rk3568-general-patch-set.patch b/patches-6.6/003-rk3568-general-patch-set.patch deleted file mode 100644 index 71440e7..0000000 --- a/patches-6.6/003-rk3568-general-patch-set.patch +++ /dev/null @@ -1,423 +0,0 @@ ---- a/drivers/clk/rockchip/clk-half-divider.c -+++ b/drivers/clk/rockchip/clk-half-divider.c -@@ -166,7 +166,7 @@ struct clk *rockchip_clk_register_halfdi - unsigned long flags, - spinlock_t *lock) - { -- struct clk_hw *hw = ERR_PTR(-ENOMEM); -+ struct clk_hw *hw; - struct clk_mux *mux = NULL; - struct clk_gate *gate = NULL; - struct clk_divider *div = NULL; ---- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c -+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c -@@ -92,74 +92,70 @@ static struct rockchip_hdmi *to_rockchip - - static const struct dw_hdmi_mpll_config rockchip_mpll_cfg[] = { - { -- 27000000, { -- { 0x00b3, 0x0000}, -- { 0x2153, 0x0000}, -- { 0x40f3, 0x0000} -+ 30666000, { -+ { 0x00b3, 0x0000 }, -+ { 0x2153, 0x0000 }, -+ { 0x40f3, 0x0000 }, - }, - }, { -- 36000000, { -- { 0x00b3, 0x0000}, -- { 0x2153, 0x0000}, -- { 0x40f3, 0x0000} -+ 36800000, { -+ { 0x00b3, 0x0000 }, -+ { 0x2153, 0x0000 }, -+ { 0x40a2, 0x0001 }, - }, - }, { -- 40000000, { -- { 0x00b3, 0x0000}, -- { 0x2153, 0x0000}, -- { 0x40f3, 0x0000} -+ 46000000, { -+ { 0x00b3, 0x0000 }, -+ { 0x2142, 0x0001 }, -+ { 0x40a2, 0x0001 }, - }, - }, { -- 54000000, { -- { 0x0072, 0x0001}, -- { 0x2142, 0x0001}, -- { 0x40a2, 0x0001}, -+ 61333000, { -+ { 0x0072, 0x0001 }, -+ { 0x2142, 0x0001 }, -+ { 0x40a2, 0x0001 }, - }, - }, { -- 65000000, { -- { 0x0072, 0x0001}, -- { 0x2142, 0x0001}, -- { 0x40a2, 0x0001}, -+ 73600000, { -+ { 0x0072, 0x0001 }, -+ { 0x2142, 0x0001 }, -+ { 0x4061, 0x0002 }, - }, - }, { -- 66000000, { -- { 0x013e, 0x0003}, -- { 0x217e, 0x0002}, -- { 0x4061, 0x0002} -+ 92000000, { -+ { 0x0072, 0x0001 }, -+ { 0x2145, 0x0002 }, -+ { 0x4061, 0x0002 }, - }, - }, { -- 74250000, { -- { 0x0072, 0x0001}, -- { 0x2145, 0x0002}, -- { 0x4061, 0x0002} -+ 122666000, { -+ { 0x0051, 0x0002 }, -+ { 0x2145, 0x0002 }, -+ { 0x4061, 0x0002 }, - }, - }, { -- 83500000, { -- { 0x0072, 0x0001}, -+ 147200000, { -+ { 0x0051, 0x0002 }, -+ { 0x2145, 0x0002 }, -+ { 0x4064, 0x0003 }, - }, - }, { -- 108000000, { -- { 0x0051, 0x0002}, -- { 0x2145, 0x0002}, -- { 0x4061, 0x0002} -+ 184000000, { -+ { 0x0051, 0x0002 }, -+ { 0x214c, 0x0003 }, -+ { 0x4064, 0x0003 }, - }, - }, { -- 106500000, { -- { 0x0051, 0x0002}, -- { 0x2145, 0x0002}, -- { 0x4061, 0x0002} -- }, -- }, { -- 146250000, { -- { 0x0051, 0x0002}, -- { 0x2145, 0x0002}, -- { 0x4061, 0x0002} -+ 226666000, { -+ { 0x0040, 0x0003 }, -+ { 0x214c, 0x0003 }, -+ { 0x4064, 0x0003 }, - }, - }, { -- 148500000, { -- { 0x0051, 0x0003}, -- { 0x214c, 0x0003}, -- { 0x4064, 0x0003} -+ 272000000, { 
-+ { 0x0040, 0x0003 }, -+ { 0x214c, 0x0003 }, -+ { 0x5a64, 0x0003 }, - }, - }, { - 340000000, { -@@ -168,10 +164,16 @@ static const struct dw_hdmi_mpll_config - { 0x5a64, 0x0003 }, - }, - }, { -+ 600000000, { -+ { 0x1a40, 0x0003 }, -+ { 0x3b4c, 0x0003 }, -+ { 0x5a64, 0x0003 }, -+ }, -+ }, { - ~0UL, { -- { 0x00a0, 0x000a }, -- { 0x2001, 0x000f }, -- { 0x4002, 0x000f }, -+ { 0x0000, 0x0000 }, -+ { 0x0000, 0x0000 }, -+ { 0x0000, 0x0000 }, - }, - } - }; -@@ -179,20 +181,6 @@ static const struct dw_hdmi_mpll_config - static const struct dw_hdmi_curr_ctrl rockchip_cur_ctr[] = { - /* pixelclk bpp8 bpp10 bpp12 */ - { -- 40000000, { 0x0018, 0x0018, 0x0018 }, -- }, { -- 65000000, { 0x0028, 0x0028, 0x0028 }, -- }, { -- 66000000, { 0x0038, 0x0038, 0x0038 }, -- }, { -- 74250000, { 0x0028, 0x0038, 0x0038 }, -- }, { -- 83500000, { 0x0028, 0x0038, 0x0038 }, -- }, { -- 146250000, { 0x0038, 0x0038, 0x0038 }, -- }, { -- 148500000, { 0x0000, 0x0038, 0x0038 }, -- }, { - 600000000, { 0x0000, 0x0000, 0x0000 }, - }, { - ~0UL, { 0x0000, 0x0000, 0x0000}, -@@ -204,6 +192,7 @@ static const struct dw_hdmi_phy_config r - { 74250000, 0x8009, 0x0004, 0x0272}, - { 148500000, 0x802b, 0x0004, 0x028d}, - { 297000000, 0x8039, 0x0005, 0x028d}, -+ { 594000000, 0x8039, 0x0000, 0x019d}, - { ~0UL, 0x0000, 0x0000, 0x0000} - }; - -@@ -249,42 +238,6 @@ static int rockchip_hdmi_parse_dt(struct - return 0; - } - --static enum drm_mode_status --dw_hdmi_rockchip_mode_valid(struct dw_hdmi *dw_hdmi, void *data, -- const struct drm_display_info *info, -- const struct drm_display_mode *mode) --{ -- struct rockchip_hdmi *hdmi = data; -- const struct dw_hdmi_mpll_config *mpll_cfg = rockchip_mpll_cfg; -- int pclk = mode->clock * 1000; -- bool exact_match = hdmi->plat_data->phy_force_vendor; -- int i; -- -- if (hdmi->ref_clk) { -- int rpclk = clk_round_rate(hdmi->ref_clk, pclk); -- -- if (abs(rpclk - pclk) > pclk / 1000) -- return MODE_NOCLOCK; -- } -- -- for (i = 0; mpll_cfg[i].mpixelclock != (~0UL); i++) { -- /* -- * For vendor specific phys force an exact match of the pixelclock -- * to preserve the original behaviour of the driver. -- */ -- if (exact_match && pclk == mpll_cfg[i].mpixelclock) -- return MODE_OK; -- /* -- * The Synopsys phy can work with pixelclocks up to the value given -- * in the corresponding mpll_cfg entry. 
-- */ -- if (!exact_match && pclk <= mpll_cfg[i].mpixelclock) -- return MODE_OK; -- } -- -- return MODE_BAD; --} -- - static void dw_hdmi_rockchip_encoder_disable(struct drm_encoder *encoder) - { - } -@@ -450,7 +403,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3228_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -467,7 +419,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3288_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -487,7 +438,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3328_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -505,7 +455,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3399_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -518,7 +467,6 @@ static struct rockchip_hdmi_chip_data rk - }; - - static const struct dw_hdmi_plat_data rk3568_hdmi_drv_data = { -- .mode_valid = dw_hdmi_rockchip_mode_valid, - .mpll_cfg = rockchip_mpll_cfg, - .cur_ctr = rockchip_cur_ctr, - .phy_config = rockchip_phy_config, -@@ -625,6 +573,14 @@ static int dw_hdmi_rockchip_bind(struct - } - - if (hdmi->chip_data == &rk3568_chip_data) { -+ regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1, -+ HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK | -+ RK3568_HDMI_SCLIN_MSK, -+ RK3568_HDMI_SDAIN_MSK | -+ RK3568_HDMI_SCLIN_MSK)); -+ } -+ -+ if (hdmi->chip_data == &rk3568_chip_data) { - regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1, - HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK | - RK3568_HDMI_SCLIN_MSK, ---- a/drivers/usb/dwc3/dwc3-of-simple.c -+++ b/drivers/usb/dwc3/dwc3-of-simple.c -@@ -30,12 +30,18 @@ struct dwc3_of_simple { - bool need_reset; - }; - -+struct dwc3_of_simple_data { -+ bool need_reset; -+}; -+ - static int dwc3_of_simple_probe(struct platform_device *pdev) - { - struct dwc3_of_simple *simple; - struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; - -+ const struct dwc3_of_simple_data *data = of_device_get_match_data(dev); -+ - int ret; - - simple = devm_kzalloc(dev, sizeof(*simple), GFP_KERNEL); -@@ -49,8 +55,8 @@ static int dwc3_of_simple_probe(struct p - * Some controllers need to toggle the usb3-otg reset before trying to - * initialize the PHY, otherwise the PHY times out. 
- */ -- if (of_device_is_compatible(np, "rockchip,rk3399-dwc3")) -- simple->need_reset = true; -+ if (data->need_reset) -+ simple->need_reset = data->need_reset; - - simple->resets = of_reset_control_array_get(np, false, true, - true); -@@ -168,12 +174,34 @@ static const struct dev_pm_ops dwc3_of_s - dwc3_of_simple_runtime_resume, NULL) - }; - -+static const struct dwc3_of_simple_data dwc3_of_simple_data_rk3399 = { -+ .need_reset = true, -+}; -+ - static const struct of_device_id of_dwc3_simple_match[] = { -- { .compatible = "rockchip,rk3399-dwc3" }, -- { .compatible = "sprd,sc9860-dwc3" }, -- { .compatible = "allwinner,sun50i-h6-dwc3" }, -- { .compatible = "hisilicon,hi3670-dwc3" }, -- { .compatible = "intel,keembay-dwc3" }, -+ { -+ .compatible = "allwinner,sun50i-h6-dwc3", -+ }, -+ { -+ .compatible = "cavium,octeon-7130-usb-uctl", -+ }, -+ { -+ .compatible = "hisilicon,hi3670-dwc3", -+ }, -+ { -+ .compatible = "intel,keembay-dwc3", -+ }, -+ { -+ .compatible = "rockchip,rk3399-dwc3", -+ .data = &dwc3_of_simple_data_rk3399, -+ }, -+ { -+ .compatible = "rockchip,rk3568-dwc3", -+ .data = &dwc3_of_simple_data_rk3399, -+ }, -+ { -+ .compatible = "sprd,sc9860-dwc3", -+ }, - { /* Sentinel */ } - }; - MODULE_DEVICE_TABLE(of, of_dwc3_simple_match); ---- a/kernel/dma/pool.c -+++ b/kernel/dma/pool.c -@@ -191,11 +191,10 @@ static int __init dma_atomic_pool_init(v - /* - * If coherent_pool was not used on the command line, default the pool - * sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER. -+ * Use 2MiB as default pool size. - */ - if (!atomic_pool_size) { -- unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K); -- pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES); -- atomic_pool_size = max_t(size_t, pages << PAGE_SHIFT, SZ_128K); -+ atomic_pool_size = SZ_2M; - } - INIT_WORK(&atomic_pool_work, atomic_pool_work_fn); - ---- a/sound/soc/codecs/rt5651.c -+++ b/sound/soc/codecs/rt5651.c -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - - #include "rl6231.h" - #include "rt5651.h" -@@ -1511,6 +1512,7 @@ static int rt5651_set_dai_pll(struct snd - static int rt5651_set_bias_level(struct snd_soc_component *component, - enum snd_soc_bias_level level) - { -+ struct rt5651_priv *rt5651 = snd_soc_component_get_drvdata(component); - switch (level) { - case SND_SOC_BIAS_PREPARE: - if (SND_SOC_BIAS_STANDBY == snd_soc_component_get_bias_level(component)) { -@@ -1518,6 +1520,13 @@ static int rt5651_set_bias_level(struct - snd_soc_component_update_bits(component, RT5651_D_MISC, - 0xc00, 0xc00); - } -+ if (!IS_ERR(rt5651->mclk)){ -+ if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_ON) { -+ clk_disable_unprepare(rt5651->mclk); -+ } else { -+ clk_prepare_enable(rt5651->mclk); -+ } -+ } - break; - case SND_SOC_BIAS_STANDBY: - if (SND_SOC_BIAS_OFF == snd_soc_component_get_bias_level(component)) { -@@ -2059,6 +2068,13 @@ static int rt5651_probe(struct snd_soc_c - { - struct rt5651_priv *rt5651 = snd_soc_component_get_drvdata(component); - -+ /* Check if MCLK provided */ -+ rt5651->mclk = devm_clk_get(component->dev, "mclk"); -+ if (PTR_ERR(rt5651->mclk) == -EPROBE_DEFER){ -+ dev_err(component->dev, "unable to get mclk\n"); -+ return -EPROBE_DEFER; -+ } -+ - rt5651->component = component; - - snd_soc_component_update_bits(component, RT5651_PWR_ANLG1, ---- a/sound/soc/codecs/rt5651.h -+++ b/sound/soc/codecs/rt5651.h -@@ -2097,6 +2097,7 @@ struct rt5651_priv { - - int dmic_en; - bool hp_mute; -+ struct clk *mclk; - }; - - #endif /* __RT5651_H__ */ diff --git 
a/patches-6.6/005-friendlyelec-nanopi-series.patch b/patches-6.6/005-friendlyelec-nanopi-series.patch deleted file mode 100644 index 03257e2..0000000 --- a/patches-6.6/005-friendlyelec-nanopi-series.patch +++ /dev/null @@ -1,196 +0,0 @@ -From e134dcd7dda9048f4ac2cab96322a8a7f08a9d22 Mon Sep 17 00:00:00 2001 -From: sbwml <984419930@qq.com> -Date: Sat, 12 Nov 2022 10:24:30 +0800 -Subject: [PATCH] friendlyelec-nanopi-series - ---- - drivers/soc/Kconfig | 1 + - drivers/soc/Makefile | 1 + - drivers/soc/friendlyelec/Kconfig | 11 +++ - drivers/soc/friendlyelec/Makefile | 1 + - drivers/soc/friendlyelec/board.c | 143 ++++++++++++++++++++++++++++++ - 5 files changed, 157 insertions(+) - create mode 100644 drivers/soc/friendlyelec/Kconfig - create mode 100644 drivers/soc/friendlyelec/Makefile - create mode 100644 drivers/soc/friendlyelec/board.c - ---- a/drivers/soc/Kconfig -+++ b/drivers/soc/Kconfig -@@ -31,5 +31,6 @@ source "drivers/soc/ti/Kconfig" - source "drivers/soc/ux500/Kconfig" - source "drivers/soc/versatile/Kconfig" - source "drivers/soc/xilinx/Kconfig" -+source "drivers/soc/friendlyelec/Kconfig" - - endmenu ---- a/drivers/soc/Makefile -+++ b/drivers/soc/Makefile -@@ -35,3 +35,4 @@ obj-y += ti/ - obj-$(CONFIG_ARCH_U8500) += ux500/ - obj-$(CONFIG_PLAT_VERSATILE) += versatile/ - obj-y += xilinx/ -+obj-$(CONFIG_VENDOR_FRIENDLYELEC) += friendlyelec/ ---- /dev/null -+++ b/drivers/soc/friendlyelec/Kconfig -@@ -0,0 +1,11 @@ -+# -+# Machine drivers -+# -+ -+if ARCH_ROCKCHIP -+ -+config VENDOR_FRIENDLYELEC -+ bool "FriendlyElec board based on Rockchip SoCs" -+ default n -+ -+endif ---- /dev/null -+++ b/drivers/soc/friendlyelec/Makefile -@@ -0,0 +1 @@ -+obj-$(CONFIG_VENDOR_FRIENDLYELEC) += board.o ---- /dev/null -+++ b/drivers/soc/friendlyelec/board.c -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) Guangzhou FriendlyELEC Computer Tech. Co., Ltd. -+ * (http://www.friendlyarm.com) -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version 2 -+ * of the License, or (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, you can access it online at -+ * http://www.gnu.org/licenses/gpl-2.0.html. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define BOARD_MANF "FriendlyELEC Computer Tech. Co., Ltd." 
-+ -+static const char *board_mach; -+static const char *board_name; -+static u32 board_rev; -+static u32 board_serial_high, board_serial_low; -+ -+static ssize_t board_sys_info_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ char *s = buf; -+ -+ s += sprintf(s, "Hardware\t: %s\n", board_mach); -+ s += sprintf(s, "Revision\t: %04x\n", board_rev); -+ s += sprintf(s, "Serial\t\t: %08x%08x\n", -+ board_serial_high, board_serial_low); -+ s += sprintf(s, "\nModel\t\t: %s\n", board_name); -+ s += sprintf(s, "Manufacturer\t: %s\n", BOARD_MANF); -+ -+ return (s - buf); -+} -+ -+static struct device_attribute board_attr_info = -+ __ATTR(info, S_IRUGO, board_sys_info_show, NULL); -+ -+static int rockchip_cpuinfo_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct nvmem_cell *cell; -+ unsigned char *efuse_buf, buf[16]; -+ size_t len; -+ int i; -+ -+ cell = nvmem_cell_get(dev, "id"); -+ if (IS_ERR(cell)) { -+ dev_err(dev, "failed to get id cell: %ld\n", PTR_ERR(cell)); -+ return PTR_ERR(cell); -+ } -+ -+ efuse_buf = nvmem_cell_read(cell, &len); -+ nvmem_cell_put(cell); -+ -+ if (len != 16) { -+ kfree(efuse_buf); -+ dev_err(dev, "invalid id len: %zu\n", len); -+ return -EINVAL; -+ } -+ -+ for (i = 0; i < 8; i++) { -+ buf[i] = efuse_buf[1 + (i << 1)]; -+ buf[i + 8] = efuse_buf[i << 1]; -+ } -+ -+ kfree(efuse_buf); -+ -+ board_serial_low = crc32(0, buf, 8); -+ board_serial_high = crc32(board_serial_low, buf + 8, 8); -+ -+ dev_info(dev, "Serial\t\t: %08x%08x\n", -+ board_serial_high, board_serial_low); -+ -+ return 0; -+} -+ -+static int board_sys_probe(struct platform_device *pdev) -+{ -+ struct device_node *np = pdev->dev.of_node; -+ struct device_node *root; -+ -+ root = of_find_node_by_path("/"); -+ -+ of_property_read_u32(np, "hwrev", &board_rev); -+ -+ if (of_property_read_string(np, "machine", &board_mach)) -+ of_property_read_string(root, "compatible", &board_mach); -+ -+ if (of_property_read_string(np, "model", &board_name)) -+ of_property_read_string(root, "model", &board_name); -+ -+ of_node_put(root); -+ -+ rockchip_cpuinfo_probe(pdev); -+ -+ device_create_file(&pdev->dev, &board_attr_info); -+ -+ return 0; -+} -+ -+static const struct of_device_id board_sys_of_match[] = { -+ { .compatible = "friendlyelec,board" }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, board_sys_of_match); -+ -+static struct platform_driver board_sys_driver = { -+ .probe = board_sys_probe, -+ .driver = { -+ .name = "friendlyelec-board", -+ .of_match_table = board_sys_of_match, -+ }, -+}; -+ -+static int __init board_sys_init(void) -+{ -+ return platform_driver_register(&board_sys_driver); -+} -+late_initcall(board_sys_init); -+ -+MODULE_AUTHOR("support@friendlyarm.com"); -+MODULE_DESCRIPTION("FriendlyElec NanoPi Series Machine Driver"); -+MODULE_LICENSE("GPL v2"); diff --git a/patches-6.6/012-rk356x-add-dwc3-xhci-usb-trb-quirk.patch b/patches-6.6/012-rk356x-add-dwc3-xhci-usb-trb-quirk.patch index ca8f2e1..a11c727 100644 --- a/patches-6.6/012-rk356x-add-dwc3-xhci-usb-trb-quirk.patch +++ b/patches-6.6/012-rk356x-add-dwc3-xhci-usb-trb-quirk.patch @@ -18,7 +18,7 @@ --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c -@@ -1560,6 +1560,8 @@ static void dwc3_get_properties(struct d +@@ -1578,6 +1578,8 @@ static void dwc3_get_properties(struct d "snps,dis-del-phy-power-chg-quirk"); dwc->dis_tx_ipgap_linecheck_quirk = device_property_read_bool(dev, "snps,dis-tx-ipgap-linecheck-quirk"); @@ -39,7 +39,7 @@ * @resume_hs_terminations: Set if we enable quirk for fixing 
improper crc * generation after resume from suspend. * @ulpi_ext_vbus_drv: Set to confiure the upli chip to drives CPEN pin -@@ -1332,6 +1335,7 @@ struct dwc3 { +@@ -1333,6 +1336,7 @@ struct dwc3 { unsigned dis_u2_freeclk_exists_quirk:1; unsigned dis_del_phy_power_chg_quirk:1; unsigned dis_tx_ipgap_linecheck_quirk:1; @@ -73,7 +73,7 @@ } --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c -@@ -3606,6 +3606,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd * +@@ -3637,6 +3637,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd * bool more_trbs_coming = true; bool need_zero_pkt = false; bool first_trb = true; @@ -81,7 +81,7 @@ unsigned int num_trbs; unsigned int start_cycle, num_sgs = 0; unsigned int enqd_len, block_len, trb_buff_len, full_len; -@@ -3642,6 +3643,13 @@ int xhci_queue_bulk_tx(struct xhci_hcd * +@@ -3673,6 +3674,13 @@ int xhci_queue_bulk_tx(struct xhci_hcd * if (urb->transfer_flags & URB_ZERO_PACKET && urb_priv->num_tds > 1) need_zero_pkt = true; @@ -95,7 +95,7 @@ td = &urb_priv->td[0]; /* -@@ -3670,6 +3678,13 @@ int xhci_queue_bulk_tx(struct xhci_hcd * +@@ -3701,6 +3709,13 @@ int xhci_queue_bulk_tx(struct xhci_hcd * first_trb = false; if (start_cycle == 0) field |= TRB_CYCLE; @@ -109,7 +109,7 @@ } else field |= ring->cycle_state; -@@ -3678,6 +3693,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd * +@@ -3709,6 +3724,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd * */ if (enqd_len + trb_buff_len < full_len) { field |= TRB_CHAIN; @@ -132,7 +132,7 @@ #define TRB_MAX_BUFF_SIZE (1 << TRB_MAX_BUFF_SHIFT) /* How much data is left before the 64KB boundary? */ #define TRB_BUFF_LEN_UP_TO_BOUNDARY(addr) (TRB_MAX_BUFF_SIZE - \ -@@ -1854,6 +1858,7 @@ struct xhci_hcd { +@@ -1855,6 +1859,7 @@ struct xhci_hcd { #define XHCI_STATE_HALTED (1 << 1) #define XHCI_STATE_REMOVING (1 << 2) unsigned long long quirks; diff --git a/patches-6.6/032-01-v6.9-phy-rockchip-Add-Samsung-HDMI-eDP-Combo-PHY-driver.patch b/patches-6.6/032-01-v6.9-phy-rockchip-Add-Samsung-HDMI-eDP-Combo-PHY-driver.patch new file mode 100644 index 0000000..30e14cd --- /dev/null +++ b/patches-6.6/032-01-v6.9-phy-rockchip-Add-Samsung-HDMI-eDP-Combo-PHY-driver.patch @@ -0,0 +1,1084 @@ +From 553be2830c5f33308483e8118de748a2c69fe593 Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Wed, 14 Feb 2024 13:45:37 +0200 +Subject: [PATCH] phy: rockchip: Add Samsung HDMI/eDP Combo PHY driver + +Add driver for the HDMI/eDP TX Combo PHY found on Rockchip RK3588 SoC. + +The PHY is based on a Samsung IP block and supports HDMI 2.1 TMDS, FRL +and eDP links. The maximum data rate is 12Gbps (FRL), while the minimum +is 250Mbps (TMDS). + +Only the TMDS link is currently supported. + +Co-developed-by: Algea Cao +Signed-off-by: Algea Cao +Tested-by: Heiko Stuebner +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20240214-phy-hdptx-v4-2-e7974f46c1a7@collabora.com +Signed-off-by: Vinod Koul +--- + drivers/phy/rockchip/Kconfig | 8 + + drivers/phy/rockchip/Makefile | 1 + + .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 1028 +++++++++++++++++ + 3 files changed, 1037 insertions(+) + create mode 100644 drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c + +--- a/drivers/phy/rockchip/Kconfig ++++ b/drivers/phy/rockchip/Kconfig +@@ -83,6 +83,14 @@ config PHY_ROCKCHIP_PCIE + help + Enable this to support the Rockchip PCIe PHY. 
+ ++config PHY_ROCKCHIP_SAMSUNG_HDPTX ++ tristate "Rockchip Samsung HDMI/eDP Combo PHY driver" ++ depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF ++ select GENERIC_PHY ++ help ++ Enable this to support the Rockchip HDMI/eDP Combo PHY ++ with Samsung IP block. ++ + config PHY_ROCKCHIP_SNPS_PCIE3 + tristate "Rockchip Snps PCIe3 PHY Driver" + depends on (ARCH_ROCKCHIP && OF) || COMPILE_TEST +--- a/drivers/phy/rockchip/Makefile ++++ b/drivers/phy/rockchip/Makefile +@@ -8,6 +8,7 @@ obj-$(CONFIG_PHY_ROCKCHIP_INNO_HDMI) += + obj-$(CONFIG_PHY_ROCKCHIP_INNO_USB2) += phy-rockchip-inno-usb2.o + obj-$(CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY) += phy-rockchip-naneng-combphy.o + obj-$(CONFIG_PHY_ROCKCHIP_PCIE) += phy-rockchip-pcie.o ++obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX) += phy-rockchip-samsung-hdptx.o + obj-$(CONFIG_PHY_ROCKCHIP_SNPS_PCIE3) += phy-rockchip-snps-pcie3.o + obj-$(CONFIG_PHY_ROCKCHIP_TYPEC) += phy-rockchip-typec.o + obj-$(CONFIG_PHY_ROCKCHIP_USB) += phy-rockchip-usb.o +--- /dev/null ++++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +@@ -0,0 +1,1028 @@ ++// SPDX-License-Identifier: GPL-2.0+ ++/* ++ * Copyright (c) 2021-2022 Rockchip Electronics Co., Ltd. ++ * Copyright (c) 2024 Collabora Ltd. ++ * ++ * Author: Algea Cao ++ * Author: Cristian Ciocaltea ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define GRF_HDPTX_CON0 0x00 ++#define HDPTX_I_PLL_EN BIT(7) ++#define HDPTX_I_BIAS_EN BIT(6) ++#define HDPTX_I_BGR_EN BIT(5) ++#define GRF_HDPTX_STATUS 0x80 ++#define HDPTX_O_PLL_LOCK_DONE BIT(3) ++#define HDPTX_O_PHY_CLK_RDY BIT(2) ++#define HDPTX_O_PHY_RDY BIT(1) ++#define HDPTX_O_SB_RDY BIT(0) ++ ++#define HDTPX_REG(_n, _min, _max) \ ++ ( \ ++ BUILD_BUG_ON_ZERO((0x##_n) < (0x##_min)) + \ ++ BUILD_BUG_ON_ZERO((0x##_n) > (0x##_max)) + \ ++ ((0x##_n) * 4) \ ++ ) ++ ++#define CMN_REG(n) HDTPX_REG(n, 0000, 00a7) ++#define SB_REG(n) HDTPX_REG(n, 0100, 0129) ++#define LNTOP_REG(n) HDTPX_REG(n, 0200, 0229) ++#define LANE_REG(n) HDTPX_REG(n, 0300, 062d) ++ ++/* CMN_REG(0008) */ ++#define LCPLL_EN_MASK BIT(6) ++#define LCPLL_LCVCO_MODE_EN_MASK BIT(4) ++/* CMN_REG(001e) */ ++#define LCPLL_PI_EN_MASK BIT(5) ++#define LCPLL_100M_CLK_EN_MASK BIT(0) ++/* CMN_REG(0025) */ ++#define LCPLL_PMS_IQDIV_RSTN BIT(4) ++/* CMN_REG(0028) */ ++#define LCPLL_SDC_FRAC_EN BIT(2) ++#define LCPLL_SDC_FRAC_RSTN BIT(0) ++/* CMN_REG(002d) */ ++#define LCPLL_SDC_N_MASK GENMASK(3, 1) ++/* CMN_REG(002e) */ ++#define LCPLL_SDC_NUMBERATOR_MASK GENMASK(5, 0) ++/* CMN_REG(002f) */ ++#define LCPLL_SDC_DENOMINATOR_MASK GENMASK(7, 2) ++#define LCPLL_SDC_NDIV_RSTN BIT(0) ++/* CMN_REG(003d) */ ++#define ROPLL_LCVCO_EN BIT(4) ++/* CMN_REG(004e) */ ++#define ROPLL_PI_EN BIT(5) ++/* CMN_REG(005c) */ ++#define ROPLL_PMS_IQDIV_RSTN BIT(5) ++/* CMN_REG(005e) */ ++#define ROPLL_SDM_EN_MASK BIT(6) ++#define ROPLL_SDM_FRAC_EN_RBR BIT(3) ++#define ROPLL_SDM_FRAC_EN_HBR BIT(2) ++#define ROPLL_SDM_FRAC_EN_HBR2 BIT(1) ++#define ROPLL_SDM_FRAC_EN_HBR3 BIT(0) ++/* CMN_REG(0064) */ ++#define ROPLL_SDM_NUM_SIGN_RBR_MASK BIT(3) ++/* CMN_REG(0069) */ ++#define ROPLL_SDC_N_RBR_MASK GENMASK(2, 0) ++/* CMN_REG(0074) */ ++#define ROPLL_SDC_NDIV_RSTN BIT(2) ++#define ROPLL_SSC_EN BIT(0) ++/* CMN_REG(0081) */ ++#define OVRD_PLL_CD_CLK_EN BIT(8) ++#define PLL_CD_HSCLK_EAST_EN BIT(0) ++/* CMN_REG(0086) */ ++#define PLL_PCG_POSTDIV_SEL_MASK GENMASK(7, 4) ++#define PLL_PCG_CLK_SEL_MASK GENMASK(3, 1) ++#define PLL_PCG_CLK_EN BIT(0) ++/* CMN_REG(0087) */ ++#define 
PLL_FRL_MODE_EN BIT(3) ++#define PLL_TX_HS_CLK_EN BIT(2) ++/* CMN_REG(0089) */ ++#define LCPLL_ALONE_MODE BIT(1) ++/* CMN_REG(0097) */ ++#define DIG_CLK_SEL BIT(1) ++#define ROPLL_REF BIT(1) ++#define LCPLL_REF 0 ++/* CMN_REG(0099) */ ++#define CMN_ROPLL_ALONE_MODE BIT(2) ++#define ROPLL_ALONE_MODE BIT(2) ++/* CMN_REG(009a) */ ++#define HS_SPEED_SEL BIT(0) ++#define DIV_10_CLOCK BIT(0) ++/* CMN_REG(009b) */ ++#define IS_SPEED_SEL BIT(4) ++#define LINK_SYMBOL_CLOCK BIT(4) ++#define LINK_SYMBOL_CLOCK1_2 0 ++ ++/* SB_REG(0102) */ ++#define OVRD_SB_RXTERM_EN_MASK BIT(5) ++#define SB_RXTERM_EN_MASK BIT(4) ++#define ANA_SB_RXTERM_OFFSP_MASK GENMASK(3, 0) ++/* SB_REG(0103) */ ++#define ANA_SB_RXTERM_OFFSN_MASK GENMASK(6, 3) ++#define OVRD_SB_RX_RESCAL_DONE_MASK BIT(1) ++#define SB_RX_RESCAL_DONE_MASK BIT(0) ++/* SB_REG(0104) */ ++#define OVRD_SB_EN_MASK BIT(5) ++#define SB_EN_MASK BIT(4) ++/* SB_REG(0105) */ ++#define OVRD_SB_EARC_CMDC_EN_MASK BIT(6) ++#define SB_EARC_CMDC_EN_MASK BIT(5) ++#define ANA_SB_TX_HLVL_PROG_MASK GENMASK(2, 0) ++/* SB_REG(0106) */ ++#define ANA_SB_TX_LLVL_PROG_MASK GENMASK(6, 4) ++/* SB_REG(0109) */ ++#define ANA_SB_DMRX_AFC_DIV_RATIO_MASK GENMASK(2, 0) ++/* SB_REG(010f) */ ++#define OVRD_SB_VREG_EN_MASK BIT(7) ++#define SB_VREG_EN_MASK BIT(6) ++#define OVRD_SB_VREG_LPF_BYPASS_MASK BIT(5) ++#define SB_VREG_LPF_BYPASS_MASK BIT(4) ++#define ANA_SB_VREG_GAIN_CTRL_MASK GENMASK(3, 0) ++/* SB_REG(0110) */ ++#define ANA_SB_VREG_REF_SEL_MASK BIT(0) ++/* SB_REG(0113) */ ++#define SB_RX_RCAL_OPT_CODE_MASK GENMASK(5, 4) ++#define SB_RX_RTERM_CTRL_MASK GENMASK(3, 0) ++/* SB_REG(0114) */ ++#define SB_TG_SB_EN_DELAY_TIME_MASK GENMASK(5, 3) ++#define SB_TG_RXTERM_EN_DELAY_TIME_MASK GENMASK(2, 0) ++/* SB_REG(0115) */ ++#define SB_READY_DELAY_TIME_MASK GENMASK(5, 3) ++#define SB_TG_OSC_EN_DELAY_TIME_MASK GENMASK(2, 0) ++/* SB_REG(0116) */ ++#define AFC_RSTN_DELAY_TIME_MASK GENMASK(6, 4) ++/* SB_REG(0117) */ ++#define FAST_PULSE_TIME_MASK GENMASK(3, 0) ++/* SB_REG(011b) */ ++#define SB_EARC_SIG_DET_BYPASS_MASK BIT(4) ++#define SB_AFC_TOL_MASK GENMASK(3, 0) ++/* SB_REG(011f) */ ++#define SB_PWM_AFC_CTRL_MASK GENMASK(7, 2) ++#define SB_RCAL_RSTN_MASK BIT(1) ++/* SB_REG(0120) */ ++#define SB_EARC_EN_MASK BIT(1) ++#define SB_EARC_AFC_EN_MASK BIT(2) ++/* SB_REG(0123) */ ++#define OVRD_SB_READY_MASK BIT(5) ++#define SB_READY_MASK BIT(4) ++ ++/* LNTOP_REG(0200) */ ++#define PROTOCOL_SEL BIT(2) ++#define HDMI_MODE BIT(2) ++#define HDMI_TMDS_FRL_SEL BIT(1) ++/* LNTOP_REG(0206) */ ++#define DATA_BUS_SEL BIT(0) ++#define DATA_BUS_36_40 BIT(0) ++/* LNTOP_REG(0207) */ ++#define LANE_EN 0xf ++#define ALL_LANE_EN 0xf ++ ++/* LANE_REG(0312) */ ++#define LN0_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN0_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN0_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN0_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0412) */ ++#define LN1_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN1_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN1_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN1_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0512) */ ++#define LN2_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN2_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN2_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN2_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0612) */ ++#define LN3_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN3_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN3_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN3_TX_SER_RATE_SEL_HBR3 BIT(2) ++ ++struct lcpll_config { ++ u32 bit_rate; ++ u8 lcvco_mode_en; ++ u8 pi_en; ++ u8 clk_en_100m; ++ u8 pms_mdiv; ++ u8 pms_mdiv_afc; ++ u8 
pms_pdiv; ++ u8 pms_refdiv; ++ u8 pms_sdiv; ++ u8 pi_cdiv_rstn; ++ u8 pi_cdiv_sel; ++ u8 sdm_en; ++ u8 sdm_rstn; ++ u8 sdc_frac_en; ++ u8 sdc_rstn; ++ u8 sdm_deno; ++ u8 sdm_num_sign; ++ u8 sdm_num; ++ u8 sdc_n; ++ u8 sdc_n2; ++ u8 sdc_num; ++ u8 sdc_deno; ++ u8 sdc_ndiv_rstn; ++ u8 ssc_en; ++ u8 ssc_fm_dev; ++ u8 ssc_fm_freq; ++ u8 ssc_clk_div_sel; ++ u8 cd_tx_ser_rate_sel; ++}; ++ ++struct ropll_config { ++ u32 bit_rate; ++ u8 pms_mdiv; ++ u8 pms_mdiv_afc; ++ u8 pms_pdiv; ++ u8 pms_refdiv; ++ u8 pms_sdiv; ++ u8 pms_iqdiv_rstn; ++ u8 ref_clk_sel; ++ u8 sdm_en; ++ u8 sdm_rstn; ++ u8 sdc_frac_en; ++ u8 sdc_rstn; ++ u8 sdm_clk_div; ++ u8 sdm_deno; ++ u8 sdm_num_sign; ++ u8 sdm_num; ++ u8 sdc_n; ++ u8 sdc_num; ++ u8 sdc_deno; ++ u8 sdc_ndiv_rstn; ++ u8 ssc_en; ++ u8 ssc_fm_dev; ++ u8 ssc_fm_freq; ++ u8 ssc_clk_div_sel; ++ u8 ana_cpp_ctrl; ++ u8 ana_lpf_c_sel; ++ u8 cd_tx_ser_rate_sel; ++}; ++ ++enum rk_hdptx_reset { ++ RST_PHY = 0, ++ RST_APB, ++ RST_INIT, ++ RST_CMN, ++ RST_LANE, ++ RST_ROPLL, ++ RST_LCPLL, ++ RST_MAX ++}; ++ ++struct rk_hdptx_phy { ++ struct device *dev; ++ struct regmap *regmap; ++ struct regmap *grf; ++ ++ struct phy *phy; ++ struct phy_config *phy_cfg; ++ struct clk_bulk_data *clks; ++ int nr_clks; ++ struct reset_control_bulk_data rsts[RST_MAX]; ++}; ++ ++static const struct ropll_config ropll_tmds_cfg[] = { ++ { 5940000, 124, 124, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 3712500, 155, 155, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 2970000, 124, 124, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1620000, 135, 135, 1, 1, 3, 1, 1, 0, 1, 1, 1, 1, 4, 0, 3, 5, 5, 0x10, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1856250, 155, 155, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1540000, 193, 193, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 193, 1, 32, 2, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1485000, 0x7b, 0x7b, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 0, 3, 5, 5, ++ 0x10, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1462500, 122, 122, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 244, 1, 16, 2, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1190000, 149, 149, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 149, 1, 16, 2, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1065000, 89, 89, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 89, 1, 16, 1, 0, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1080000, 135, 135, 1, 1, 5, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 855000, 214, 214, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 214, 1, 16, 2, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 835000, 105, 105, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 42, 1, 16, 1, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 928125, 155, 155, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 742500, 124, 124, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5, ++ 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 270000, 0x5a, 0x5a, 1, 1, 0xf, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 251750, 84, 
84, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 168, 1, 16, 4, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_cmn_init_seq[] = { ++ REG_SEQ0(CMN_REG(0009), 0x0c), ++ REG_SEQ0(CMN_REG(000a), 0x83), ++ REG_SEQ0(CMN_REG(000b), 0x06), ++ REG_SEQ0(CMN_REG(000c), 0x20), ++ REG_SEQ0(CMN_REG(000d), 0xb8), ++ REG_SEQ0(CMN_REG(000e), 0x0f), ++ REG_SEQ0(CMN_REG(000f), 0x0f), ++ REG_SEQ0(CMN_REG(0010), 0x04), ++ REG_SEQ0(CMN_REG(0011), 0x00), ++ REG_SEQ0(CMN_REG(0012), 0x26), ++ REG_SEQ0(CMN_REG(0013), 0x22), ++ REG_SEQ0(CMN_REG(0014), 0x24), ++ REG_SEQ0(CMN_REG(0015), 0x77), ++ REG_SEQ0(CMN_REG(0016), 0x08), ++ REG_SEQ0(CMN_REG(0017), 0x00), ++ REG_SEQ0(CMN_REG(0018), 0x04), ++ REG_SEQ0(CMN_REG(0019), 0x48), ++ REG_SEQ0(CMN_REG(001a), 0x01), ++ REG_SEQ0(CMN_REG(001b), 0x00), ++ REG_SEQ0(CMN_REG(001c), 0x01), ++ REG_SEQ0(CMN_REG(001d), 0x64), ++ REG_SEQ0(CMN_REG(001f), 0x00), ++ REG_SEQ0(CMN_REG(0026), 0x53), ++ REG_SEQ0(CMN_REG(0029), 0x01), ++ REG_SEQ0(CMN_REG(0030), 0x00), ++ REG_SEQ0(CMN_REG(0031), 0x20), ++ REG_SEQ0(CMN_REG(0032), 0x30), ++ REG_SEQ0(CMN_REG(0033), 0x0b), ++ REG_SEQ0(CMN_REG(0034), 0x23), ++ REG_SEQ0(CMN_REG(0035), 0x00), ++ REG_SEQ0(CMN_REG(0038), 0x00), ++ REG_SEQ0(CMN_REG(0039), 0x00), ++ REG_SEQ0(CMN_REG(003a), 0x00), ++ REG_SEQ0(CMN_REG(003b), 0x00), ++ REG_SEQ0(CMN_REG(003c), 0x80), ++ REG_SEQ0(CMN_REG(003e), 0x0c), ++ REG_SEQ0(CMN_REG(003f), 0x83), ++ REG_SEQ0(CMN_REG(0040), 0x06), ++ REG_SEQ0(CMN_REG(0041), 0x20), ++ REG_SEQ0(CMN_REG(0042), 0xb8), ++ REG_SEQ0(CMN_REG(0043), 0x00), ++ REG_SEQ0(CMN_REG(0044), 0x46), ++ REG_SEQ0(CMN_REG(0045), 0x24), ++ REG_SEQ0(CMN_REG(0046), 0xff), ++ REG_SEQ0(CMN_REG(0047), 0x00), ++ REG_SEQ0(CMN_REG(0048), 0x44), ++ REG_SEQ0(CMN_REG(0049), 0xfa), ++ REG_SEQ0(CMN_REG(004a), 0x08), ++ REG_SEQ0(CMN_REG(004b), 0x00), ++ REG_SEQ0(CMN_REG(004c), 0x01), ++ REG_SEQ0(CMN_REG(004d), 0x64), ++ REG_SEQ0(CMN_REG(004e), 0x14), ++ REG_SEQ0(CMN_REG(004f), 0x00), ++ REG_SEQ0(CMN_REG(0050), 0x00), ++ REG_SEQ0(CMN_REG(005d), 0x0c), ++ REG_SEQ0(CMN_REG(005f), 0x01), ++ REG_SEQ0(CMN_REG(006b), 0x04), ++ REG_SEQ0(CMN_REG(0073), 0x30), ++ REG_SEQ0(CMN_REG(0074), 0x00), ++ REG_SEQ0(CMN_REG(0075), 0x20), ++ REG_SEQ0(CMN_REG(0076), 0x30), ++ REG_SEQ0(CMN_REG(0077), 0x08), ++ REG_SEQ0(CMN_REG(0078), 0x0c), ++ REG_SEQ0(CMN_REG(0079), 0x00), ++ REG_SEQ0(CMN_REG(007b), 0x00), ++ REG_SEQ0(CMN_REG(007c), 0x00), ++ REG_SEQ0(CMN_REG(007d), 0x00), ++ REG_SEQ0(CMN_REG(007e), 0x00), ++ REG_SEQ0(CMN_REG(007f), 0x00), ++ REG_SEQ0(CMN_REG(0080), 0x00), ++ REG_SEQ0(CMN_REG(0081), 0x09), ++ REG_SEQ0(CMN_REG(0082), 0x04), ++ REG_SEQ0(CMN_REG(0083), 0x24), ++ REG_SEQ0(CMN_REG(0084), 0x20), ++ REG_SEQ0(CMN_REG(0085), 0x03), ++ REG_SEQ0(CMN_REG(0086), 0x01), ++ REG_SEQ0(CMN_REG(0087), 0x0c), ++ REG_SEQ0(CMN_REG(008a), 0x55), ++ REG_SEQ0(CMN_REG(008b), 0x25), ++ REG_SEQ0(CMN_REG(008c), 0x2c), ++ REG_SEQ0(CMN_REG(008d), 0x22), ++ REG_SEQ0(CMN_REG(008e), 0x14), ++ REG_SEQ0(CMN_REG(008f), 0x20), ++ REG_SEQ0(CMN_REG(0090), 0x00), ++ REG_SEQ0(CMN_REG(0091), 0x00), ++ REG_SEQ0(CMN_REG(0092), 0x00), ++ REG_SEQ0(CMN_REG(0093), 0x00), ++ REG_SEQ0(CMN_REG(009a), 0x11), ++ REG_SEQ0(CMN_REG(009b), 0x10), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_cmn_init_seq[] = { ++ REG_SEQ0(CMN_REG(0008), 0x00), ++ REG_SEQ0(CMN_REG(0011), 0x01), ++ REG_SEQ0(CMN_REG(0017), 0x20), ++ REG_SEQ0(CMN_REG(001e), 0x14), ++ REG_SEQ0(CMN_REG(0020), 0x00), ++ REG_SEQ0(CMN_REG(0021), 0x00), ++ REG_SEQ0(CMN_REG(0022), 0x11), ++ REG_SEQ0(CMN_REG(0023), 0x00), ++ 
REG_SEQ0(CMN_REG(0024), 0x00), ++ REG_SEQ0(CMN_REG(0025), 0x53), ++ REG_SEQ0(CMN_REG(0026), 0x00), ++ REG_SEQ0(CMN_REG(0027), 0x00), ++ REG_SEQ0(CMN_REG(0028), 0x01), ++ REG_SEQ0(CMN_REG(002a), 0x00), ++ REG_SEQ0(CMN_REG(002b), 0x00), ++ REG_SEQ0(CMN_REG(002c), 0x00), ++ REG_SEQ0(CMN_REG(002d), 0x00), ++ REG_SEQ0(CMN_REG(002e), 0x04), ++ REG_SEQ0(CMN_REG(002f), 0x00), ++ REG_SEQ0(CMN_REG(0030), 0x20), ++ REG_SEQ0(CMN_REG(0031), 0x30), ++ REG_SEQ0(CMN_REG(0032), 0x0b), ++ REG_SEQ0(CMN_REG(0033), 0x23), ++ REG_SEQ0(CMN_REG(0034), 0x00), ++ REG_SEQ0(CMN_REG(003d), 0x40), ++ REG_SEQ0(CMN_REG(0042), 0x78), ++ REG_SEQ0(CMN_REG(004e), 0x34), ++ REG_SEQ0(CMN_REG(005c), 0x25), ++ REG_SEQ0(CMN_REG(005e), 0x4f), ++ REG_SEQ0(CMN_REG(0074), 0x04), ++ REG_SEQ0(CMN_REG(0081), 0x01), ++ REG_SEQ0(CMN_REG(0087), 0x04), ++ REG_SEQ0(CMN_REG(0089), 0x00), ++ REG_SEQ0(CMN_REG(0095), 0x00), ++ REG_SEQ0(CMN_REG(0097), 0x02), ++ REG_SEQ0(CMN_REG(0099), 0x04), ++ REG_SEQ0(CMN_REG(009b), 0x00), ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_sb_init_seq[] = { ++ REG_SEQ0(SB_REG(0114), 0x00), ++ REG_SEQ0(SB_REG(0115), 0x00), ++ REG_SEQ0(SB_REG(0116), 0x00), ++ REG_SEQ0(SB_REG(0117), 0x00), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lntop_highbr_seq[] = { ++ REG_SEQ0(LNTOP_REG(0201), 0x00), ++ REG_SEQ0(LNTOP_REG(0202), 0x00), ++ REG_SEQ0(LNTOP_REG(0203), 0x0f), ++ REG_SEQ0(LNTOP_REG(0204), 0xff), ++ REG_SEQ0(LNTOP_REG(0205), 0xff), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lntop_lowbr_seq[] = { ++ REG_SEQ0(LNTOP_REG(0201), 0x07), ++ REG_SEQ0(LNTOP_REG(0202), 0xc1), ++ REG_SEQ0(LNTOP_REG(0203), 0xf0), ++ REG_SEQ0(LNTOP_REG(0204), 0x7c), ++ REG_SEQ0(LNTOP_REG(0205), 0x1f), ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_lane_init_seq[] = { ++ REG_SEQ0(LANE_REG(0303), 0x0c), ++ REG_SEQ0(LANE_REG(0307), 0x20), ++ REG_SEQ0(LANE_REG(030a), 0x17), ++ REG_SEQ0(LANE_REG(030b), 0x77), ++ REG_SEQ0(LANE_REG(030c), 0x77), ++ REG_SEQ0(LANE_REG(030d), 0x77), ++ REG_SEQ0(LANE_REG(030e), 0x38), ++ REG_SEQ0(LANE_REG(0310), 0x03), ++ REG_SEQ0(LANE_REG(0311), 0x0f), ++ REG_SEQ0(LANE_REG(0316), 0x02), ++ REG_SEQ0(LANE_REG(031b), 0x01), ++ REG_SEQ0(LANE_REG(031f), 0x15), ++ REG_SEQ0(LANE_REG(0320), 0xa0), ++ REG_SEQ0(LANE_REG(0403), 0x0c), ++ REG_SEQ0(LANE_REG(0407), 0x20), ++ REG_SEQ0(LANE_REG(040a), 0x17), ++ REG_SEQ0(LANE_REG(040b), 0x77), ++ REG_SEQ0(LANE_REG(040c), 0x77), ++ REG_SEQ0(LANE_REG(040d), 0x77), ++ REG_SEQ0(LANE_REG(040e), 0x38), ++ REG_SEQ0(LANE_REG(0410), 0x03), ++ REG_SEQ0(LANE_REG(0411), 0x0f), ++ REG_SEQ0(LANE_REG(0416), 0x02), ++ REG_SEQ0(LANE_REG(041b), 0x01), ++ REG_SEQ0(LANE_REG(041f), 0x15), ++ REG_SEQ0(LANE_REG(0420), 0xa0), ++ REG_SEQ0(LANE_REG(0503), 0x0c), ++ REG_SEQ0(LANE_REG(0507), 0x20), ++ REG_SEQ0(LANE_REG(050a), 0x17), ++ REG_SEQ0(LANE_REG(050b), 0x77), ++ REG_SEQ0(LANE_REG(050c), 0x77), ++ REG_SEQ0(LANE_REG(050d), 0x77), ++ REG_SEQ0(LANE_REG(050e), 0x38), ++ REG_SEQ0(LANE_REG(0510), 0x03), ++ REG_SEQ0(LANE_REG(0511), 0x0f), ++ REG_SEQ0(LANE_REG(0516), 0x02), ++ REG_SEQ0(LANE_REG(051b), 0x01), ++ REG_SEQ0(LANE_REG(051f), 0x15), ++ REG_SEQ0(LANE_REG(0520), 0xa0), ++ REG_SEQ0(LANE_REG(0603), 0x0c), ++ REG_SEQ0(LANE_REG(0607), 0x20), ++ REG_SEQ0(LANE_REG(060a), 0x17), ++ REG_SEQ0(LANE_REG(060b), 0x77), ++ REG_SEQ0(LANE_REG(060c), 0x77), ++ REG_SEQ0(LANE_REG(060d), 0x77), ++ REG_SEQ0(LANE_REG(060e), 0x38), ++ REG_SEQ0(LANE_REG(0610), 0x03), ++ REG_SEQ0(LANE_REG(0611), 0x0f), ++ REG_SEQ0(LANE_REG(0616), 0x02), ++ REG_SEQ0(LANE_REG(061b), 0x01), ++ 
REG_SEQ0(LANE_REG(061f), 0x15), ++ REG_SEQ0(LANE_REG(0620), 0xa0), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lane_init_seq[] = { ++ REG_SEQ0(LANE_REG(0312), 0x00), ++ REG_SEQ0(LANE_REG(031e), 0x00), ++ REG_SEQ0(LANE_REG(0412), 0x00), ++ REG_SEQ0(LANE_REG(041e), 0x00), ++ REG_SEQ0(LANE_REG(0512), 0x00), ++ REG_SEQ0(LANE_REG(051e), 0x00), ++ REG_SEQ0(LANE_REG(0612), 0x00), ++ REG_SEQ0(LANE_REG(061e), 0x08), ++ REG_SEQ0(LANE_REG(0303), 0x2f), ++ REG_SEQ0(LANE_REG(0403), 0x2f), ++ REG_SEQ0(LANE_REG(0503), 0x2f), ++ REG_SEQ0(LANE_REG(0603), 0x2f), ++ REG_SEQ0(LANE_REG(0305), 0x03), ++ REG_SEQ0(LANE_REG(0405), 0x03), ++ REG_SEQ0(LANE_REG(0505), 0x03), ++ REG_SEQ0(LANE_REG(0605), 0x03), ++ REG_SEQ0(LANE_REG(0306), 0x1c), ++ REG_SEQ0(LANE_REG(0406), 0x1c), ++ REG_SEQ0(LANE_REG(0506), 0x1c), ++ REG_SEQ0(LANE_REG(0606), 0x1c), ++}; ++ ++static bool rk_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg) ++{ ++ switch (reg) { ++ case 0x0000 ... 0x029c: ++ case 0x0400 ... 0x04a4: ++ case 0x0800 ... 0x08a4: ++ case 0x0c00 ... 0x0cb4: ++ case 0x1000 ... 0x10b4: ++ case 0x1400 ... 0x14b4: ++ case 0x1800 ... 0x18b4: ++ return true; ++ } ++ ++ return false; ++} ++ ++static const struct regmap_config rk_hdptx_phy_regmap_config = { ++ .reg_bits = 32, ++ .reg_stride = 4, ++ .val_bits = 32, ++ .writeable_reg = rk_hdptx_phy_is_rw_reg, ++ .readable_reg = rk_hdptx_phy_is_rw_reg, ++ .fast_io = true, ++ .max_register = 0x18b4, ++}; ++ ++#define rk_hdptx_multi_reg_write(hdptx, seq) \ ++ regmap_multi_reg_write((hdptx)->regmap, seq, ARRAY_SIZE(seq)) ++ ++static void rk_hdptx_pre_power_up(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ ++ reset_control_assert(hdptx->rsts[RST_APB].rstc); ++ usleep_range(20, 25); ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ ++ reset_control_assert(hdptx->rsts[RST_LANE].rstc); ++ reset_control_assert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_assert(hdptx->rsts[RST_INIT].rstc); ++ ++ val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++} ++ ++static int rk_hdptx_post_enable_lane(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ int ret; ++ ++ reset_control_deassert(hdptx->rsts[RST_LANE].rstc); ++ ++ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | ++ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, ++ (val & HDPTX_O_PHY_RDY) && ++ (val & HDPTX_O_PLL_LOCK_DONE), ++ 100, 5000); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to get PHY lane lock: %d\n", ret); ++ return ret; ++ } ++ ++ dev_dbg(hdptx->dev, "PHY lane locked\n"); ++ ++ return 0; ++} ++ ++static int rk_hdptx_post_enable_pll(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ int ret; ++ ++ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | ++ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ usleep_range(10, 15); ++ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); ++ ++ usleep_range(10, 15); ++ val = HDPTX_I_PLL_EN << 16 | HDPTX_I_PLL_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ usleep_range(10, 15); ++ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); ++ ++ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, ++ val & HDPTX_O_PHY_CLK_RDY, 20, 400); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to get PHY clk ready: %d\n", ret); ++ return ret; ++ } ++ ++ dev_dbg(hdptx->dev, "PHY clk ready\n"); ++ ++ return 0; ++} ++ ++static void rk_hdptx_phy_disable(struct rk_hdptx_phy 
*hdptx) ++{ ++ u32 val; ++ ++ /* reset phy and apb, or phy locked flag may keep 1 */ ++ reset_control_assert(hdptx->rsts[RST_PHY].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_PHY].rstc); ++ ++ reset_control_assert(hdptx->rsts[RST_APB].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ ++ regmap_write(hdptx->regmap, LANE_REG(0300), 0x82); ++ regmap_write(hdptx->regmap, SB_REG(010f), 0xc1); ++ regmap_write(hdptx->regmap, SB_REG(0110), 0x1); ++ regmap_write(hdptx->regmap, LANE_REG(0301), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0401), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0501), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0601), 0x80); ++ ++ reset_control_assert(hdptx->rsts[RST_LANE].rstc); ++ reset_control_assert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_assert(hdptx->rsts[RST_INIT].rstc); ++ ++ val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++} ++ ++static bool rk_hdptx_phy_clk_pll_calc(unsigned int data_rate, ++ struct ropll_config *cfg) ++{ ++ const unsigned int fout = data_rate / 2, fref = 24000; ++ unsigned long k = 0, lc, k_sub, lc_sub; ++ unsigned int fvco, sdc; ++ u32 mdiv, sdiv, n = 8; ++ ++ if (fout > 0xfffffff) ++ return false; ++ ++ for (sdiv = 16; sdiv >= 1; sdiv--) { ++ if (sdiv % 2 && sdiv != 1) ++ continue; ++ ++ fvco = fout * sdiv; ++ ++ if (fvco < 2000000 || fvco > 4000000) ++ continue; ++ ++ mdiv = DIV_ROUND_UP(fvco, fref); ++ if (mdiv < 20 || mdiv > 255) ++ continue; ++ ++ if (fref * mdiv - fvco) { ++ for (sdc = 264000; sdc <= 750000; sdc += fref) ++ if (sdc * n > fref * mdiv) ++ break; ++ ++ if (sdc > 750000) ++ continue; ++ ++ rational_best_approximation(fref * mdiv - fvco, ++ sdc / 16, ++ GENMASK(6, 0), ++ GENMASK(7, 0), ++ &k, &lc); ++ ++ rational_best_approximation(sdc * n - fref * mdiv, ++ sdc, ++ GENMASK(6, 0), ++ GENMASK(7, 0), ++ &k_sub, &lc_sub); ++ } ++ ++ break; ++ } ++ ++ if (sdiv < 1) ++ return false; ++ ++ if (cfg) { ++ cfg->pms_mdiv = mdiv; ++ cfg->pms_mdiv_afc = mdiv; ++ cfg->pms_pdiv = 1; ++ cfg->pms_refdiv = 1; ++ cfg->pms_sdiv = sdiv - 1; ++ ++ cfg->sdm_en = k > 0 ? 
1 : 0; ++ if (cfg->sdm_en) { ++ cfg->sdm_deno = lc; ++ cfg->sdm_num_sign = 1; ++ cfg->sdm_num = k; ++ cfg->sdc_n = n - 3; ++ cfg->sdc_num = k_sub; ++ cfg->sdc_deno = lc_sub; ++ } ++ } ++ ++ return true; ++} ++ ++static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, ++ unsigned int rate) ++{ ++ const struct ropll_config *cfg = NULL; ++ struct ropll_config rc = {0}; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++) ++ if (rate == ropll_tmds_cfg[i].bit_rate) { ++ cfg = &ropll_tmds_cfg[i]; ++ break; ++ } ++ ++ if (!cfg) { ++ if (rk_hdptx_phy_clk_pll_calc(rate, &rc)) { ++ cfg = &rc; ++ } else { ++ dev_err(hdptx->dev, "%s cannot find pll cfg\n", __func__); ++ return -EINVAL; ++ } ++ } ++ ++ dev_dbg(hdptx->dev, "mdiv=%u, sdiv=%u, sdm_en=%u, k_sign=%u, k=%u, lc=%u\n", ++ cfg->pms_mdiv, cfg->pms_sdiv + 1, cfg->sdm_en, ++ cfg->sdm_num_sign, cfg->sdm_num, cfg->sdm_deno); ++ ++ rk_hdptx_pre_power_up(hdptx); ++ ++ reset_control_assert(hdptx->rsts[RST_ROPLL].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_ROPLL].rstc); ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_cmn_init_seq); ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_cmn_init_seq); ++ ++ regmap_write(hdptx->regmap, CMN_REG(0051), cfg->pms_mdiv); ++ regmap_write(hdptx->regmap, CMN_REG(0055), cfg->pms_mdiv_afc); ++ regmap_write(hdptx->regmap, CMN_REG(0059), ++ (cfg->pms_pdiv << 4) | cfg->pms_refdiv); ++ regmap_write(hdptx->regmap, CMN_REG(005a), cfg->pms_sdiv << 4); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDM_EN_MASK, ++ FIELD_PREP(ROPLL_SDM_EN_MASK, cfg->sdm_en)); ++ if (!cfg->sdm_en) ++ regmap_update_bits(hdptx->regmap, CMN_REG(005e), 0xf, 0); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0064), ROPLL_SDM_NUM_SIGN_RBR_MASK, ++ FIELD_PREP(ROPLL_SDM_NUM_SIGN_RBR_MASK, cfg->sdm_num_sign)); ++ ++ regmap_write(hdptx->regmap, CMN_REG(0060), cfg->sdm_deno); ++ regmap_write(hdptx->regmap, CMN_REG(0065), cfg->sdm_num); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0069), ROPLL_SDC_N_RBR_MASK, ++ FIELD_PREP(ROPLL_SDC_N_RBR_MASK, cfg->sdc_n)); ++ ++ regmap_write(hdptx->regmap, CMN_REG(006c), cfg->sdc_num); ++ regmap_write(hdptx->regmap, CMN_REG(0070), cfg->sdc_deno); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK, ++ FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv)); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN, ++ PLL_PCG_CLK_EN); ++ ++ return rk_hdptx_post_enable_pll(hdptx); ++} ++ ++static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx, ++ unsigned int rate) ++{ ++ u32 val; ++ int ret; ++ ++ ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val); ++ if (ret) ++ return ret; ++ ++ if (!(val & HDPTX_O_PLL_LOCK_DONE)) { ++ ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, rate); ++ if (ret) ++ return ret; ++ } ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_sb_init_seq); ++ ++ regmap_write(hdptx->regmap, LNTOP_REG(0200), 0x06); ++ ++ if (rate >= 3400000) { ++ /* For 1/40 bitrate clk */ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_highbr_seq); ++ } else { ++ /* For 1/10 bitrate clk */ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_lowbr_seq); ++ } ++ ++ regmap_write(hdptx->regmap, LNTOP_REG(0206), 0x07); ++ regmap_write(hdptx->regmap, LNTOP_REG(0207), 0x0f); ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_lane_init_seq); ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lane_init_seq); ++ ++ return rk_hdptx_post_enable_lane(hdptx); ++} ++ ++static int 
rk_hdptx_phy_power_on(struct phy *phy) ++{ ++ struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy); ++ int ret, bus_width = phy_get_bus_width(hdptx->phy); ++ /* ++ * FIXME: Temporary workaround to pass pixel_clk_rate ++ * from the HDMI bridge driver until phy_configure_opts_hdmi ++ * becomes available in the PHY API. ++ */ ++ unsigned int rate = bus_width & 0xfffffff; ++ ++ dev_dbg(hdptx->dev, "%s bus_width=%x rate=%u\n", ++ __func__, bus_width, rate); ++ ++ ret = pm_runtime_resume_and_get(hdptx->dev); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to resume phy: %d\n", ret); ++ return ret; ++ } ++ ++ ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate); ++ if (ret) ++ pm_runtime_put(hdptx->dev); ++ ++ return ret; ++} ++ ++static int rk_hdptx_phy_power_off(struct phy *phy) ++{ ++ struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy); ++ u32 val; ++ int ret; ++ ++ ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val); ++ if (ret == 0 && (val & HDPTX_O_PLL_LOCK_DONE)) ++ rk_hdptx_phy_disable(hdptx); ++ ++ pm_runtime_put(hdptx->dev); ++ ++ return ret; ++} ++ ++static const struct phy_ops rk_hdptx_phy_ops = { ++ .power_on = rk_hdptx_phy_power_on, ++ .power_off = rk_hdptx_phy_power_off, ++ .owner = THIS_MODULE, ++}; ++ ++static int rk_hdptx_phy_runtime_suspend(struct device *dev) ++{ ++ struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev); ++ ++ clk_bulk_disable_unprepare(hdptx->nr_clks, hdptx->clks); ++ ++ return 0; ++} ++ ++static int rk_hdptx_phy_runtime_resume(struct device *dev) ++{ ++ struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev); ++ int ret; ++ ++ ret = clk_bulk_prepare_enable(hdptx->nr_clks, hdptx->clks); ++ if (ret) ++ dev_err(hdptx->dev, "Failed to enable clocks: %d\n", ret); ++ ++ return ret; ++} ++ ++static int rk_hdptx_phy_probe(struct platform_device *pdev) ++{ ++ struct phy_provider *phy_provider; ++ struct device *dev = &pdev->dev; ++ struct rk_hdptx_phy *hdptx; ++ void __iomem *regs; ++ int ret; ++ ++ hdptx = devm_kzalloc(dev, sizeof(*hdptx), GFP_KERNEL); ++ if (!hdptx) ++ return -ENOMEM; ++ ++ hdptx->dev = dev; ++ ++ regs = devm_platform_ioremap_resource(pdev, 0); ++ if (IS_ERR(regs)) ++ return dev_err_probe(dev, PTR_ERR(regs), ++ "Failed to ioremap resource\n"); ++ ++ ret = devm_clk_bulk_get_all(dev, &hdptx->clks); ++ if (ret < 0) ++ return dev_err_probe(dev, ret, "Failed to get clocks\n"); ++ if (ret == 0) ++ return dev_err_probe(dev, -EINVAL, "Missing clocks\n"); ++ ++ hdptx->nr_clks = ret; ++ ++ hdptx->regmap = devm_regmap_init_mmio(dev, regs, ++ &rk_hdptx_phy_regmap_config); ++ if (IS_ERR(hdptx->regmap)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->regmap), ++ "Failed to init regmap\n"); ++ ++ hdptx->rsts[RST_PHY].id = "phy"; ++ hdptx->rsts[RST_APB].id = "apb"; ++ hdptx->rsts[RST_INIT].id = "init"; ++ hdptx->rsts[RST_CMN].id = "cmn"; ++ hdptx->rsts[RST_LANE].id = "lane"; ++ hdptx->rsts[RST_ROPLL].id = "ropll"; ++ hdptx->rsts[RST_LCPLL].id = "lcpll"; ++ ++ ret = devm_reset_control_bulk_get_exclusive(dev, RST_MAX, hdptx->rsts); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to get resets\n"); ++ ++ hdptx->grf = syscon_regmap_lookup_by_phandle(dev->of_node, ++ "rockchip,grf"); ++ if (IS_ERR(hdptx->grf)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->grf), ++ "Could not get GRF syscon\n"); ++ ++ hdptx->phy = devm_phy_create(dev, NULL, &rk_hdptx_phy_ops); ++ if (IS_ERR(hdptx->phy)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->phy), ++ "Failed to create HDMI PHY\n"); ++ ++ platform_set_drvdata(pdev, hdptx); ++ phy_set_drvdata(hdptx->phy, hdptx); ++ phy_set_bus_width(hdptx->phy, 8); ++ 
++ ret = devm_pm_runtime_enable(dev); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to enable runtime PM\n"); ++ ++ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); ++ if (IS_ERR(phy_provider)) ++ return dev_err_probe(dev, PTR_ERR(phy_provider), ++ "Failed to register PHY provider\n"); ++ ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); ++ ++ return 0; ++} ++ ++static const struct dev_pm_ops rk_hdptx_phy_pm_ops = { ++ RUNTIME_PM_OPS(rk_hdptx_phy_runtime_suspend, ++ rk_hdptx_phy_runtime_resume, NULL) ++}; ++ ++static const struct of_device_id rk_hdptx_phy_of_match[] = { ++ { .compatible = "rockchip,rk3588-hdptx-phy", }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, rk_hdptx_phy_of_match); ++ ++static struct platform_driver rk_hdptx_phy_driver = { ++ .probe = rk_hdptx_phy_probe, ++ .driver = { ++ .name = "rockchip-hdptx-phy", ++ .pm = &rk_hdptx_phy_pm_ops, ++ .of_match_table = rk_hdptx_phy_of_match, ++ }, ++}; ++module_platform_driver(rk_hdptx_phy_driver); ++ ++MODULE_AUTHOR("Algea Cao "); ++MODULE_AUTHOR("Cristian Ciocaltea "); ++MODULE_DESCRIPTION("Samsung HDMI/eDP Transmitter Combo PHY Driver"); ++MODULE_LICENSE("GPL"); diff --git a/patches-6.6/033-01-v6.7-drm-rockchip-vop-Add-rv1126-vop_lite-support.patch b/patches-6.6/033-01-v6.7-drm-rockchip-vop-Add-rv1126-vop_lite-support.patch new file mode 100644 index 0000000..b2ce0c3 --- /dev/null +++ b/patches-6.6/033-01-v6.7-drm-rockchip-vop-Add-rv1126-vop_lite-support.patch @@ -0,0 +1,88 @@ +From 3c3cfcb93f6e6e1cede0cdfe3ec24f16ee108929 Mon Sep 17 00:00:00 2001 +From: Jagan Teki +Date: Mon, 31 Jul 2023 16:30:04 +0530 +Subject: [PATCH] drm/rockchip: vop: Add rv1126 vop_lite support + +RV1126 VOP_LITE supports the video output processing ofMIPI DSI, +RGB display interfaces with max output resolution of 1920x1080. + +Add support for rv1126 vop. 
+ +Signed-off-by: Jagan Teki +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230731110012.2913742-7-jagan@edgeble.ai +--- + drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 55 +++++++++++++++++++++ + 1 file changed, 55 insertions(+) + +--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +@@ -1120,6 +1120,59 @@ static const struct vop_data rk3328_vop + .max_output = { 4096, 2160 }, + }; + ++static const struct vop_common rv1126_common = { ++ .standby = VOP_REG_SYNC(PX30_SYS_CTRL2, 0x1, 1), ++ .out_mode = VOP_REG(PX30_DSP_CTRL2, 0xf, 16), ++ .dsp_blank = VOP_REG(PX30_DSP_CTRL2, 0x1, 14), ++ .dither_down_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 8), ++ .dither_down_sel = VOP_REG(PX30_DSP_CTRL2, 0x1, 7), ++ .dither_down_mode = VOP_REG(PX30_DSP_CTRL2, 0x1, 6), ++ .cfg_done = VOP_REG_SYNC(PX30_REG_CFG_DONE, 0x1, 0), ++ .dither_up = VOP_REG(PX30_DSP_CTRL2, 0x1, 2), ++ .dsp_lut_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 5), ++ .gate_en = VOP_REG(PX30_DSP_CTRL2, 0x1, 0), ++}; ++ ++static const struct vop_modeset rv1126_modeset = { ++ .htotal_pw = VOP_REG(PX30_DSP_HTOTAL_HS_END, 0x0fff0fff, 0), ++ .hact_st_end = VOP_REG(PX30_DSP_HACT_ST_END, 0x0fff0fff, 0), ++ .vtotal_pw = VOP_REG(PX30_DSP_VTOTAL_VS_END, 0x0fff0fff, 0), ++ .vact_st_end = VOP_REG(PX30_DSP_VACT_ST_END, 0x0fff0fff, 0), ++}; ++ ++static const struct vop_output rv1126_output = { ++ .rgb_dclk_pol = VOP_REG(PX30_DSP_CTRL0, 0x1, 1), ++ .rgb_pin_pol = VOP_REG(PX30_DSP_CTRL0, 0x7, 2), ++ .rgb_en = VOP_REG(PX30_DSP_CTRL0, 0x1, 0), ++ .mipi_dclk_pol = VOP_REG(PX30_DSP_CTRL0, 0x1, 25), ++ .mipi_pin_pol = VOP_REG(PX30_DSP_CTRL0, 0x7, 26), ++ .mipi_en = VOP_REG(PX30_DSP_CTRL0, 0x1, 24), ++}; ++ ++static const struct vop_misc rv1126_misc = { ++ .global_regdone_en = VOP_REG(PX30_SYS_CTRL2, 0x1, 13), ++}; ++ ++static const struct vop_win_data rv1126_vop_win_data[] = { ++ { .base = 0x00, .phy = &px30_win0_data, ++ .type = DRM_PLANE_TYPE_OVERLAY }, ++ { .base = 0x00, .phy = &px30_win2_data, ++ .type = DRM_PLANE_TYPE_PRIMARY }, ++}; ++ ++static const struct vop_data rv1126_vop = { ++ .version = VOP_VERSION(2, 0xb), ++ .intr = &px30_intr, ++ .common = &rv1126_common, ++ .modeset = &rv1126_modeset, ++ .output = &rv1126_output, ++ .misc = &rv1126_misc, ++ .win = rv1126_vop_win_data, ++ .win_size = ARRAY_SIZE(rv1126_vop_win_data), ++ .max_output = { 1920, 1080 }, ++ .lut_size = 1024, ++}; ++ + static const struct of_device_id vop_driver_dt_match[] = { + { .compatible = "rockchip,rk3036-vop", + .data = &rk3036_vop }, +@@ -1147,6 +1200,8 @@ static const struct of_device_id vop_dri + .data = &rk3228_vop }, + { .compatible = "rockchip,rk3328-vop", + .data = &rk3328_vop }, ++ { .compatible = "rockchip,rv1126-vop", ++ .data = &rv1126_vop }, + {}, + }; + MODULE_DEVICE_TABLE(of, vop_driver_dt_match); diff --git a/patches-6.6/033-02-v6.7-drm-rockchip-dsi-Add-rv1126-MIPI-DSI-support.patch b/patches-6.6/033-02-v6.7-drm-rockchip-dsi-Add-rv1126-MIPI-DSI-support.patch new file mode 100644 index 0000000..83571e2 --- /dev/null +++ b/patches-6.6/033-02-v6.7-drm-rockchip-dsi-Add-rv1126-MIPI-DSI-support.patch @@ -0,0 +1,60 @@ +From 11fdb231f4127bf60839a63a8c7ed640ebe4751a Mon Sep 17 00:00:00 2001 +From: Jagan Teki +Date: Mon, 31 Jul 2023 16:30:06 +0530 +Subject: [PATCH] drm/rockchip: dsi: Add rv1126 MIPI DSI support + +RV1126 MIPI DSI supports V1.2 DPHY with 4 lanes and 1Gbps transfer +rate for lane. + +Add support for it. 
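A quick aside on the GRF write idiom used in the hunk below (an editorial sketch assuming the usual Rockchip GRF semantics, not part of the upstream commit): the upper 16 bits of a GRF register act as a per-bit write-enable mask, which is exactly what HIWORD_UPDATE() packs together with the new value.

    /* Sketch only, reusing the macros added below: clear FORCETXSTOPMODE,
     * TURNDISABLE and FORCERXMODE while leaving all other bits untouched. */
    u32 lanecfg1 = HIWORD_UPDATE(0, RV1126_DSI_TURNDISABLE |
                                    RV1126_DSI_FORCERXMODE |
                                    RV1126_DSI_FORCETXSTOPMODE);
    /* -> 0x00f50000: write-enable mask in bits [31:16], new (zero) value
     * in bits [15:0] */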
+ +Signed-off-by: Jagan Teki +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230731110012.2913742-9-jagan@edgeble.ai +--- + .../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 20 +++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c ++++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +@@ -198,6 +198,11 @@ + #define RK3568_DSI1_TURNDISABLE BIT(2) + #define RK3568_DSI1_FORCERXMODE BIT(0) + ++#define RV1126_GRF_DSIPHY_CON 0x10220 ++#define RV1126_DSI_FORCETXSTOPMODE (0xf << 4) ++#define RV1126_DSI_TURNDISABLE BIT(2) ++#define RV1126_DSI_FORCERXMODE BIT(0) ++ + #define HIWORD_UPDATE(val, mask) (val | (mask) << 16) + + enum { +@@ -1651,6 +1656,18 @@ static const struct rockchip_dw_dsi_chip + { /* sentinel */ } + }; + ++static const struct rockchip_dw_dsi_chip_data rv1126_chip_data[] = { ++ { ++ .reg = 0xffb30000, ++ .lanecfg1_grf_reg = RV1126_GRF_DSIPHY_CON, ++ .lanecfg1 = HIWORD_UPDATE(0, RV1126_DSI_TURNDISABLE | ++ RV1126_DSI_FORCERXMODE | ++ RV1126_DSI_FORCETXSTOPMODE), ++ .max_data_lanes = 4, ++ }, ++ { /* sentinel */ } ++}; ++ + static const struct of_device_id dw_mipi_dsi_rockchip_dt_ids[] = { + { + .compatible = "rockchip,px30-mipi-dsi", +@@ -1664,6 +1681,9 @@ static const struct of_device_id dw_mipi + }, { + .compatible = "rockchip,rk3568-mipi-dsi", + .data = &rk3568_chip_data, ++ }, { ++ .compatible = "rockchip,rv1126-mipi-dsi", ++ .data = &rv1126_chip_data, + }, + { /* sentinel */ } + }; diff --git a/patches-6.6/033-03-v6.7-drm-rockchip-vop-Use-cleanup-helper-directly-as-destroy.patch b/patches-6.6/033-03-v6.7-drm-rockchip-vop-Use-cleanup-helper-directly-as-destroy.patch new file mode 100644 index 0000000..96cca08 --- /dev/null +++ b/patches-6.6/033-03-v6.7-drm-rockchip-vop-Use-cleanup-helper-directly-as-destroy.patch @@ -0,0 +1,71 @@ +From 800f7c332df7cd9614c416fd005a6bb53f96f13c Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Wed, 21 Jun 2023 22:33:18 +0000 +Subject: [PATCH] drm/rockchip: vop: Use cleanup helper directly as destroy + funcs + +vop_plane_destroy and vop_crtc_destroy are plain wrappers around +drm_plane_cleanup and drm_crtc_cleanup. Use them directly as plane and +crtc funcs to closer match VOP2 driver. 
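Why the substitution is safe (an editorial note, not taken from the commit): drm_plane_cleanup() and drm_crtc_cleanup() already have the exact prototypes the respective .destroy callbacks expect, so the one-line wrappers added nothing. A minimal sketch of the resulting funcs table, with example_plane_funcs being a made-up name for illustration:

    static const struct drm_plane_funcs example_plane_funcs = {
            /* drm_plane_cleanup(struct drm_plane *) matches .destroy's
             * prototype, so the helper can be referenced directly */
            .destroy = drm_plane_cleanup,
    };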
+ +Signed-off-by: Jonas Karlman +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230621223311.2239547-3-jonas@kwiboo.se +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 16 +++------------- + 1 file changed, 3 insertions(+), 13 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +@@ -773,11 +773,6 @@ out: + } + } + +-static void vop_plane_destroy(struct drm_plane *plane) +-{ +- drm_plane_cleanup(plane); +-} +- + static inline bool rockchip_afbc(u64 modifier) + { + return modifier == ROCKCHIP_AFBC_MOD; +@@ -1139,7 +1134,7 @@ static const struct drm_plane_helper_fun + static const struct drm_plane_funcs vop_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, +- .destroy = vop_plane_destroy, ++ .destroy = drm_plane_cleanup, + .reset = drm_atomic_helper_plane_reset, + .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +@@ -1610,11 +1605,6 @@ static const struct drm_crtc_helper_func + .atomic_disable = vop_crtc_atomic_disable, + }; + +-static void vop_crtc_destroy(struct drm_crtc *crtc) +-{ +- drm_crtc_cleanup(crtc); +-} +- + static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc) + { + struct rockchip_crtc_state *rockchip_state; +@@ -1722,7 +1712,7 @@ vop_crtc_verify_crc_source(struct drm_cr + static const struct drm_crtc_funcs vop_crtc_funcs = { + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, +- .destroy = vop_crtc_destroy, ++ .destroy = drm_crtc_cleanup, + .reset = vop_crtc_reset, + .atomic_duplicate_state = vop_crtc_duplicate_state, + .atomic_destroy_state = vop_crtc_destroy_state, +@@ -1973,7 +1963,7 @@ static void vop_destroy_crtc(struct vop + */ + list_for_each_entry_safe(plane, tmp, &drm_dev->mode_config.plane_list, + head) +- vop_plane_destroy(plane); ++ drm_plane_cleanup(plane); + + /* + * Destroy CRTC after vop_plane_destroy() since vop_disable_plane() diff --git a/patches-6.6/033-04-v6.7-drm-rockchip-vop2-Demote-message-in-mod_supported-to.patch b/patches-6.6/033-04-v6.7-drm-rockchip-vop2-Demote-message-in-mod_supported-to.patch new file mode 100644 index 0000000..f0811b3 --- /dev/null +++ b/patches-6.6/033-04-v6.7-drm-rockchip-vop2-Demote-message-in-mod_supported-to.patch @@ -0,0 +1,35 @@ +From eb23cffdd7f085149799e5eda12a9aff792cc34d Mon Sep 17 00:00:00 2001 +From: Michael Tretter +Date: Mon, 9 Oct 2023 12:37:53 +0200 +Subject: [PATCH] drm/rockchip: vop2: Demote message in mod_supported to + drm_dbg_kms + +Checking if a modifier is supported by a plane is normal behavior. It is +normal that a plane may not support certain modifiers. Failing the check +doesn't justify an error message in the kernel log and may mislead +users. + +Demote the error message to drm_dbg_kms to only print the message if the +respective debug messages are enabled. This is similar to the behavior +in rockchip_drm_vop.c. 
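A usage note, not part of the upstream commit: drm_dbg_kms() output is gated by the DRM_UT_KMS bit of the drm.debug module parameter, so after this change the message only reaches the log when KMS debugging is explicitly enabled, whereas drm_err() always prints.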
+ +Signed-off-by: Michael Tretter +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231009103753.830458-1-m.tretter@pengutronix.de +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -469,8 +469,8 @@ static bool rockchip_vop2_mod_supported( + return true; + + if (!rockchip_afbc(plane, modifier)) { +- drm_err(vop2->drm, "Unsupported format modifier 0x%llx\n", +- modifier); ++ drm_dbg_kms(vop2->drm, "Unsupported format modifier 0x%llx\n", ++ modifier); + + return false; + } diff --git a/patches-6.6/033-05-v6.7-drm-rockchip-remove-redundant-of_match_ptr.patch b/patches-6.6/033-05-v6.7-drm-rockchip-remove-redundant-of_match_ptr.patch new file mode 100644 index 0000000..41d6e9d --- /dev/null +++ b/patches-6.6/033-05-v6.7-drm-rockchip-remove-redundant-of_match_ptr.patch @@ -0,0 +1,53 @@ +From 63a06c9fe30bf84d1ab6f07d0e408bd1d4ccaf85 Mon Sep 17 00:00:00 2001 +From: Zhu Wang +Date: Mon, 31 Jul 2023 20:53:04 +0800 +Subject: [PATCH] drm/rockchip: remove redundant of_match_ptr + +The driver depends on CONFIG_OF, so it is not necessary to use +of_match_ptr here. + +Even for drivers that do not depend on CONFIG_OF, it's almost always +better to leave out the of_match_ptr(), since the only thing it can +possibly do is to save a few bytes of .text if a driver can be used both +with and without it. Hence we remove of_match_ptr. + +Signed-off-by: Zhu Wang +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230731125304.87059-1-wangzhu9@huawei.com +--- + drivers/gpu/drm/rockchip/cdn-dp-core.c | 2 +- + drivers/gpu/drm/rockchip/rockchip_lvds.c | 2 +- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c ++++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c +@@ -1261,7 +1261,7 @@ struct platform_driver cdn_dp_driver = { + .driver = { + .name = "cdn-dp", + .owner = THIS_MODULE, +- .of_match_table = of_match_ptr(cdn_dp_dt_ids), ++ .of_match_table = cdn_dp_dt_ids, + .pm = &cdn_dp_pm_ops, + }, + }; +--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c ++++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c +@@ -751,6 +751,6 @@ struct platform_driver rockchip_lvds_dri + .remove_new = rockchip_lvds_remove, + .driver = { + .name = "rockchip-lvds", +- .of_match_table = of_match_ptr(rockchip_lvds_dt_ids), ++ .of_match_table = rockchip_lvds_dt_ids, + }, + }; +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -274,6 +274,6 @@ struct platform_driver vop2_platform_dri + .remove_new = vop2_remove, + .driver = { + .name = "rockchip-vop2", +- .of_match_table = of_match_ptr(vop2_dt_match), ++ .of_match_table = vop2_dt_match, + }, + }; diff --git a/patches-6.6/033-06-v6.7-drm-rockchip-dsi-Use-devm_platform_get_and_ioremap_reso.patch b/patches-6.6/033-06-v6.7-drm-rockchip-dsi-Use-devm_platform_get_and_ioremap_reso.patch new file mode 100644 index 0000000..b7c34d9 --- /dev/null +++ b/patches-6.6/033-06-v6.7-drm-rockchip-dsi-Use-devm_platform_get_and_ioremap_reso.patch @@ -0,0 +1,29 @@ +From 253a1d33e5cfdf62525f5d6ed2bf03acbadd1582 Mon Sep 17 00:00:00 2001 +From: Yang Li +Date: Fri, 21 Apr 2023 16:13:03 +0800 +Subject: [PATCH] drm/rockchip: dsi: Use + devm_platform_get_and_ioremap_resource() + +Convert platform_get_resource(), devm_ioremap_resource() to a single 
+call to devm_platform_get_and_ioremap_resource(), as this is exactly +what this function does. + +Signed-off-by: Yang Li +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20230421081303.122452-1-yang.lee@linux.alibaba.com +--- + drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c ++++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +@@ -1358,8 +1358,7 @@ static int dw_mipi_dsi_rockchip_probe(st + if (!dsi) + return -ENOMEM; + +- res = platform_get_resource(pdev, IORESOURCE_MEM, 0); +- dsi->base = devm_ioremap_resource(dev, res); ++ dsi->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(dsi->base)) { + DRM_DEV_ERROR(dev, "Unable to get dsi registers\n"); + return PTR_ERR(dsi->base); diff --git a/patches-6.6/033-07-v6.7-drm-rockchip-remove-unused-struct-in-vop2.patch b/patches-6.6/033-07-v6.7-drm-rockchip-remove-unused-struct-in-vop2.patch new file mode 100644 index 0000000..d38f249 --- /dev/null +++ b/patches-6.6/033-07-v6.7-drm-rockchip-remove-unused-struct-in-vop2.patch @@ -0,0 +1,54 @@ +From ac1c11c23fc51c1ba51a3ed586df40ffe6b1de35 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Fri, 13 Oct 2023 20:20:36 +0800 +Subject: [PATCH] drm/rockchip: remove unused struct in vop2 + +These structs are undefined and un used. + +Fixes: 604be85547ce ("drm/rockchip: Add VOP2 driver") +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231013122036.1594090-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 -- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 3 --- + 2 files changed, 5 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -160,7 +160,6 @@ struct vop2_video_port { + struct vop2 *vop2; + struct clk *dclk; + unsigned int id; +- const struct vop2_video_port_regs *regs; + const struct vop2_video_port_data *data; + + struct completion dsp_hold_completion; +@@ -2275,7 +2274,6 @@ static int vop2_create_crtcs(struct vop2 + vp = &vop2->vps[i]; + vp->vop2 = vop2; + vp->id = vp_data->id; +- vp->regs = vp_data->regs; + vp->data = vp_data; + + snprintf(dclk_name, sizeof(dclk_name), "dclk_vp%d", vp->id); +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -134,16 +134,13 @@ struct vop2_video_port_data { + u16 cubic_lut_len; + struct vop_rect max_output; + const u8 pre_scan_max_dly[4]; +- const struct vop2_video_port_regs *regs; + unsigned int offset; + }; + + struct vop2_data { + u8 nr_vps; +- const struct vop2_ctrl *ctrl; + const struct vop2_win_data *win; + const struct vop2_video_port_data *vp; +- const struct vop_csc_table *csc_table; + struct vop_rect max_input; + struct vop_rect max_output; + diff --git a/patches-6.6/033-08-v6.7-drm-rockchip-remove-NR_LAYERS-macro-on-vop2.patch b/patches-6.6/033-08-v6.7-drm-rockchip-remove-NR_LAYERS-macro-on-vop2.patch new file mode 100644 index 0000000..b4499db --- /dev/null +++ b/patches-6.6/033-08-v6.7-drm-rockchip-remove-NR_LAYERS-macro-on-vop2.patch @@ -0,0 +1,36 @@ +From dc00748adcf03d754bf43035c668bc5b20fb6597 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Fri, 13 Oct 2023 20:20:51 +0800 +Subject: [PATCH] drm/rockchip: remove NR_LAYERS macro on vop2 + +There are 8 layers on rk3588, so a fix defined macro is +not appropriate. 
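Rough arithmetic behind the change (an editorial sketch assuming all video ports are in use, not part of the upstream commit):

    rk356x: win_size = 6, 3 video ports  ->  6 / 3 = 2 layers per port
    rk3588: win_size = 8, 4 video ports  ->  8 / 4 = 2 layers per port
            (the old fixed NR_LAYERS = 6 would give 6 / 4 = 1 here)

Deriving nlayers from vop2_data->win_size keeps the result correct on both generations.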
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231013122051.1594164-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -2252,8 +2252,6 @@ static struct vop2_video_port *find_vp_w + return NULL; + } + +-#define NR_LAYERS 6 +- + static int vop2_create_crtcs(struct vop2 *vop2) + { + const struct vop2_data *vop2_data = vop2->data; +@@ -2372,7 +2370,7 @@ static int vop2_create_crtcs(struct vop2 + struct vop2_video_port *vp = &vop2->vps[i]; + + if (vp->crtc.port) +- vp->nlayers = NR_LAYERS / nvps; ++ vp->nlayers = vop2_data->win_size / nvps; + } + + return 0; diff --git a/patches-6.6/033-09-v6.7-drm-rockchip-vop-fix-format-bpp-calculation.patch b/patches-6.6/033-09-v6.7-drm-rockchip-vop-fix-format-bpp-calculation.patch new file mode 100644 index 0000000..7793309 --- /dev/null +++ b/patches-6.6/033-09-v6.7-drm-rockchip-vop-fix-format-bpp-calculation.patch @@ -0,0 +1,57 @@ +From 45ad07c7053df0b67e13d8deb574920d11651fb2 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Wed, 18 Oct 2023 17:42:10 +0800 +Subject: [PATCH] drm/rockchip: vop: fix format bpp calculation + +We can't rely on cpp for bpp calculation as the cpp of +some formats(DRM_FORMAT_YUV420_8BIT/10BIT, etc) is zero. + +Signed-off-by: Andy Yan +Acked-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231018094210.2475771-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -282,6 +282,20 @@ static void vop2_win_disable(struct vop2 + vop2_win_write(win, VOP2_WIN_CLUSTER_ENABLE, 0); + } + ++static u32 vop2_get_bpp(const struct drm_format_info *format) ++{ ++ switch (format->format) { ++ case DRM_FORMAT_YUV420_8BIT: ++ return 12; ++ case DRM_FORMAT_YUV420_10BIT: ++ return 15; ++ case DRM_FORMAT_VUY101010: ++ return 30; ++ default: ++ return drm_format_info_bpp(format, 0); ++ } ++} ++ + static enum vop2_data_format vop2_convert_format(u32 format) + { + switch (format) { +@@ -482,7 +496,7 @@ static u32 vop2_afbc_transform_offset(st + { + struct drm_rect *src = &pstate->src; + struct drm_framebuffer *fb = pstate->fb; +- u32 bpp = fb->format->cpp[0] * 8; ++ u32 bpp = vop2_get_bpp(fb->format); + u32 vir_width = (fb->pitches[0] << 3) / bpp; + u32 width = drm_rect_width(src) >> 16; + u32 height = drm_rect_height(src) >> 16; +@@ -1082,7 +1096,7 @@ static void vop2_plane_atomic_update(str + struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode; + struct vop2 *vop2 = win->vop2; + struct drm_framebuffer *fb = pstate->fb; +- u32 bpp = fb->format->cpp[0] * 8; ++ u32 bpp = vop2_get_bpp(fb->format); + u32 actual_w, actual_h, dsp_w, dsp_h; + u32 act_info, dsp_info; + u32 format; diff --git a/patches-6.6/033-10-v6.7-drm-rockchip-vop2-remove-the-unsupported-format-of-cluste.patch b/patches-6.6/033-10-v6.7-drm-rockchip-vop2-remove-the-unsupported-format-of-cluste.patch new file mode 100644 index 0000000..efc718b --- /dev/null +++ b/patches-6.6/033-10-v6.7-drm-rockchip-vop2-remove-the-unsupported-format-of-cluste.patch @@ -0,0 +1,89 @@ +From 01d5a75370a60c3a8d691347ae6ebb2a9f8dc44a Mon Sep 17 00:00:00 2001 +From: Andy Yan 
+Date: Wed, 18 Oct 2023 17:42:39 +0800 +Subject: [PATCH] drm/rockchip: vop2: remove the unsupported format of cluster + window + +The cluster window on vop2 doesn't support linear yuv +format(NV12/16/24), it only support afbc based yuv +format(DRM_FORMAT_YUV420_8BIT/10BIT), which will be +added in next patch. + +Fixes: 604be85547ce ("drm/rockchip: Add VOP2 driver") +Signed-off-by: Andy Yan +Acked-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231018094239.2475851-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 24 +------------------- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 3 --- + 2 files changed, 1 insertion(+), 26 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -342,10 +342,6 @@ static enum vop2_afbc_format vop2_conver + case DRM_FORMAT_RGB565: + case DRM_FORMAT_BGR565: + return VOP2_AFBC_FMT_RGB565; +- case DRM_FORMAT_NV12: +- return VOP2_AFBC_FMT_YUV420; +- case DRM_FORMAT_NV16: +- return VOP2_AFBC_FMT_YUV422; + default: + return VOP2_AFBC_FMT_INVALID; + } +@@ -366,25 +362,9 @@ static bool vop2_win_rb_swap(u32 format) + } + } + +-static bool vop2_afbc_rb_swap(u32 format) +-{ +- switch (format) { +- case DRM_FORMAT_NV24: +- return true; +- default: +- return false; +- } +-} +- + static bool vop2_afbc_uv_swap(u32 format) + { +- switch (format) { +- case DRM_FORMAT_NV12: +- case DRM_FORMAT_NV16: +- return true; +- default: +- return false; +- } ++ return false; + } + + static bool vop2_win_uv_swap(u32 format) +@@ -1234,7 +1214,6 @@ static void vop2_plane_atomic_update(str + drm_err(vop2->drm, "vp%d %s stride[%d] not 64 pixel aligned\n", + vp->id, win->data->name, stride); + +- rb_swap = vop2_afbc_rb_swap(fb->format->format); + uv_swap = vop2_afbc_uv_swap(fb->format->format); + /* + * This is a workaround for crazy IC design, Cluster +@@ -1251,7 +1230,6 @@ static void vop2_plane_atomic_update(str + if (vop2_cluster_window(win)) + vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1); + vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format); +- vop2_win_write(win, VOP2_WIN_AFBC_RB_SWAP, rb_swap); + vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap); + vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -24,9 +24,6 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, +- DRM_FORMAT_NV12, +- DRM_FORMAT_NV16, +- DRM_FORMAT_NV24, + }; + + static const uint32_t formats_win_full_10bit_yuyv[] = { diff --git a/patches-6.6/033-11-v6.7-drm-rockchip-vop2-Add-more-supported-10bit-formats.patch b/patches-6.6/033-11-v6.7-drm-rockchip-vop2-Add-more-supported-10bit-formats.patch new file mode 100644 index 0000000..660845e --- /dev/null +++ b/patches-6.6/033-11-v6.7-drm-rockchip-vop2-Add-more-supported-10bit-formats.patch @@ -0,0 +1,162 @@ +From bfd8a5c228fa3bb97884f77529c09e8745da08b9 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Wed, 18 Oct 2023 17:43:18 +0800 +Subject: [PATCH] drm/rockchip: vop2: Add more supported 10bit formats + +Add 10 bit RGB and AFBC based YUV format supported +by vop2. 
+ +Signed-off-by: Andy Yan +Acked-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231018094318.2476081-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 45 +++++++++++++++++++- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 22 +++++++--- + 2 files changed, 61 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -299,6 +299,11 @@ static u32 vop2_get_bpp(const struct drm + static enum vop2_data_format vop2_convert_format(u32 format) + { + switch (format) { ++ case DRM_FORMAT_XRGB2101010: ++ case DRM_FORMAT_ARGB2101010: ++ case DRM_FORMAT_XBGR2101010: ++ case DRM_FORMAT_ABGR2101010: ++ return VOP2_FMT_XRGB101010; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_XBGR8888: +@@ -311,10 +316,19 @@ static enum vop2_data_format vop2_conver + case DRM_FORMAT_BGR565: + return VOP2_FMT_RGB565; + case DRM_FORMAT_NV12: ++ case DRM_FORMAT_NV21: ++ case DRM_FORMAT_YUV420_8BIT: + return VOP2_FMT_YUV420SP; ++ case DRM_FORMAT_NV15: ++ case DRM_FORMAT_YUV420_10BIT: ++ return VOP2_FMT_YUV420SP_10; + case DRM_FORMAT_NV16: ++ case DRM_FORMAT_NV61: + return VOP2_FMT_YUV422SP; ++ case DRM_FORMAT_Y210: ++ return VOP2_FMT_YUV422SP_10; + case DRM_FORMAT_NV24: ++ case DRM_FORMAT_NV42: + return VOP2_FMT_YUV444SP; + case DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: +@@ -331,6 +345,11 @@ static enum vop2_data_format vop2_conver + static enum vop2_afbc_format vop2_convert_afbc_format(u32 format) + { + switch (format) { ++ case DRM_FORMAT_XRGB2101010: ++ case DRM_FORMAT_ARGB2101010: ++ case DRM_FORMAT_XBGR2101010: ++ case DRM_FORMAT_ABGR2101010: ++ return VOP2_AFBC_FMT_ARGB2101010; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_XBGR8888: +@@ -342,6 +361,17 @@ static enum vop2_afbc_format vop2_conver + case DRM_FORMAT_RGB565: + case DRM_FORMAT_BGR565: + return VOP2_AFBC_FMT_RGB565; ++ case DRM_FORMAT_YUV420_8BIT: ++ return VOP2_AFBC_FMT_YUV420; ++ case DRM_FORMAT_YUV420_10BIT: ++ return VOP2_AFBC_FMT_YUV420_10BIT; ++ case DRM_FORMAT_YVYU: ++ case DRM_FORMAT_YUYV: ++ case DRM_FORMAT_VYUY: ++ case DRM_FORMAT_UYVY: ++ return VOP2_AFBC_FMT_YUV422; ++ case DRM_FORMAT_Y210: ++ return VOP2_AFBC_FMT_YUV422_10BIT; + default: + return VOP2_AFBC_FMT_INVALID; + } +@@ -352,6 +382,8 @@ static enum vop2_afbc_format vop2_conver + static bool vop2_win_rb_swap(u32 format) + { + switch (format) { ++ case DRM_FORMAT_XBGR2101010: ++ case DRM_FORMAT_ABGR2101010: + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + case DRM_FORMAT_BGR888: +@@ -364,7 +396,15 @@ static bool vop2_win_rb_swap(u32 format) + + static bool vop2_afbc_uv_swap(u32 format) + { +- return false; ++ switch (format) { ++ case DRM_FORMAT_YUYV: ++ case DRM_FORMAT_Y210: ++ case DRM_FORMAT_YUV420_8BIT: ++ case DRM_FORMAT_YUV420_10BIT: ++ return true; ++ default: ++ return false; ++ } + } + + static bool vop2_win_uv_swap(u32 format) +@@ -373,6 +413,9 @@ static bool vop2_win_uv_swap(u32 format) + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV16: + case DRM_FORMAT_NV24: ++ case DRM_FORMAT_NV15: ++ case DRM_FORMAT_YUYV: ++ case DRM_FORMAT_UYVY: + return true; + default: + return false; +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -16,6 +16,10 @@ + #include "rockchip_drm_vop2.h" + + static const uint32_t formats_win_full_10bit[] = { ++ DRM_FORMAT_XRGB2101010, ++ DRM_FORMAT_ARGB2101010, ++ DRM_FORMAT_XBGR2101010, 
++ DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, +@@ -24,6 +28,10 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, ++ DRM_FORMAT_YUV420_8BIT, /* yuv420_8bit non-Linear mode only */ ++ DRM_FORMAT_YUV420_10BIT, /* yuv420_10bit non-Linear mode only */ ++ DRM_FORMAT_YUYV, /* yuv422_8bit non-Linear mode only*/ ++ DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */ + }; + + static const uint32_t formats_win_full_10bit_yuyv[] = { +@@ -35,11 +43,15 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, +- DRM_FORMAT_NV12, +- DRM_FORMAT_NV16, +- DRM_FORMAT_NV24, +- DRM_FORMAT_YVYU, +- DRM_FORMAT_VYUY, ++ DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV21, /* yuv420_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */ ++ DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV61, /* yuv422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV42, /* yuv444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */ ++ DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ + }; + + static const uint32_t formats_win_lite[] = { diff --git a/patches-6.6/033-12-v6.7-drm-rockchip-vop2-rename-window-formats-to-show-window-ty.patch b/patches-6.6/033-12-v6.7-drm-rockchip-vop2-rename-window-formats-to-show-window-ty.patch new file mode 100644 index 0000000..8d73568 --- /dev/null +++ b/patches-6.6/033-12-v6.7-drm-rockchip-vop2-rename-window-formats-to-show-window-ty.patch @@ -0,0 +1,116 @@ +From 215737e37d07ade8952048339e37aec6c6f82223 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Wed, 18 Oct 2023 17:43:39 +0800 +Subject: [PATCH] drm/rockchip: vop2: rename window formats to show window type + using them + +formats_win_full_10bit is for cluster window, +formats_win_full_10bit_yuyv is for rk356x esmart, rk3588 esmart window +will support more format. +formats_win_lite is for smart window. 
+ +Rename it based the windows type may let meaning is clearer + +Signed-off-by: Andy Yan +Acked-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231018094339.2476142-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 30 ++++++++++---------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -15,7 +15,7 @@ + + #include "rockchip_drm_vop2.h" + +-static const uint32_t formats_win_full_10bit[] = { ++static const uint32_t formats_cluster[] = { + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_XBGR2101010, +@@ -34,7 +34,7 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */ + }; + +-static const uint32_t formats_win_full_10bit_yuyv[] = { ++static const uint32_t formats_rk356x_esmart[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, +@@ -54,7 +54,7 @@ static const uint32_t formats_win_full_1 + DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ + }; + +-static const uint32_t formats_win_lite[] = { ++static const uint32_t formats_smart[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, +@@ -153,8 +153,8 @@ static const struct vop2_win_data rk3568 + .name = "Smart0-win0", + .phys_id = ROCKCHIP_VOP2_SMART0, + .base = 0x1c00, +- .formats = formats_win_lite, +- .nformats = ARRAY_SIZE(formats_win_lite), ++ .formats = formats_smart, ++ .nformats = ARRAY_SIZE(formats_smart), + .format_modifiers = format_modifiers, + .layer_sel_id = 3, + .supported_rotations = DRM_MODE_REFLECT_Y, +@@ -165,8 +165,8 @@ static const struct vop2_win_data rk3568 + }, { + .name = "Smart1-win0", + .phys_id = ROCKCHIP_VOP2_SMART1, +- .formats = formats_win_lite, +- .nformats = ARRAY_SIZE(formats_win_lite), ++ .formats = formats_smart, ++ .nformats = ARRAY_SIZE(formats_smart), + .format_modifiers = format_modifiers, + .base = 0x1e00, + .layer_sel_id = 7, +@@ -178,8 +178,8 @@ static const struct vop2_win_data rk3568 + }, { + .name = "Esmart1-win0", + .phys_id = ROCKCHIP_VOP2_ESMART1, +- .formats = formats_win_full_10bit_yuyv, +- .nformats = ARRAY_SIZE(formats_win_full_10bit_yuyv), ++ .formats = formats_rk356x_esmart, ++ .nformats = ARRAY_SIZE(formats_rk356x_esmart), + .format_modifiers = format_modifiers, + .base = 0x1a00, + .layer_sel_id = 6, +@@ -191,8 +191,8 @@ static const struct vop2_win_data rk3568 + }, { + .name = "Esmart0-win0", + .phys_id = ROCKCHIP_VOP2_ESMART0, +- .formats = formats_win_full_10bit_yuyv, +- .nformats = ARRAY_SIZE(formats_win_full_10bit_yuyv), ++ .formats = formats_rk356x_esmart, ++ .nformats = ARRAY_SIZE(formats_rk356x_esmart), + .format_modifiers = format_modifiers, + .base = 0x1800, + .layer_sel_id = 2, +@@ -205,8 +205,8 @@ static const struct vop2_win_data rk3568 + .name = "Cluster0-win0", + .phys_id = ROCKCHIP_VOP2_CLUSTER0, + .base = 0x1000, +- .formats = formats_win_full_10bit, +- .nformats = ARRAY_SIZE(formats_win_full_10bit), ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), + .format_modifiers = format_modifiers_afbc, + .layer_sel_id = 0, + .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | +@@ -220,8 +220,8 @@ static const struct vop2_win_data rk3568 + .name = "Cluster1-win0", + .phys_id = ROCKCHIP_VOP2_CLUSTER1, + .base = 0x1200, +- .formats = formats_win_full_10bit, +- .nformats = ARRAY_SIZE(formats_win_full_10bit), ++ .formats = formats_cluster, ++ 
.nformats = ARRAY_SIZE(formats_cluster), + .format_modifiers = format_modifiers_afbc, + .layer_sel_id = 1, + .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | diff --git a/patches-6.6/033-13-v6.7-drm-fourcc-Add-NV20-and-NV30-YUV-formats.patch b/patches-6.6/033-13-v6.7-drm-fourcc-Add-NV20-and-NV30-YUV-formats.patch new file mode 100644 index 0000000..43b0d5a --- /dev/null +++ b/patches-6.6/033-13-v6.7-drm-fourcc-Add-NV20-and-NV30-YUV-formats.patch @@ -0,0 +1,57 @@ +From 728c15b4b5f3369cbde73d5e0f14701ab370f985 Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Mon, 23 Oct 2023 17:37:14 +0000 +Subject: [PATCH] drm/fourcc: Add NV20 and NV30 YUV formats + +DRM_FORMAT_NV20 and DRM_FORMAT_NV30 formats is the 2x1 and non-subsampled +variant of NV15, a 10-bit 2-plane YUV format that has no padding between +components. Instead, luminance and chrominance samples are grouped into 4s +so that each group is packed into an integer number of bytes: + +YYYY = UVUV = 4 * 10 bits = 40 bits = 5 bytes + +The '20' and '30' suffix refers to the optimum effective bits per pixel +which is achieved when the total number of luminance samples is a multiple +of 4. + +V2: Added NV30 format + +Signed-off-by: Jonas Karlman +Reviewed-by: Sandy Huang +Reviewed-by: Christopher Obbard +Tested-by: Christopher Obbard +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231023173718.188102-2-jonas@kwiboo.se +--- + drivers/gpu/drm/drm_fourcc.c | 8 ++++++++ + include/uapi/drm/drm_fourcc.h | 2 ++ + 2 files changed, 10 insertions(+) + +--- a/drivers/gpu/drm/drm_fourcc.c ++++ b/drivers/gpu/drm/drm_fourcc.c +@@ -299,6 +299,14 @@ const struct drm_format_info *__drm_form + .num_planes = 2, .char_per_block = { 5, 5, 0 }, + .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, + .vsub = 2, .is_yuv = true }, ++ { .format = DRM_FORMAT_NV20, .depth = 0, ++ .num_planes = 2, .char_per_block = { 5, 5, 0 }, ++ .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 2, ++ .vsub = 1, .is_yuv = true }, ++ { .format = DRM_FORMAT_NV30, .depth = 0, ++ .num_planes = 2, .char_per_block = { 5, 5, 0 }, ++ .block_w = { 4, 2, 0 }, .block_h = { 1, 1, 0 }, .hsub = 1, ++ .vsub = 1, .is_yuv = true }, + { .format = DRM_FORMAT_Q410, .depth = 0, + .num_planes = 3, .char_per_block = { 2, 2, 2 }, + .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1, +--- a/include/uapi/drm/drm_fourcc.h ++++ b/include/uapi/drm/drm_fourcc.h +@@ -323,6 +323,8 @@ extern "C" { + * index 1 = Cr:Cb plane, [39:0] Cr1:Cb1:Cr0:Cb0 little endian + */ + #define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane */ ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') /* 2x1 subsampled Cr:Cb plane */ ++#define DRM_FORMAT_NV30 fourcc_code('N', 'V', '3', '0') /* non-subsampled Cr:Cb plane */ + + /* + * 2 plane YCbCr MSB aligned diff --git a/patches-6.6/033-14-v6.7-drm-rockchip-vop-Add-NV15-NV20-and-NV30-support.patch b/patches-6.6/033-14-v6.7-drm-rockchip-vop-Add-NV15-NV20-and-NV30-support.patch new file mode 100644 index 0000000..38c9d2b --- /dev/null +++ b/patches-6.6/033-14-v6.7-drm-rockchip-vop-Add-NV15-NV20-and-NV30-support.patch @@ -0,0 +1,231 @@ +From d4b384228562848e4b76b608a5876c92160e993c Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Mon, 23 Oct 2023 17:37:15 +0000 +Subject: [PATCH] drm/rockchip: vop: Add NV15, NV20 and NV30 support + +Add support for displaying 10-bit 4:2:0 and 4:2:2 formats produced by +the Rockchip Video Decoder on RK322X, RK3288, RK3328 and RK3399. 
+Also add support for 10-bit 4:4:4 format while at it. + +V5: Use drm_format_info_min_pitch() for correct bpp + Add missing NV21, NV61 and NV42 formats +V4: Rework RK3328/RK3399 win0/1 data to not affect RK3368 +V2: Added NV30 support + +Signed-off-by: Jonas Karlman +Reviewed-by: Sandy Huang +Reviewed-by: Christopher Obbard +Tested-by: Christopher Obbard +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231023173718.188102-3-jonas@kwiboo.se +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 36 ++++++++--- + drivers/gpu/drm/rockchip/rockchip_drm_vop.h | 1 + + drivers/gpu/drm/rockchip/rockchip_vop_reg.c | 66 +++++++++++++++++---- + 3 files changed, 86 insertions(+), 17 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +@@ -280,6 +280,18 @@ static bool has_uv_swapped(uint32_t form + } + } + ++static bool is_fmt_10(uint32_t format) ++{ ++ switch (format) { ++ case DRM_FORMAT_NV15: ++ case DRM_FORMAT_NV20: ++ case DRM_FORMAT_NV30: ++ return true; ++ default: ++ return false; ++ } ++} ++ + static enum vop_data_format vop_convert_format(uint32_t format) + { + switch (format) { +@@ -295,12 +307,15 @@ static enum vop_data_format vop_convert_ + case DRM_FORMAT_BGR565: + return VOP_FMT_RGB565; + case DRM_FORMAT_NV12: ++ case DRM_FORMAT_NV15: + case DRM_FORMAT_NV21: + return VOP_FMT_YUV420SP; + case DRM_FORMAT_NV16: ++ case DRM_FORMAT_NV20: + case DRM_FORMAT_NV61: + return VOP_FMT_YUV422SP; + case DRM_FORMAT_NV24: ++ case DRM_FORMAT_NV30: + case DRM_FORMAT_NV42: + return VOP_FMT_YUV444SP; + default: +@@ -947,7 +962,12 @@ static void vop_plane_atomic_update(stru + dsp_sty = dest->y1 + crtc->mode.vtotal - crtc->mode.vsync_start; + dsp_st = dsp_sty << 16 | (dsp_stx & 0xffff); + +- offset = (src->x1 >> 16) * fb->format->cpp[0]; ++ if (fb->format->char_per_block[0]) ++ offset = drm_format_info_min_pitch(fb->format, 0, ++ src->x1 >> 16); ++ else ++ offset = (src->x1 >> 16) * fb->format->cpp[0]; ++ + offset += (src->y1 >> 16) * fb->pitches[0]; + dma_addr = rk_obj->dma_addr + offset + fb->offsets[0]; + +@@ -973,6 +993,7 @@ static void vop_plane_atomic_update(stru + } + + VOP_WIN_SET(vop, win, format, format); ++ VOP_WIN_SET(vop, win, fmt_10, is_fmt_10(fb->format->format)); + VOP_WIN_SET(vop, win, yrgb_vir, DIV_ROUND_UP(fb->pitches[0], 4)); + VOP_WIN_SET(vop, win, yrgb_mst, dma_addr); + VOP_WIN_YUV2YUV_SET(vop, win_yuv2yuv, y2r_en, is_yuv); +@@ -982,15 +1003,16 @@ static void vop_plane_atomic_update(stru + (new_state->rotation & DRM_MODE_REFLECT_X) ? 
1 : 0); + + if (is_yuv) { +- int hsub = fb->format->hsub; +- int vsub = fb->format->vsub; +- int bpp = fb->format->cpp[1]; +- + uv_obj = fb->obj[1]; + rk_uv_obj = to_rockchip_obj(uv_obj); + +- offset = (src->x1 >> 16) * bpp / hsub; +- offset += (src->y1 >> 16) * fb->pitches[1] / vsub; ++ if (fb->format->char_per_block[1]) ++ offset = drm_format_info_min_pitch(fb->format, 1, ++ src->x1 >> 16); ++ else ++ offset = (src->x1 >> 16) * fb->format->cpp[1]; ++ offset /= fb->format->hsub; ++ offset += (src->y1 >> 16) * fb->pitches[1] / fb->format->vsub; + + dma_addr = rk_uv_obj->dma_addr + offset + fb->offsets[1]; + VOP_WIN_SET(vop, win, uv_vir, DIV_ROUND_UP(fb->pitches[1], 4)); +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +@@ -186,6 +186,7 @@ struct vop_win_phy { + struct vop_reg enable; + struct vop_reg gate; + struct vop_reg format; ++ struct vop_reg fmt_10; + struct vop_reg rb_swap; + struct vop_reg uv_swap; + struct vop_reg act_info; +--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +@@ -53,6 +53,26 @@ static const uint32_t formats_win_full[] + DRM_FORMAT_NV42, + }; + ++static const uint32_t formats_win_full_10[] = { ++ DRM_FORMAT_XRGB8888, ++ DRM_FORMAT_ARGB8888, ++ DRM_FORMAT_XBGR8888, ++ DRM_FORMAT_ABGR8888, ++ DRM_FORMAT_RGB888, ++ DRM_FORMAT_BGR888, ++ DRM_FORMAT_RGB565, ++ DRM_FORMAT_BGR565, ++ DRM_FORMAT_NV12, ++ DRM_FORMAT_NV21, ++ DRM_FORMAT_NV16, ++ DRM_FORMAT_NV61, ++ DRM_FORMAT_NV24, ++ DRM_FORMAT_NV42, ++ DRM_FORMAT_NV15, ++ DRM_FORMAT_NV20, ++ DRM_FORMAT_NV30, ++}; ++ + static const uint64_t format_modifiers_win_full[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID, +@@ -627,11 +647,12 @@ static const struct vop_scl_regs rk3288_ + + static const struct vop_win_phy rk3288_win01_data = { + .scl = &rk3288_win_full_scl, +- .data_formats = formats_win_full, +- .nformats = ARRAY_SIZE(formats_win_full), ++ .data_formats = formats_win_full_10, ++ .nformats = ARRAY_SIZE(formats_win_full_10), + .format_modifiers = format_modifiers_win_full, + .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0), + .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1), ++ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4), + .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12), + .uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15), + .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0), +@@ -936,13 +957,38 @@ static const struct vop_win_yuv2yuv_data + + }; + +-static const struct vop_win_phy rk3399_win01_data = { ++static const struct vop_win_phy rk3399_win0_data = { + .scl = &rk3288_win_full_scl, +- .data_formats = formats_win_full, +- .nformats = ARRAY_SIZE(formats_win_full), ++ .data_formats = formats_win_full_10, ++ .nformats = ARRAY_SIZE(formats_win_full_10), + .format_modifiers = format_modifiers_win_full_afbc, + .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0), + .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1), ++ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4), ++ .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12), ++ .uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15), ++ .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21), ++ .y_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 22), ++ .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0), ++ .dsp_info = VOP_REG(RK3288_WIN0_DSP_INFO, 0x0fff0fff, 0), ++ .dsp_st = VOP_REG(RK3288_WIN0_DSP_ST, 0x1fff1fff, 0), ++ .yrgb_mst = VOP_REG(RK3288_WIN0_YRGB_MST, 0xffffffff, 0), ++ .uv_mst = VOP_REG(RK3288_WIN0_CBR_MST, 0xffffffff, 0), ++ .yrgb_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 0), ++ .uv_vir = 
VOP_REG(RK3288_WIN0_VIR, 0x3fff, 16), ++ .src_alpha_ctl = VOP_REG(RK3288_WIN0_SRC_ALPHA_CTRL, 0xff, 0), ++ .dst_alpha_ctl = VOP_REG(RK3288_WIN0_DST_ALPHA_CTRL, 0xff, 0), ++ .channel = VOP_REG(RK3288_WIN0_CTRL2, 0xff, 0), ++}; ++ ++static const struct vop_win_phy rk3399_win1_data = { ++ .scl = &rk3288_win_full_scl, ++ .data_formats = formats_win_full_10, ++ .nformats = ARRAY_SIZE(formats_win_full_10), ++ .format_modifiers = format_modifiers_win_full, ++ .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0), ++ .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1), ++ .fmt_10 = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 4), + .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12), + .uv_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 15), + .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21), +@@ -965,9 +1011,9 @@ static const struct vop_win_phy rk3399_w + * AFBC on the primary plane. + */ + static const struct vop_win_data rk3399_vop_win_data[] = { +- { .base = 0x00, .phy = &rk3399_win01_data, ++ { .base = 0x00, .phy = &rk3399_win0_data, + .type = DRM_PLANE_TYPE_PRIMARY }, +- { .base = 0x40, .phy = &rk3368_win01_data, ++ { .base = 0x40, .phy = &rk3399_win1_data, + .type = DRM_PLANE_TYPE_OVERLAY }, + { .base = 0x00, .phy = &rk3368_win23_data, + .type = DRM_PLANE_TYPE_OVERLAY }, +@@ -1099,11 +1145,11 @@ static const struct vop_intr rk3328_vop_ + }; + + static const struct vop_win_data rk3328_vop_win_data[] = { +- { .base = 0xd0, .phy = &rk3368_win01_data, ++ { .base = 0xd0, .phy = &rk3399_win1_data, + .type = DRM_PLANE_TYPE_PRIMARY }, +- { .base = 0x1d0, .phy = &rk3368_win01_data, ++ { .base = 0x1d0, .phy = &rk3399_win1_data, + .type = DRM_PLANE_TYPE_OVERLAY }, +- { .base = 0x2d0, .phy = &rk3368_win01_data, ++ { .base = 0x2d0, .phy = &rk3399_win1_data, + .type = DRM_PLANE_TYPE_CURSOR }, + }; + diff --git a/patches-6.6/033-15-v6.8-drm-rockchip-vop2-Add-NV20-and-NV30-support.patch b/patches-6.6/033-15-v6.8-drm-rockchip-vop2-Add-NV20-and-NV30-support.patch new file mode 100644 index 0000000..ead6b64 --- /dev/null +++ b/patches-6.6/033-15-v6.8-drm-rockchip-vop2-Add-NV20-and-NV30-support.patch @@ -0,0 +1,67 @@ +From 5fc6aa7db080fd90ef00846aac04e8a211088132 Mon Sep 17 00:00:00 2001 +From: Jonas Karlman +Date: Wed, 25 Oct 2023 21:32:46 +0000 +Subject: [PATCH] drm/rockchip: vop2: Add NV20 and NV30 support + +Add support for the 10-bit 4:2:2 and 4:4:4 formats NV20 and NV30. + +These formats can be tested using modetest [1]: + + modetest -P @:1920x1080@ + +e.g. 
on a ROCK 3 Model A (rk3568): + + modetest -P 43@67:1920x1080@NV20 -F tiles,tiles + modetest -P 43@67:1920x1080@NV30 -F smpte,smpte + +[1] https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/329 + +Signed-off-by: Jonas Karlman +Reviewed-by: Christopher Obbard +Tested-by: Christopher Obbard +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231025213248.2641962-1-jonas@kwiboo.se +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 5 +++++ + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 2 ++ + 2 files changed, 7 insertions(+) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -325,11 +325,14 @@ static enum vop2_data_format vop2_conver + case DRM_FORMAT_NV16: + case DRM_FORMAT_NV61: + return VOP2_FMT_YUV422SP; ++ case DRM_FORMAT_NV20: + case DRM_FORMAT_Y210: + return VOP2_FMT_YUV422SP_10; + case DRM_FORMAT_NV24: + case DRM_FORMAT_NV42: + return VOP2_FMT_YUV444SP; ++ case DRM_FORMAT_NV30: ++ return VOP2_FMT_YUV444SP_10; + case DRM_FORMAT_YUYV: + case DRM_FORMAT_YVYU: + return VOP2_FMT_VYUY422; +@@ -414,6 +417,8 @@ static bool vop2_win_uv_swap(u32 format) + case DRM_FORMAT_NV16: + case DRM_FORMAT_NV24: + case DRM_FORMAT_NV15: ++ case DRM_FORMAT_NV20: ++ case DRM_FORMAT_NV30: + case DRM_FORMAT_YUYV: + case DRM_FORMAT_UYVY: + return true; +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -48,8 +48,10 @@ static const uint32_t formats_rk356x_esm + DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */ + DRM_FORMAT_NV61, /* yuv422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */ + DRM_FORMAT_NV42, /* yuv444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */ + DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */ + DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ + }; diff --git a/patches-6.6/033-16-v6.8-drm-rockchip-rk3066_hdmi-Remove-useless-mode_fixup.patch b/patches-6.6/033-16-v6.8-drm-rockchip-rk3066_hdmi-Remove-useless-mode_fixup.patch new file mode 100644 index 0000000..861e58d --- /dev/null +++ b/patches-6.6/033-16-v6.8-drm-rockchip-rk3066_hdmi-Remove-useless-mode_fixup.patch @@ -0,0 +1,40 @@ +From 1044f4a31734eef000f42cdaaf35bb2f76286be5 Mon Sep 17 00:00:00 2001 +From: Johan Jonker +Date: Thu, 2 Nov 2023 14:41:48 +0100 +Subject: [PATCH] drm/rockchip: rk3066_hdmi: Remove useless mode_fixup + +The mode_fixup implementation doesn't do anything, so we can simply +remove it. 
+ +Signed-off-by: Johan Jonker +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/5649ac03-db92-42a9-d86a-76dfa1af7c64@gmail.com +--- + drivers/gpu/drm/rockchip/rk3066_hdmi.c | 9 --------- + 1 file changed, 9 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -434,14 +434,6 @@ static void rk3066_hdmi_encoder_disable( + rk3066_hdmi_set_power_mode(hdmi, HDMI_SYS_POWER_MODE_A); + } + +-static bool +-rk3066_hdmi_encoder_mode_fixup(struct drm_encoder *encoder, +- const struct drm_display_mode *mode, +- struct drm_display_mode *adj_mode) +-{ +- return true; +-} +- + static int + rk3066_hdmi_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, +@@ -459,7 +451,6 @@ static const + struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = { + .enable = rk3066_hdmi_encoder_enable, + .disable = rk3066_hdmi_encoder_disable, +- .mode_fixup = rk3066_hdmi_encoder_mode_fixup, + .mode_set = rk3066_hdmi_encoder_mode_set, + .atomic_check = rk3066_hdmi_encoder_atomic_check, + }; diff --git a/patches-6.6/033-17-v6.8-drm-rockchip-rk3066_hdmi-Switch-encoder-hooks-to-atomic.patch b/patches-6.6/033-17-v6.8-drm-rockchip-rk3066_hdmi-Switch-encoder-hooks-to-atomic.patch new file mode 100644 index 0000000..1167932 --- /dev/null +++ b/patches-6.6/033-17-v6.8-drm-rockchip-rk3066_hdmi-Switch-encoder-hooks-to-atomic.patch @@ -0,0 +1,88 @@ +From ae3436a5e7c2ef4f92938133bd99f92fc47ea34e Mon Sep 17 00:00:00 2001 +From: Johan Jonker +Date: Thu, 2 Nov 2023 14:42:04 +0100 +Subject: [PATCH] drm/rockchip: rk3066_hdmi: Switch encoder hooks to atomic + +The rk3066_hdmi encoder still uses the non atomic variants +of enable and disable. Convert to their atomic equivalents. +In atomic mode there is no need to save the adjusted mode, +so remove the mode_set function. + +Signed-off-by: Johan Jonker +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/034c3446-d619-f4c3-3aaa-ab51dc19d07f@gmail.com +--- + drivers/gpu/drm/rockchip/rk3066_hdmi.c | 35 +++++++++++++------------- + 1 file changed, 17 insertions(+), 18 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -55,7 +55,6 @@ struct rk3066_hdmi { + unsigned int tmdsclk; + + struct hdmi_data_info hdmi_data; +- struct drm_display_mode previous_mode; + }; + + static struct rk3066_hdmi *encoder_to_rk3066_hdmi(struct drm_encoder *encoder) +@@ -387,21 +386,21 @@ static int rk3066_hdmi_setup(struct rk30 + return 0; + } + +-static void +-rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder, +- struct drm_display_mode *mode, +- struct drm_display_mode *adj_mode) ++static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder, ++ struct drm_atomic_state *state) + { + struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); ++ struct drm_connector_state *conn_state; ++ struct drm_crtc_state *crtc_state; ++ int mux, val; + +- /* Store the display mode for plugin/DPMS poweron events. 
*/ +- drm_mode_copy(&hdmi->previous_mode, adj_mode); +-} ++ conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector); ++ if (WARN_ON(!conn_state)) ++ return; + +-static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder) +-{ +- struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); +- int mux, val; ++ crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); ++ if (WARN_ON(!crtc_state)) ++ return; + + mux = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder); + if (mux) +@@ -414,10 +413,11 @@ static void rk3066_hdmi_encoder_enable(s + DRM_DEV_DEBUG(hdmi->dev, "hdmi encoder enable select: vop%s\n", + (mux) ? "1" : "0"); + +- rk3066_hdmi_setup(hdmi, &hdmi->previous_mode); ++ rk3066_hdmi_setup(hdmi, &crtc_state->adjusted_mode); + } + +-static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder) ++static void rk3066_hdmi_encoder_disable(struct drm_encoder *encoder, ++ struct drm_atomic_state *state) + { + struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder); + +@@ -449,10 +449,9 @@ rk3066_hdmi_encoder_atomic_check(struct + + static const + struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = { +- .enable = rk3066_hdmi_encoder_enable, +- .disable = rk3066_hdmi_encoder_disable, +- .mode_set = rk3066_hdmi_encoder_mode_set, +- .atomic_check = rk3066_hdmi_encoder_atomic_check, ++ .atomic_check = rk3066_hdmi_encoder_atomic_check, ++ .atomic_enable = rk3066_hdmi_encoder_enable, ++ .atomic_disable = rk3066_hdmi_encoder_disable, + }; + + static enum drm_connector_status diff --git a/patches-6.6/033-18-v6.8-drm-rockchip-rk3066_hdmi-include-drm-drm_atomic.h.patch b/patches-6.6/033-18-v6.8-drm-rockchip-rk3066_hdmi-include-drm-drm_atomic.h.patch new file mode 100644 index 0000000..99acce8 --- /dev/null +++ b/patches-6.6/033-18-v6.8-drm-rockchip-rk3066_hdmi-include-drm-drm_atomic.h.patch @@ -0,0 +1,43 @@ +From f4814c20d14ca168382e8887c768f290e4a2a861 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Wed, 22 Nov 2023 23:18:29 +0100 +Subject: [PATCH] drm/rockchip: rk3066_hdmi: include drm/drm_atomic.h + +Without this header, the newly added code fails to build: + +drivers/gpu/drm/rockchip/rk3066_hdmi.c: In function 'rk3066_hdmi_encoder_enable': +drivers/gpu/drm/rockchip/rk3066_hdmi.c:397:22: error: implicit declaration of function 'drm_atomic_get_new_connector_state'; did you mean 'drm_atomic_helper_connector_reset'? [-Werror=implicit-function-declaration] + 397 | conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + | drm_atomic_helper_connector_reset +drivers/gpu/drm/rockchip/rk3066_hdmi.c:397:20: error: assignment to 'struct drm_connector_state *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion] + 397 | conn_state = drm_atomic_get_new_connector_state(state, &hdmi->connector); + | ^ +drivers/gpu/drm/rockchip/rk3066_hdmi.c:401:22: error: implicit declaration of function 'drm_atomic_get_new_crtc_state'; did you mean 'drm_atomic_helper_swap_state'? 
[-Werror=implicit-function-declaration] + 401 | crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + | drm_atomic_helper_swap_state +drivers/gpu/drm/rockchip/rk3066_hdmi.c:401:20: error: assignment to 'struct drm_crtc_state *' from 'int' makes pointer from integer without a cast [-Werror=int-conversion] + 401 | crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + | ^ + +Fixes: ae3436a5e7c2 ("drm/rockchip: rk3066_hdmi: Switch encoder hooks to atomic") +Signed-off-by: Arnd Bergmann +Acked-by: Randy Dunlap +Tested-by: Randy Dunlap # build-tested +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231122221838.3164349-1-arnd@kernel.org +--- + drivers/gpu/drm/rockchip/rk3066_hdmi.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -4,6 +4,7 @@ + * Zheng Yang + */ + ++#include + #include + #include + #include diff --git a/patches-6.6/033-19-v6.8-drm-rockchip-move-output-interface-related-definition-to.patch b/patches-6.6/033-19-v6.8-drm-rockchip-move-output-interface-related-definition-to.patch new file mode 100644 index 0000000..8694228 --- /dev/null +++ b/patches-6.6/033-19-v6.8-drm-rockchip-move-output-interface-related-definition-to.patch @@ -0,0 +1,189 @@ +From 8c8546546f256f834e9c7cab48e5946df340d1a8 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:56:27 +0800 +Subject: [PATCH] drm/rockchip: move output interface related definition to + rockchip_drm_drv.h + +The output interface related definition can shared between +vop and vop2, move them to rockchip_drm_drv.h can avoid duplicated +definition. + +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115627.1784735-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/analogix_dp-rockchip.c | 1 - + drivers/gpu/drm/rockchip/cdn-dp-core.c | 1 - + drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c | 1 - + drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 1 - + drivers/gpu/drm/rockchip/inno_hdmi.c | 1 - + drivers/gpu/drm/rockchip/rk3066_hdmi.c | 1 - + drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 17 +++++++++++++++++ + drivers/gpu/drm/rockchip/rockchip_drm_vop.h | 12 ------------ + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 16 +--------------- + drivers/gpu/drm/rockchip/rockchip_lvds.c | 1 - + drivers/gpu/drm/rockchip/rockchip_rgb.c | 1 - + 11 files changed, 18 insertions(+), 35 deletions(-) + +--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c ++++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +@@ -30,7 +30,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #define RK3288_GRF_SOC_CON6 0x25c + #define RK3288_EDP_LCDC_SEL BIT(5) +--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c ++++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c +@@ -24,7 +24,6 @@ + + #include "cdn-dp-core.h" + #include "cdn-dp-reg.h" +-#include "rockchip_drm_vop.h" + + static inline struct cdn_dp_device *connector_to_dp(struct drm_connector *connector) + { +--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c ++++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +@@ -26,7 +26,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #define DSI_PHY_RSTZ 0xa0 + #define PHY_DISFORCEPLL 0 +--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c ++++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +@@ -18,7 +18,6 @@ + #include + + #include 
"rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #define RK3228_GRF_SOC_CON2 0x0408 + #define RK3228_HDMI_SDAIN_MSK BIT(14) +--- a/drivers/gpu/drm/rockchip/inno_hdmi.c ++++ b/drivers/gpu/drm/rockchip/inno_hdmi.c +@@ -23,7 +23,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #include "inno_hdmi.h" + +--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c ++++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c +@@ -18,7 +18,6 @@ + #include "rk3066_hdmi.h" + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + + #define DEFAULT_PLLA_RATE 30000000 + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +@@ -20,6 +20,23 @@ + #define ROCKCHIP_MAX_CONNECTOR 2 + #define ROCKCHIP_MAX_CRTC 4 + ++/* ++ * display output interface supported by rockchip lcdc ++ */ ++#define ROCKCHIP_OUT_MODE_P888 0 ++#define ROCKCHIP_OUT_MODE_BT1120 0 ++#define ROCKCHIP_OUT_MODE_P666 1 ++#define ROCKCHIP_OUT_MODE_P565 2 ++#define ROCKCHIP_OUT_MODE_BT656 5 ++#define ROCKCHIP_OUT_MODE_S888 8 ++#define ROCKCHIP_OUT_MODE_S888_DUMMY 12 ++#define ROCKCHIP_OUT_MODE_YUV420 14 ++/* for use special outface */ ++#define ROCKCHIP_OUT_MODE_AAAA 15 ++ ++/* output flags */ ++#define ROCKCHIP_OUTPUT_DSI_DUAL BIT(0) ++ + struct drm_device; + struct drm_connector; + struct iommu_domain; +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +@@ -277,18 +277,6 @@ struct vop_data { + /* dst alpha ctrl define */ + #define DST_FACTOR_M0(x) (((x) & 0x7) << 6) + +-/* +- * display output interface supported by rockchip lcdc +- */ +-#define ROCKCHIP_OUT_MODE_P888 0 +-#define ROCKCHIP_OUT_MODE_P666 1 +-#define ROCKCHIP_OUT_MODE_P565 2 +-/* for use special outface */ +-#define ROCKCHIP_OUT_MODE_AAAA 15 +- +-/* output flags */ +-#define ROCKCHIP_OUTPUT_DSI_DUAL BIT(0) +- + enum alpha_mode { + ALPHA_STRAIGHT, + ALPHA_INVERSE, +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -7,10 +7,9 @@ + #ifndef _ROCKCHIP_DRM_VOP2_H + #define _ROCKCHIP_DRM_VOP2_H + +-#include "rockchip_drm_vop.h" +- + #include + #include ++#include "rockchip_drm_vop.h" + + #define VOP_FEATURE_OUTPUT_10BIT BIT(0) + +@@ -166,19 +165,6 @@ struct vop2_data { + #define WB_YRGB_FIFO_FULL_INTR BIT(18) + #define WB_COMPLETE_INTR BIT(19) + +-/* +- * display output interface supported by rockchip lcdc +- */ +-#define ROCKCHIP_OUT_MODE_P888 0 +-#define ROCKCHIP_OUT_MODE_BT1120 0 +-#define ROCKCHIP_OUT_MODE_P666 1 +-#define ROCKCHIP_OUT_MODE_P565 2 +-#define ROCKCHIP_OUT_MODE_BT656 5 +-#define ROCKCHIP_OUT_MODE_S888 8 +-#define ROCKCHIP_OUT_MODE_S888_DUMMY 12 +-#define ROCKCHIP_OUT_MODE_YUV420 14 +-/* for use special outface */ +-#define ROCKCHIP_OUT_MODE_AAAA 15 + + enum vop_csc_format { + CSC_BT601L, +--- a/drivers/gpu/drm/rockchip/rockchip_lvds.c ++++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c +@@ -27,7 +27,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + #include "rockchip_lvds.h" + + #define DISPLAY_OUTPUT_RGB 0 +--- a/drivers/gpu/drm/rockchip/rockchip_rgb.c ++++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c +@@ -19,7 +19,6 @@ + #include + + #include "rockchip_drm_drv.h" +-#include "rockchip_drm_vop.h" + #include "rockchip_rgb.h" + + struct rockchip_rgb { diff --git a/patches-6.6/033-20-v6.8-Revert-drm-rockchip-vop2-Use-regcache_sync-to-fix.patch b/patches-6.6/033-20-v6.8-Revert-drm-rockchip-vop2-Use-regcache_sync-to-fix.patch new file mode 100644 index 
0000000..563a67e --- /dev/null +++ b/patches-6.6/033-20-v6.8-Revert-drm-rockchip-vop2-Use-regcache_sync-to-fix.patch @@ -0,0 +1,60 @@ +From 81a06f1d02e588cfa14c5e5953d9dc50b1d404be Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:57:19 +0800 +Subject: [PATCH] Revert "drm/rockchip: vop2: Use regcache_sync() to fix + suspend/resume" + +This reverts commit b63a553e8f5aa6574eeb535a551817a93c426d8c. + +regcache_sync will try to reload the configuration in regcache to +hardware, but the registers of 4 Cluster windows and Esmart1/2/3 on +the upcoming rk3588 can not be set successfully before internal PD +power on. + +Also it's better to keep the hardware register as it is before we really +enable it. + +So let's revert this version, and keep the first version: +commit afa965a45e01 ("drm/rockchip: vop2: fix suspend/resume") + +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115719.1784834-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -217,6 +217,8 @@ struct vop2 { + struct vop2_win win[]; + }; + ++static const struct regmap_config vop2_regmap_config; ++ + static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc) + { + return container_of(crtc, struct vop2_video_port, crtc); +@@ -885,7 +887,11 @@ static void vop2_enable(struct vop2 *vop + return; + } + +- regcache_sync(vop2->map); ++ ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config); ++ if (ret) { ++ drm_err(vop2->drm, "failed to reinit cache: %d\n", ret); ++ return; ++ } + + if (vop2->data->soc_id == 3566) + vop2_writel(vop2, RK3568_OTP_WIN_EN, 1); +@@ -915,8 +921,6 @@ static void vop2_disable(struct vop2 *vo + + pm_runtime_put_sync(vop2->dev); + +- regcache_mark_dirty(vop2->map); +- + clk_disable_unprepare(vop2->aclk); + clk_disable_unprepare(vop2->hclk); + } diff --git a/patches-6.6/033-21-v6.8-drm-rockchip-vop2-set-half_block_en-bit-in-all-mode.patch b/patches-6.6/033-21-v6.8-drm-rockchip-vop2-set-half_block_en-bit-in-all-mode.patch new file mode 100644 index 0000000..b4cd238 --- /dev/null +++ b/patches-6.6/033-21-v6.8-drm-rockchip-vop2-set-half_block_en-bit-in-all-mode.patch @@ -0,0 +1,83 @@ +From bebad6bd4fbdc448ad3b337ad281b813e68f6f53 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:57:30 +0800 +Subject: [PATCH] drm/rockchip: vop2: set half_block_en bit in all mode + +At first we thought the half_block_en bit in AFBCD_CTRL register +only work in afbc mode. But the fact is that it control the line +buffer in all mode(afbc/tile/linear), so we need configure it in +all case. + +As the cluster windows of rk3568 only supports afbc format +so is therefore not affected. 
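As a rough stand-alone illustration of the rule this patch encodes in vop2_half_block_enable() below: the line buffer runs in half-block (8-line) mode unless the plane is rotated by 90 or 270 degrees. The helper name here is hypothetical; the rotation constants are copied from the DRM uAPI:

    #include <stdbool.h>
    #include <stdio.h>

    #define DRM_MODE_ROTATE_90  (1 << 1)  /* include/uapi/drm/drm_mode.h */
    #define DRM_MODE_ROTATE_270 (1 << 3)

    /* 0: full block, 16 lines per tile; 1: half block, 8 lines per tile */
    static bool half_block_en(unsigned int rotation)
    {
        return !(rotation & (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270));
    }

    int main(void)
    {
        printf("no rotation -> half_block_en=%d\n", half_block_en(0));
        printf("rotate 90   -> half_block_en=%d\n",
               half_block_en(DRM_MODE_ROTATE_90));
        return 0;
    }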
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115730.1784893-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 25 ++++++++++++++------ + 1 file changed, 18 insertions(+), 7 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -521,6 +521,18 @@ static bool rockchip_vop2_mod_supported( + return vop2_convert_afbc_format(format) >= 0; + } + ++/* ++ * 0: Full mode, 16 lines for one tail ++ * 1: half block mode, 8 lines one tail ++ */ ++static bool vop2_half_block_enable(struct drm_plane_state *pstate) ++{ ++ if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) ++ return false; ++ else ++ return true; ++} ++ + static u32 vop2_afbc_transform_offset(struct drm_plane_state *pstate, + bool afbc_half_block_en) + { +@@ -1146,6 +1158,7 @@ static void vop2_plane_atomic_update(str + bool rotate_90 = pstate->rotation & DRM_MODE_ROTATE_90; + struct rockchip_gem_object *rk_obj; + unsigned long offset; ++ bool half_block_en; + bool afbc_en; + dma_addr_t yrgb_mst; + dma_addr_t uv_mst; +@@ -1238,6 +1251,7 @@ static void vop2_plane_atomic_update(str + dsp_info = (dsp_h - 1) << 16 | ((dsp_w - 1) & 0xffff); + + format = vop2_convert_format(fb->format->format); ++ half_block_en = vop2_half_block_enable(pstate); + + drm_dbg(vop2->drm, "vp%d update %s[%dx%d->%dx%d@%dx%d] fmt[%p4cc_%s] addr[%pad]\n", + vp->id, win->data->name, actual_w, actual_h, dsp_w, dsp_h, +@@ -1245,6 +1259,9 @@ static void vop2_plane_atomic_update(str + &fb->format->format, + afbc_en ? "AFBC" : "", &yrgb_mst); + ++ if (vop2_cluster_window(win)) ++ vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en); ++ + if (afbc_en) { + u32 stride; + +@@ -1285,13 +1302,7 @@ static void vop2_plane_atomic_update(str + vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap); + vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); +- if (pstate->rotation & (DRM_MODE_ROTATE_270 | DRM_MODE_ROTATE_90)) { +- vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 0); +- transform_offset = vop2_afbc_transform_offset(pstate, false); +- } else { +- vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, 1); +- transform_offset = vop2_afbc_transform_offset(pstate, true); +- } ++ transform_offset = vop2_afbc_transform_offset(pstate, half_block_en); + vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst); + vop2_win_write(win, VOP2_WIN_AFBC_PIC_SIZE, act_info); + vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, transform_offset); diff --git a/patches-6.6/033-22-v6.8-drm-rockchip-vop2-clear-afbc-en-and-transform-bit-for.patch b/patches-6.6/033-22-v6.8-drm-rockchip-vop2-clear-afbc-en-and-transform-bit-for.patch new file mode 100644 index 0000000..f129011 --- /dev/null +++ b/patches-6.6/033-22-v6.8-drm-rockchip-vop2-clear-afbc-en-and-transform-bit-for.patch @@ -0,0 +1,36 @@ +From 20529a68307feed00dd3d431d3fff0572616b0f2 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:57:41 +0800 +Subject: [PATCH] drm/rockchip: vop2: clear afbc en and transform bit for + cluster window at linear mode + +The enable bit and transform offset of cluster windows should be +cleared when it work at linear mode, or we may have a iommu fault +issue on rk3588 which cluster windows switch between afbc and linear +mode. + +As the cluster windows of rk3568 only supports afbc format +so is therefore not affected. 
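The hazard is stale window state: a cluster window that last scanned out an AFBC buffer keeps its AFBC enable and transform offset programmed, and a later linear framebuffer on rk3588 is then fetched through the wrong path and faults in the IOMMU. A minimal sketch of the pattern the fix applies, with illustrative field names rather than the driver's register accessors:

    /* Stand-ins for the VOP2_WIN_AFBC_* and VOP2_WIN_YRGB_VIR fields. */
    struct win_state {
        unsigned int afbc_enable;
        unsigned int afbc_transform_offset;
        unsigned int yrgb_vir;  /* linear stride, in 32-bit words */
    };

    static void win_commit_linear(struct win_state *w, unsigned int pitch_bytes)
    {
        /* Clear AFBC state explicitly instead of assuming linear mode ignores it. */
        w->afbc_enable = 0;
        w->afbc_transform_offset = 0;
        w->yrgb_vir = (pitch_bytes + 3) / 4;  /* DIV_ROUND_UP(pitch, 4) */
    }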
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115741.1784954-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1314,6 +1314,11 @@ static void vop2_plane_atomic_update(str + vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_270, rotate_270); + vop2_win_write(win, VOP2_WIN_AFBC_ROTATE_90, rotate_90); + } else { ++ if (vop2_cluster_window(win)) { ++ vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 0); ++ vop2_win_write(win, VOP2_WIN_AFBC_TRANSFORM_OFFSET, 0); ++ } ++ + vop2_win_write(win, VOP2_WIN_YRGB_VIR, DIV_ROUND_UP(fb->pitches[0], 4)); + } + diff --git a/patches-6.6/033-23-v6.8-drm-rockchip-vop2-Add-write-mask-for-VP-config-done.patch b/patches-6.6/033-23-v6.8-drm-rockchip-vop2-Add-write-mask-for-VP-config-done.patch new file mode 100644 index 0000000..33ca068 --- /dev/null +++ b/patches-6.6/033-23-v6.8-drm-rockchip-vop2-Add-write-mask-for-VP-config-done.patch @@ -0,0 +1,50 @@ +From d1f8face0fc1298c88ef4a0479c3027b46ca2c77 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:57:52 +0800 +Subject: [PATCH] drm/rockchip: vop2: Add write mask for VP config done + +The write mask bit is used to make sure when writing +config done bit for one VP will not overwrite the other. + +Unfortunately, the write mask bit is missing on +rk3566/8, that means when we write to these bits, +it will not take any effect. + +We need this to make the vop work properly after +rk3566/8 variants. + +Signed-off-by: Andy Yan +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115752.1785013-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -268,12 +268,23 @@ static bool vop2_cluster_window(const st + return win->data->feature & WIN_FEATURE_CLUSTER; + } + ++/* ++ * Note: ++ * The write mask function is documented but missing on rk3566/8, writes ++ * to these bits have no effect. For newer soc(rk3588 and following) the ++ * write mask is needed for register writes. ++ * ++ * GLB_CFG_DONE_EN has no write mask bit. ++ * ++ */ + static void vop2_cfg_done(struct vop2_video_port *vp) + { + struct vop2 *vop2 = vp->vop2; ++ u32 val = RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN; + +- regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, +- BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN); ++ val |= BIT(vp->id) | (BIT(vp->id) << 16); ++ ++ regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, val); + } + + static void vop2_win_disable(struct vop2_win *win) diff --git a/patches-6.6/033-24-v6.8-drm-rockchip-vop2-Set-YUV-RGB-overlay-mode.patch b/patches-6.6/033-24-v6.8-drm-rockchip-vop2-Set-YUV-RGB-overlay-mode.patch new file mode 100644 index 0000000..68c6310 --- /dev/null +++ b/patches-6.6/033-24-v6.8-drm-rockchip-vop2-Set-YUV-RGB-overlay-mode.patch @@ -0,0 +1,95 @@ +From dd49ee4614cfb0b1f627c4353b60cecfe998a374 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:58:05 +0800 +Subject: [PATCH] drm/rockchip: vop2: Set YUV/RGB overlay mode + +Set overlay mode register according to the +output mode is yuv or rgb. 
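The input to this bit is simply whether the port's media bus format is a YUV one. A stand-alone sketch of that check, using MEDIA_BUS_FMT_* values from the uAPI header (the exact set of formats the driver tests may differ):

    #include <stdbool.h>
    #include <linux/media-bus-format.h>

    static bool is_yuv_bus_format(unsigned int bus_format)
    {
        switch (bus_format) {
        case MEDIA_BUS_FMT_YUV8_1X24:
        case MEDIA_BUS_FMT_YUV10_1X30:
        case MEDIA_BUS_FMT_UYYVYY8_0_5X24:   /* HDMI YUV420, 8 bit */
        case MEDIA_BUS_FMT_UYYVYY10_0_5X30:  /* HDMI YUV420, 10 bit */
            return true;
        default:
            return false;
        }
    }

When it returns true the port's overlay blends in the YUV domain, which matches the RK3568_OVL_CTRL__YUV_MODE bit the diff below programs per video port.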
+ +Signed-off-by: Andy Yan +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115805.1785073-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_drv.h | 1 + + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 17 ++++++++++++++--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 1 + + 3 files changed, 16 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +@@ -48,6 +48,7 @@ struct rockchip_crtc_state { + int output_bpc; + int output_flags; + bool enable_afbc; ++ bool yuv_overlay; + u32 bus_format; + u32 bus_flags; + int color_space; +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1625,6 +1625,8 @@ static void vop2_crtc_atomic_enable(stru + + vop2->enable_count++; + ++ vcstate->yuv_overlay = is_yuv_output(vcstate->bus_format); ++ + vop2_crtc_enable_irq(vp, VP_INT_POST_BUF_EMPTY); + + polflags = 0; +@@ -1652,7 +1654,7 @@ static void vop2_crtc_atomic_enable(stru + if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode)) + dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP; + +- if (is_yuv_output(vcstate->bus_format)) ++ if (vcstate->yuv_overlay) + dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y; + + vop2_dither_setup(crtc, &dsp_ctrl); +@@ -1961,10 +1963,12 @@ static void vop2_setup_layer_mixer(struc + u16 hdisplay; + u32 bg_dly; + u32 pre_scan_dly; ++ u32 ovl_ctrl; + int i; + struct vop2_video_port *vp0 = &vop2->vps[0]; + struct vop2_video_port *vp1 = &vop2->vps[1]; + struct vop2_video_port *vp2 = &vop2->vps[2]; ++ struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state); + + adjusted_mode = &vp->crtc.state->adjusted_mode; + hsync_len = adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start; +@@ -1977,7 +1981,15 @@ static void vop2_setup_layer_mixer(struc + pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len; + vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly); + +- vop2_writel(vop2, RK3568_OVL_CTRL, 0); ++ ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL); ++ ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD; ++ if (vcstate->yuv_overlay) ++ ovl_ctrl |= RK3568_OVL_CTRL__YUV_MODE(vp->id); ++ else ++ ovl_ctrl &= ~RK3568_OVL_CTRL__YUV_MODE(vp->id); ++ ++ vop2_writel(vop2, RK3568_OVL_CTRL, ovl_ctrl); ++ + port_sel = vop2_readl(vop2, RK3568_OVL_PORT_SEL); + port_sel &= RK3568_OVL_PORT_SEL__SEL_PORT; + +@@ -2051,7 +2063,6 @@ static void vop2_setup_layer_mixer(struc + + vop2_writel(vop2, RK3568_OVL_LAYER_SEL, layer_sel); + vop2_writel(vop2, RK3568_OVL_PORT_SEL, port_sel); +- vop2_writel(vop2, RK3568_OVL_CTRL, RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD); + } + + static void vop2_setup_dly_for_windows(struct vop2 *vop2) +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -401,6 +401,7 @@ enum dst_factor_mode { + #define VOP2_COLOR_KEY_MASK BIT(31) + + #define RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD BIT(28) ++#define RK3568_OVL_CTRL__YUV_MODE(vp) BIT(vp) + + #define RK3568_VP_BG_MIX_CTRL__BG_DLY GENMASK(31, 24) + diff --git a/patches-6.6/033-25-v6.8-drm-rockchip-vop2-set-bg-dly-and-prescan-dly-at.patch b/patches-6.6/033-25-v6.8-drm-rockchip-vop2-set-bg-dly-and-prescan-dly-at.patch new file mode 100644 index 0000000..b26bcf6 --- /dev/null +++ b/patches-6.6/033-25-v6.8-drm-rockchip-vop2-set-bg-dly-and-prescan-dly-at.patch @@ -0,0 +1,70 @@ +From 075a5b3969becb1ebc2f1d4fa1a1fe9163679273 Mon Sep 17 00:00:00 2001 +From: 
Andy Yan +Date: Mon, 11 Dec 2023 19:58:15 +0800 +Subject: [PATCH] drm/rockchip: vop2: set bg dly and prescan dly at + vop2_post_config + +We need to setup background delay cycle and prescan +delay cycle when a mode is enable to avoid trigger +POST_BUF_EMPTY irq on rk3588. + +Note: RK356x has no such requirement. + +Signed-off-by: Andy Yan +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115815.1785131-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 26 ++++++++------------ + 1 file changed, 10 insertions(+), 16 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1462,8 +1462,18 @@ static void vop2_post_config(struct drm_ + u32 top_margin = 100, bottom_margin = 100; + u16 hsize = hdisplay * (left_margin + right_margin) / 200; + u16 vsize = vdisplay * (top_margin + bottom_margin) / 200; ++ u16 hsync_len = mode->crtc_hsync_end - mode->crtc_hsync_start; + u16 hact_end, vact_end; + u32 val; ++ u32 bg_dly; ++ u32 pre_scan_dly; ++ ++ bg_dly = vp->data->pre_scan_max_dly[3]; ++ vop2_writel(vp->vop2, RK3568_VP_BG_MIX_CTRL(vp->id), ++ FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly)); ++ ++ pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len; ++ vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly); + + vsize = rounddown(vsize, 2); + hsize = rounddown(hsize, 2); +@@ -1958,11 +1968,6 @@ static void vop2_setup_layer_mixer(struc + u32 layer_sel = 0; + u32 port_sel; + unsigned int nlayer, ofs; +- struct drm_display_mode *adjusted_mode; +- u16 hsync_len; +- u16 hdisplay; +- u32 bg_dly; +- u32 pre_scan_dly; + u32 ovl_ctrl; + int i; + struct vop2_video_port *vp0 = &vop2->vps[0]; +@@ -1970,17 +1975,6 @@ static void vop2_setup_layer_mixer(struc + struct vop2_video_port *vp2 = &vop2->vps[2]; + struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(vp->crtc.state); + +- adjusted_mode = &vp->crtc.state->adjusted_mode; +- hsync_len = adjusted_mode->crtc_hsync_end - adjusted_mode->crtc_hsync_start; +- hdisplay = adjusted_mode->crtc_hdisplay; +- +- bg_dly = vp->data->pre_scan_max_dly[3]; +- vop2_writel(vop2, RK3568_VP_BG_MIX_CTRL(vp->id), +- FIELD_PREP(RK3568_VP_BG_MIX_CTRL__BG_DLY, bg_dly)); +- +- pre_scan_dly = ((bg_dly + (hdisplay >> 1) - 1) << 16) | hsync_len; +- vop2_vp_write(vp, RK3568_VP_PRE_SCAN_HTIMING, pre_scan_dly); +- + ovl_ctrl = vop2_readl(vop2, RK3568_OVL_CTRL); + ovl_ctrl |= RK3568_OVL_CTRL__LAYERSEL_REGDONE_IMD; + if (vcstate->yuv_overlay) diff --git a/patches-6.6/033-26-v6.8-drm-rockchip-vop2-rename-grf-to-sys_grf.patch b/patches-6.6/033-26-v6.8-drm-rockchip-vop2-rename-grf-to-sys_grf.patch new file mode 100644 index 0000000..156b470 --- /dev/null +++ b/patches-6.6/033-26-v6.8-drm-rockchip-vop2-rename-grf-to-sys_grf.patch @@ -0,0 +1,50 @@ +From c408af1afc4b74ea6df69e0313be97f1f83e981a Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:58:26 +0800 +Subject: [PATCH] drm/rockchip: vop2: rename grf to sys_grf + +The vop2 need to reference more grf(system grf, vop grf, vo0/1 grf,etc) +in the upcoming rk3588. + +So we rename the current system grf to sys_grf. 
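For reference, each GRF becomes its own syscon regmap looked up by phandle; a condensed, hypothetical sketch of the lookups that the rk3588 support patch later in this series performs (error handling trimmed to a single check):

    #include <linux/err.h>
    #include <linux/errno.h>
    #include <linux/mfd/syscon.h>
    #include <linux/of.h>
    #include <linux/regmap.h>

    static int vop2_lookup_grfs(struct device_node *np, struct regmap **sys_grf,
                                struct regmap **vop_grf, struct regmap **vo1_grf)
    {
        *sys_grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
        *vop_grf = syscon_regmap_lookup_by_phandle(np, "rockchip,vop-grf");
        *vo1_grf = syscon_regmap_lookup_by_phandle(np, "rockchip,vo1-grf");

        if (IS_ERR(*sys_grf) || IS_ERR(*vop_grf) || IS_ERR(*vo1_grf))
            return -ENODEV;

        return 0;
    }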
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115826.1785190-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -190,7 +190,7 @@ struct vop2 { + void __iomem *regs; + struct regmap *map; + +- struct regmap *grf; ++ struct regmap *sys_grf; + + /* physical map length of vop2 register */ + u32 len; +@@ -1526,9 +1526,9 @@ static void rk3568_set_intf_mux(struct v + dip &= ~RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL; + dip |= FIELD_PREP(RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL, polflags); + if (polflags & POLFLAG_DCLK_INV) +- regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3)); ++ regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16) | BIT(3)); + else +- regmap_write(vop2->grf, RK3568_GRF_VO_CON1, BIT(3 + 16)); ++ regmap_write(vop2->sys_grf, RK3568_GRF_VO_CON1, BIT(3 + 16)); + break; + case ROCKCHIP_VOP2_EP_HDMI0: + die &= ~RK3568_SYS_DSP_INFACE_EN_HDMI_MUX; +@@ -2769,7 +2769,7 @@ static int vop2_bind(struct device *dev, + return PTR_ERR(vop2->lut_regs); + } + +- vop2->grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); ++ vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); + + vop2->hclk = devm_clk_get(vop2->dev, "hclk"); + if (IS_ERR(vop2->hclk)) { diff --git a/patches-6.6/033-27-v6.8-dt-bindings-rockchip-vop2-Add-more-endpoint-definition.patch b/patches-6.6/033-27-v6.8-dt-bindings-rockchip-vop2-Add-more-endpoint-definition.patch new file mode 100644 index 0000000..9e92daf --- /dev/null +++ b/patches-6.6/033-27-v6.8-dt-bindings-rockchip-vop2-Add-more-endpoint-definition.patch @@ -0,0 +1,28 @@ +From dc7226acacc6502291446f9e33cf96246ec49a30 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:59:07 +0800 +Subject: [PATCH] dt-bindings: rockchip,vop2: Add more endpoint definition + +There are 2 HDMI, 2 DP, 2 eDP on rk3588, so add +corresponding endpoint definition for it. 
+ +Signed-off-by: Andy Yan +Acked-by: Krzysztof Kozlowski +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115907.1785377-1-andyshrk@163.com +--- + include/dt-bindings/soc/rockchip,vop2.h | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/include/dt-bindings/soc/rockchip,vop2.h ++++ b/include/dt-bindings/soc/rockchip,vop2.h +@@ -10,5 +10,9 @@ + #define ROCKCHIP_VOP2_EP_LVDS0 5 + #define ROCKCHIP_VOP2_EP_MIPI1 6 + #define ROCKCHIP_VOP2_EP_LVDS1 7 ++#define ROCKCHIP_VOP2_EP_HDMI1 8 ++#define ROCKCHIP_VOP2_EP_EDP1 9 ++#define ROCKCHIP_VOP2_EP_DP0 10 ++#define ROCKCHIP_VOP2_EP_DP1 11 + + #endif /* __DT_BINDINGS_ROCKCHIP_VOP2_H */ diff --git a/patches-6.6/033-28-v6.8-drm-rockchip-vop2-Add-support-for-rk3588.patch b/patches-6.6/033-28-v6.8-drm-rockchip-vop2-Add-support-for-rk3588.patch new file mode 100644 index 0000000..a2d8efc --- /dev/null +++ b/patches-6.6/033-28-v6.8-drm-rockchip-vop2-Add-support-for-rk3588.patch @@ -0,0 +1,997 @@ +From 5a028e8f062fc862f051f8e62a0d5a1abac91955 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:59:19 +0800 +Subject: [PATCH] drm/rockchip: vop2: Add support for rk3588 + +VOP2 on rk3588: + +Four video ports: +VP0 Max 4096x2160 +VP1 Max 4096x2160 +VP2 Max 4096x2160 +VP3 Max 2048x1080 + +4 4K Cluster windows with AFBC/line RGB and AFBC-only YUV support +4 4K Esmart windows with line RGB/YUV support + +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115919.1785435-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 400 ++++++++++++++++++- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 81 ++++ + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 221 ++++++++++ + 3 files changed, 696 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -191,6 +191,9 @@ struct vop2 { + struct regmap *map; + + struct regmap *sys_grf; ++ struct regmap *vop_grf; ++ struct regmap *vo1_grf; ++ struct regmap *sys_pmu; + + /* physical map length of vop2 register */ + u32 len; +@@ -209,6 +212,7 @@ struct vop2 { + unsigned int enable_count; + struct clk *hclk; + struct clk *aclk; ++ struct clk *pclk; + + /* optional internal rgb encoder */ + struct rockchip_rgb *rgb; +@@ -217,6 +221,23 @@ struct vop2 { + struct vop2_win win[]; + }; + ++#define vop2_output_if_is_hdmi(x) ((x) == ROCKCHIP_VOP2_EP_HDMI0 || \ ++ (x) == ROCKCHIP_VOP2_EP_HDMI1) ++ ++#define vop2_output_if_is_dp(x) ((x) == ROCKCHIP_VOP2_EP_DP0 || \ ++ (x) == ROCKCHIP_VOP2_EP_DP1) ++ ++#define vop2_output_if_is_edp(x) ((x) == ROCKCHIP_VOP2_EP_EDP0 || \ ++ (x) == ROCKCHIP_VOP2_EP_EDP1) ++ ++#define vop2_output_if_is_mipi(x) ((x) == ROCKCHIP_VOP2_EP_MIPI0 || \ ++ (x) == ROCKCHIP_VOP2_EP_MIPI1) ++ ++#define vop2_output_if_is_lvds(x) ((x) == ROCKCHIP_VOP2_EP_LVDS0 || \ ++ (x) == ROCKCHIP_VOP2_EP_LVDS1) ++ ++#define vop2_output_if_is_dpi(x) ((x) == ROCKCHIP_VOP2_EP_RGB0) ++ + static const struct regmap_config vop2_regmap_config; + + static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc) +@@ -475,6 +496,17 @@ static bool vop2_output_uv_swap(u32 bus_ + return false; + } + ++static bool vop2_output_rg_swap(struct vop2 *vop2, u32 bus_format) ++{ ++ if (vop2->data->soc_id == 3588) { ++ if (bus_format == MEDIA_BUS_FMT_YUV8_1X24 || ++ bus_format == MEDIA_BUS_FMT_YUV10_1X30) ++ return true; ++ } ++ ++ return false; ++} ++ + static bool is_yuv_output(u32 bus_format) + { + 
switch (bus_format) { +@@ -881,13 +913,32 @@ static int vop2_core_clks_prepare_enable + goto err; + } + ++ ret = clk_prepare_enable(vop2->pclk); ++ if (ret < 0) { ++ drm_err(vop2->drm, "failed to enable pclk - %d\n", ret); ++ goto err1; ++ } ++ + return 0; ++err1: ++ clk_disable_unprepare(vop2->aclk); + err: + clk_disable_unprepare(vop2->hclk); + + return ret; + } + ++static void rk3588_vop2_power_domain_enable_all(struct vop2 *vop2) ++{ ++ u32 pd; ++ ++ pd = vop2_readl(vop2, RK3588_SYS_PD_CTRL); ++ pd &= ~(VOP2_PD_CLUSTER0 | VOP2_PD_CLUSTER1 | VOP2_PD_CLUSTER2 | ++ VOP2_PD_CLUSTER3 | VOP2_PD_ESMART); ++ ++ vop2_writel(vop2, RK3588_SYS_PD_CTRL, pd); ++} ++ + static void vop2_enable(struct vop2 *vop2) + { + int ret; +@@ -919,6 +970,9 @@ static void vop2_enable(struct vop2 *vop + if (vop2->data->soc_id == 3566) + vop2_writel(vop2, RK3568_OTP_WIN_EN, 1); + ++ if (vop2->data->soc_id == 3588) ++ rk3588_vop2_power_domain_enable_all(vop2); ++ + vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN); + + /* +@@ -944,6 +998,7 @@ static void vop2_disable(struct vop2 *vo + + pm_runtime_put_sync(vop2->dev); + ++ clk_disable_unprepare(vop2->pclk); + clk_disable_unprepare(vop2->aclk); + clk_disable_unprepare(vop2->hclk); + } +@@ -1311,7 +1366,19 @@ static void vop2_plane_atomic_update(str + vop2_win_write(win, VOP2_WIN_AFBC_ENABLE, 1); + vop2_win_write(win, VOP2_WIN_AFBC_FORMAT, afbc_format); + vop2_win_write(win, VOP2_WIN_AFBC_UV_SWAP, uv_swap); +- vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); ++ /* ++ * On rk3566/8, this bit is auto gating enable, ++ * but this function is not work well so we need ++ * to disable it for these two platform. ++ * On rk3588, and the following new soc(rk3528/rk3576), ++ * this bit is gating disable, we should write 1 to ++ * disable gating when enable afbc. 
++ */ ++ if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568) ++ vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 0); ++ else ++ vop2_win_write(win, VOP2_WIN_AFBC_AUTO_GATING_EN, 1); ++ + vop2_win_write(win, VOP2_WIN_AFBC_BLOCK_SPLIT_EN, 0); + transform_offset = vop2_afbc_transform_offset(pstate, half_block_en); + vop2_win_write(win, VOP2_WIN_AFBC_HDR_PTR, yrgb_mst); +@@ -1509,10 +1576,10 @@ static void vop2_post_config(struct drm_ + vop2_vp_write(vp, RK3568_VP_DSP_BG, 0); + } + +-static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id, +- u32 polflags) ++static unsigned long rk3568_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags) + { + struct vop2 *vop2 = vp->vop2; ++ struct drm_crtc *crtc = &vp->crtc; + u32 die, dip; + + die = vop2_readl(vop2, RK3568_DSP_IF_EN); +@@ -1574,13 +1641,281 @@ static void rk3568_set_intf_mux(struct v + break; + default: + drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id); +- return; ++ return 0; ++ } ++ ++ dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD; ++ ++ vop2_writel(vop2, RK3568_DSP_IF_EN, die); ++ vop2_writel(vop2, RK3568_DSP_IF_POL, dip); ++ ++ return crtc->state->adjusted_mode.crtc_clock * 1000LL; ++} ++ ++/* ++ * calc the dclk on rk3588 ++ * the available div of dclk is 1, 2, 4 ++ */ ++static unsigned long rk3588_calc_dclk(unsigned long child_clk, unsigned long max_dclk) ++{ ++ if (child_clk * 4 <= max_dclk) ++ return child_clk * 4; ++ else if (child_clk * 2 <= max_dclk) ++ return child_clk * 2; ++ else if (child_clk <= max_dclk) ++ return child_clk; ++ else ++ return 0; ++} ++ ++/* ++ * 4 pixclk/cycle on rk3588 ++ * RGB/eDP/HDMI: if_pixclk >= dclk_core ++ * DP: dp_pixclk = dclk_out <= dclk_core ++ * DSI: mipi_pixclk <= dclk_out <= dclk_core ++ */ ++static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, ++ int *dclk_core_div, int *dclk_out_div, ++ int *if_pixclk_div, int *if_dclk_div) ++{ ++ struct vop2 *vop2 = vp->vop2; ++ struct drm_crtc *crtc = &vp->crtc; ++ struct drm_display_mode *adjusted_mode = &crtc->state->adjusted_mode; ++ struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state); ++ int output_mode = vcstate->output_mode; ++ unsigned long v_pixclk = adjusted_mode->crtc_clock * 1000LL; /* video timing pixclk */ ++ unsigned long dclk_core_rate = v_pixclk >> 2; ++ unsigned long dclk_rate = v_pixclk; ++ unsigned long dclk_out_rate; ++ unsigned long if_dclk_rate; ++ unsigned long if_pixclk_rate; ++ int K = 1; ++ ++ if (vop2_output_if_is_hdmi(id)) { ++ /* ++ * K = 2: dclk_core = if_pixclk_rate > if_dclk_rate ++ * K = 1: dclk_core = hdmie_edp_dclk > if_pixclk_rate ++ */ ++ if (output_mode == ROCKCHIP_OUT_MODE_YUV420) { ++ dclk_rate = dclk_rate >> 1; ++ K = 2; ++ } ++ ++ if_pixclk_rate = (dclk_core_rate << 1) / K; ++ if_dclk_rate = dclk_core_rate / K; ++ /* ++ * *if_pixclk_div = dclk_rate / if_pixclk_rate; ++ * *if_dclk_div = dclk_rate / if_dclk_rate; ++ */ ++ *if_pixclk_div = 2; ++ *if_dclk_div = 4; ++ } else if (vop2_output_if_is_edp(id)) { ++ /* ++ * edp_pixclk = edp_dclk > dclk_core ++ */ ++ if_pixclk_rate = v_pixclk / K; ++ dclk_rate = if_pixclk_rate * K; ++ /* ++ * *if_pixclk_div = dclk_rate / if_pixclk_rate; ++ * *if_dclk_div = *if_pixclk_div; ++ */ ++ *if_pixclk_div = K; ++ *if_dclk_div = K; ++ } else if (vop2_output_if_is_dp(id)) { ++ if (output_mode == ROCKCHIP_OUT_MODE_YUV420) ++ dclk_out_rate = v_pixclk >> 3; ++ else ++ dclk_out_rate = v_pixclk >> 2; ++ ++ dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); ++ if (!dclk_rate) { ++ drm_err(vop2->drm, "DP 
dclk_out_rate out of range, dclk_out_rate: %ld KHZ\n", ++ dclk_out_rate); ++ return 0; ++ } ++ *dclk_out_div = dclk_rate / dclk_out_rate; ++ } else if (vop2_output_if_is_mipi(id)) { ++ if_pixclk_rate = dclk_core_rate / K; ++ /* ++ * dclk_core = dclk_out * K = if_pixclk * K = v_pixclk / 4 ++ */ ++ dclk_out_rate = if_pixclk_rate; ++ /* ++ * dclk_rate = N * dclk_core_rate N = (1,2,4 ), ++ * we get a little factor here ++ */ ++ dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); ++ if (!dclk_rate) { ++ drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld KHZ\n", ++ dclk_out_rate); ++ return 0; ++ } ++ *dclk_out_div = dclk_rate / dclk_out_rate; ++ /* ++ * mipi pixclk == dclk_out ++ */ ++ *if_pixclk_div = 1; ++ } else if (vop2_output_if_is_dpi(id)) { ++ dclk_rate = v_pixclk; ++ } ++ ++ *dclk_core_div = dclk_rate / dclk_core_rate; ++ *if_pixclk_div = ilog2(*if_pixclk_div); ++ *if_dclk_div = ilog2(*if_dclk_div); ++ *dclk_core_div = ilog2(*dclk_core_div); ++ *dclk_out_div = ilog2(*dclk_out_div); ++ ++ drm_dbg(vop2->drm, "dclk: %ld, pixclk_div: %d, dclk_div: %d\n", ++ dclk_rate, *if_pixclk_div, *if_dclk_div); ++ ++ return dclk_rate; ++} ++ ++/* ++ * MIPI port mux on rk3588: ++ * 0: Video Port2 ++ * 1: Video Port3 ++ * 3: Video Port 1(MIPI1 only) ++ */ ++static u32 rk3588_get_mipi_port_mux(int vp_id) ++{ ++ if (vp_id == 1) ++ return 3; ++ else if (vp_id == 3) ++ return 1; ++ else ++ return 0; ++} ++ ++static u32 rk3588_get_hdmi_pol(u32 flags) ++{ ++ u32 val; ++ ++ val = (flags & DRM_MODE_FLAG_NHSYNC) ? BIT(HSYNC_POSITIVE) : 0; ++ val |= (flags & DRM_MODE_FLAG_NVSYNC) ? BIT(VSYNC_POSITIVE) : 0; ++ ++ return val; ++} ++ ++static unsigned long rk3588_set_intf_mux(struct vop2_video_port *vp, int id, u32 polflags) ++{ ++ struct vop2 *vop2 = vp->vop2; ++ int dclk_core_div, dclk_out_div, if_pixclk_div, if_dclk_div; ++ unsigned long clock; ++ u32 die, dip, div, vp_clk_div, val; ++ ++ clock = rk3588_calc_cru_cfg(vp, id, &dclk_core_div, &dclk_out_div, ++ &if_pixclk_div, &if_dclk_div); ++ if (!clock) ++ return 0; ++ ++ vp_clk_div = FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_CORE_DIV, dclk_core_div); ++ vp_clk_div |= FIELD_PREP(RK3588_VP_CLK_CTRL__DCLK_OUT_DIV, dclk_out_div); ++ ++ die = vop2_readl(vop2, RK3568_DSP_IF_EN); ++ dip = vop2_readl(vop2, RK3568_DSP_IF_POL); ++ div = vop2_readl(vop2, RK3568_DSP_IF_CTRL); ++ ++ switch (id) { ++ case ROCKCHIP_VOP2_EP_HDMI0: ++ div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV; ++ div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_HDMI0 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); ++ val = rk3588_get_hdmi_pol(polflags); ++ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 1, 1)); ++ regmap_write(vop2->vo1_grf, RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 6, 5)); ++ break; ++ case ROCKCHIP_VOP2_EP_HDMI1: ++ div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; ++ div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV, if_dclk_div); ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_HDMI1 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); ++ val = rk3588_get_hdmi_pol(polflags); ++ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 4, 4)); ++ regmap_write(vop2->vo1_grf, 
RK3588_GRF_VO1_CON0, HIWORD_UPDATE(val, 8, 7)); ++ break; ++ case ROCKCHIP_VOP2_EP_EDP0: ++ div &= ~RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV; ++ div &= ~RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_EDP0 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX, vp->id); ++ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 0, 0)); ++ break; ++ case ROCKCHIP_VOP2_EP_EDP1: ++ div &= ~RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV; ++ div &= ~RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV, if_dclk_div); ++ div |= FIELD_PREP(RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_EDP1 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX, vp->id); ++ regmap_write(vop2->vop_grf, RK3588_GRF_VOP_CON2, HIWORD_UPDATE(1, 3, 3)); ++ break; ++ case ROCKCHIP_VOP2_EP_MIPI0: ++ div &= ~RK3588_DSP_IF_MIPI0_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_MIPI0_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX; ++ val = rk3588_get_mipi_port_mux(vp->id); ++ die |= RK3588_SYS_DSP_INFACE_EN_MIPI0 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX, !!val); ++ break; ++ case ROCKCHIP_VOP2_EP_MIPI1: ++ div &= ~RK3588_DSP_IF_MIPI1_PCLK_DIV; ++ div |= FIELD_PREP(RK3588_DSP_IF_MIPI1_PCLK_DIV, if_pixclk_div); ++ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX; ++ val = rk3588_get_mipi_port_mux(vp->id); ++ die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, val); ++ break; ++ case ROCKCHIP_VOP2_EP_DP0: ++ die &= ~RK3588_SYS_DSP_INFACE_EN_DP0_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_DP0 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_DP0_MUX, vp->id); ++ dip &= ~RK3588_DSP_IF_POL__DP0_PIN_POL; ++ dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP0_PIN_POL, polflags); ++ break; ++ case ROCKCHIP_VOP2_EP_DP1: ++ die &= ~RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX; ++ die |= RK3588_SYS_DSP_INFACE_EN_MIPI1 | ++ FIELD_PREP(RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX, vp->id); ++ dip &= ~RK3588_DSP_IF_POL__DP1_PIN_POL; ++ dip |= FIELD_PREP(RK3588_DSP_IF_POL__DP1_PIN_POL, polflags); ++ break; ++ default: ++ drm_err(vop2->drm, "Invalid interface id %d on vp%d\n", id, vp->id); ++ return 0; + } + + dip |= RK3568_DSP_IF_POL__CFG_DONE_IMD; + ++ vop2_vp_write(vp, RK3588_VP_CLK_CTRL, vp_clk_div); + vop2_writel(vop2, RK3568_DSP_IF_EN, die); ++ vop2_writel(vop2, RK3568_DSP_IF_CTRL, div); + vop2_writel(vop2, RK3568_DSP_IF_POL, dip); ++ ++ return clock; ++} ++ ++static unsigned long vop2_set_intf_mux(struct vop2_video_port *vp, int ep_id, u32 polflags) ++{ ++ struct vop2 *vop2 = vp->vop2; ++ ++ if (vop2->data->soc_id == 3566 || vop2->data->soc_id == 3568) ++ return rk3568_set_intf_mux(vp, ep_id, polflags); ++ else if (vop2->data->soc_id == 3588) ++ return rk3588_set_intf_mux(vp, ep_id, polflags); ++ else ++ return 0; + } + + static int us_to_vertical_line(struct drm_display_mode *mode, int us) +@@ -1650,9 +1985,17 @@ static void vop2_crtc_atomic_enable(stru + drm_for_each_encoder_mask(encoder, crtc->dev, crtc_state->encoder_mask) { + struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder); + +- rk3568_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); ++ /* ++ * for drive a high resolution(4KP120, 8K), vop on rk3588/rk3576 need ++ * process multi(1/2/4/8) pixels per cycle, so the dclk 
feed by the ++ * system cru may be the 1/2 or 1/4 of mode->clock. ++ */ ++ clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); + } + ++ if (!clock) ++ return; ++ + if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && + !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT)) + out_mode = ROCKCHIP_OUT_MODE_P888; +@@ -1663,6 +2006,8 @@ static void vop2_crtc_atomic_enable(stru + + if (vop2_output_uv_swap(vcstate->bus_format, vcstate->output_mode)) + dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RB_SWAP; ++ if (vop2_output_rg_swap(vop2, vcstate->bus_format)) ++ dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_RG_SWAP; + + if (vcstate->yuv_overlay) + dsp_ctrl |= RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y; +@@ -2024,6 +2369,14 @@ static void vop2_setup_layer_mixer(struc + port_sel &= ~RK3568_OVL_PORT_SEL__CLUSTER1; + port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__CLUSTER1, vp->id); + break; ++ case ROCKCHIP_VOP2_CLUSTER2: ++ port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER2; ++ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER2, vp->id); ++ break; ++ case ROCKCHIP_VOP2_CLUSTER3: ++ port_sel &= ~RK3588_OVL_PORT_SEL__CLUSTER3; ++ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__CLUSTER3, vp->id); ++ break; + case ROCKCHIP_VOP2_ESMART0: + port_sel &= ~RK3568_OVL_PORT_SEL__ESMART0; + port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART0, vp->id); +@@ -2032,6 +2385,14 @@ static void vop2_setup_layer_mixer(struc + port_sel &= ~RK3568_OVL_PORT_SEL__ESMART1; + port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__ESMART1, vp->id); + break; ++ case ROCKCHIP_VOP2_ESMART2: ++ port_sel &= ~RK3588_OVL_PORT_SEL__ESMART2; ++ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART2, vp->id); ++ break; ++ case ROCKCHIP_VOP2_ESMART3: ++ port_sel &= ~RK3588_OVL_PORT_SEL__ESMART3; ++ port_sel |= FIELD_PREP(RK3588_OVL_PORT_SEL__ESMART3, vp->id); ++ break; + case ROCKCHIP_VOP2_SMART0: + port_sel &= ~RK3568_OVL_PORT_SEL__SMART0; + port_sel |= FIELD_PREP(RK3568_OVL_PORT_SEL__SMART0, vp->id); +@@ -2768,8 +3129,29 @@ static int vop2_bind(struct device *dev, + if (IS_ERR(vop2->lut_regs)) + return PTR_ERR(vop2->lut_regs); + } ++ if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_GRF) { ++ vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); ++ if (IS_ERR(vop2->sys_grf)) ++ return dev_err_probe(dev, PTR_ERR(vop2->sys_grf), "cannot get sys_grf"); ++ } ++ ++ if (vop2_data->feature & VOP2_FEATURE_HAS_VOP_GRF) { ++ vop2->vop_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vop-grf"); ++ if (IS_ERR(vop2->vop_grf)) ++ return dev_err_probe(dev, PTR_ERR(vop2->vop_grf), "cannot get vop_grf"); ++ } ++ ++ if (vop2_data->feature & VOP2_FEATURE_HAS_VO1_GRF) { ++ vop2->vo1_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,vo1-grf"); ++ if (IS_ERR(vop2->vo1_grf)) ++ return dev_err_probe(dev, PTR_ERR(vop2->vo1_grf), "cannot get vo1_grf"); ++ } + +- vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); ++ if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_PMU) { ++ vop2->sys_pmu = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,pmu"); ++ if (IS_ERR(vop2->sys_pmu)) ++ return dev_err_probe(dev, PTR_ERR(vop2->sys_pmu), "cannot get sys_pmu"); ++ } + + vop2->hclk = devm_clk_get(vop2->dev, "hclk"); + if (IS_ERR(vop2->hclk)) { +@@ -2783,6 +3165,12 @@ static int vop2_bind(struct device *dev, + return PTR_ERR(vop2->aclk); + } + ++ vop2->pclk = devm_clk_get_optional(vop2->dev, "pclk_vop"); ++ if (IS_ERR(vop2->pclk)) { ++ drm_err(vop2->drm, "failed to get pclk source\n"); ++ return PTR_ERR(vop2->pclk); ++ } ++ + vop2->irq = 
platform_get_irq(pdev, 0); + if (vop2->irq < 0) { + drm_err(vop2->drm, "cannot find irq for vop2\n"); +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -13,9 +13,16 @@ + + #define VOP_FEATURE_OUTPUT_10BIT BIT(0) + ++#define VOP2_FEATURE_HAS_SYS_GRF BIT(0) ++#define VOP2_FEATURE_HAS_VO0_GRF BIT(1) ++#define VOP2_FEATURE_HAS_VO1_GRF BIT(2) ++#define VOP2_FEATURE_HAS_VOP_GRF BIT(3) ++#define VOP2_FEATURE_HAS_SYS_PMU BIT(4) ++ + #define WIN_FEATURE_AFBDC BIT(0) + #define WIN_FEATURE_CLUSTER BIT(1) + ++#define HIWORD_UPDATE(v, h, l) ((GENMASK(h, l) << 16) | ((v) << (l))) + /* + * the delay number of a window in different mode. + */ +@@ -38,6 +45,18 @@ enum vop2_scale_down_mode { + VOP2_SCALE_DOWN_AVG, + }; + ++/* ++ * vop2 internal power domain id, ++ * should be all none zero, 0 will be treat as invalid; ++ */ ++#define VOP2_PD_CLUSTER0 BIT(0) ++#define VOP2_PD_CLUSTER1 BIT(1) ++#define VOP2_PD_CLUSTER2 BIT(2) ++#define VOP2_PD_CLUSTER3 BIT(3) ++#define VOP2_PD_DSC_8K BIT(5) ++#define VOP2_PD_DSC_4K BIT(6) ++#define VOP2_PD_ESMART BIT(7) ++ + enum vop2_win_regs { + VOP2_WIN_ENABLE, + VOP2_WIN_FORMAT, +@@ -138,6 +157,7 @@ struct vop2_video_port_data { + + struct vop2_data { + u8 nr_vps; ++ u64 feature; + const struct vop2_win_data *win; + const struct vop2_video_port_data *vp; + struct vop_rect max_input; +@@ -192,6 +212,11 @@ enum dst_factor_mode { + }; + + #define RK3568_GRF_VO_CON1 0x0364 ++ ++#define RK3588_GRF_SOC_CON1 0x0304 ++#define RK3588_GRF_VOP_CON2 0x08 ++#define RK3588_GRF_VO1_CON0 0x00 ++ + /* System registers definition */ + #define RK3568_REG_CFG_DONE 0x000 + #define RK3568_VERSION_INFO 0x004 +@@ -200,6 +225,7 @@ enum dst_factor_mode { + #define RK3568_DSP_IF_EN 0x028 + #define RK3568_DSP_IF_CTRL 0x02c + #define RK3568_DSP_IF_POL 0x030 ++#define RK3588_SYS_PD_CTRL 0x034 + #define RK3568_WB_CTRL 0x40 + #define RK3568_WB_XSCAL_FACTOR 0x44 + #define RK3568_WB_YRGB_MST 0x48 +@@ -220,9 +246,14 @@ enum dst_factor_mode { + #define RK3568_VP_INT_RAW_STATUS(vp) (0xAC + (vp) * 0x10) + + /* Video Port registers definition */ ++#define RK3568_VP0_CTRL_BASE 0x0C00 ++#define RK3568_VP1_CTRL_BASE 0x0D00 ++#define RK3568_VP2_CTRL_BASE 0x0E00 ++#define RK3588_VP3_CTRL_BASE 0x0F00 + #define RK3568_VP_DSP_CTRL 0x00 + #define RK3568_VP_MIPI_CTRL 0x04 + #define RK3568_VP_COLOR_BAR_CTRL 0x08 ++#define RK3588_VP_CLK_CTRL 0x0C + #define RK3568_VP_3D_LUT_CTRL 0x10 + #define RK3568_VP_3D_LUT_MST 0x20 + #define RK3568_VP_DSP_BG 0x2C +@@ -264,6 +295,17 @@ enum dst_factor_mode { + #define RK3568_SMART_DLY_NUM 0x6F8 + + /* Cluster register definition, offset relative to window base */ ++#define RK3568_CLUSTER0_CTRL_BASE 0x1000 ++#define RK3568_CLUSTER1_CTRL_BASE 0x1200 ++#define RK3588_CLUSTER2_CTRL_BASE 0x1400 ++#define RK3588_CLUSTER3_CTRL_BASE 0x1600 ++#define RK3568_ESMART0_CTRL_BASE 0x1800 ++#define RK3568_ESMART1_CTRL_BASE 0x1A00 ++#define RK3568_SMART0_CTRL_BASE 0x1C00 ++#define RK3568_SMART1_CTRL_BASE 0x1E00 ++#define RK3588_ESMART2_CTRL_BASE 0x1C00 ++#define RK3588_ESMART3_CTRL_BASE 0x1E00 ++ + #define RK3568_CLUSTER_WIN_CTRL0 0x00 + #define RK3568_CLUSTER_WIN_CTRL1 0x04 + #define RK3568_CLUSTER_WIN_YRGB_MST 0x10 +@@ -357,13 +399,18 @@ enum dst_factor_mode { + #define RK3568_VP_DSP_CTRL__DITHER_DOWN_EN BIT(17) + #define RK3568_VP_DSP_CTRL__PRE_DITHER_DOWN_EN BIT(16) + #define RK3568_VP_DSP_CTRL__POST_DSP_OUT_R2Y BIT(15) ++#define RK3568_VP_DSP_CTRL__DSP_RG_SWAP BIT(10) + #define RK3568_VP_DSP_CTRL__DSP_RB_SWAP BIT(9) ++#define 
RK3568_VP_DSP_CTRL__DSP_BG_SWAP BIT(8) + #define RK3568_VP_DSP_CTRL__DSP_INTERLACE BIT(7) + #define RK3568_VP_DSP_CTRL__DSP_FILED_POL BIT(6) + #define RK3568_VP_DSP_CTRL__P2I_EN BIT(5) + #define RK3568_VP_DSP_CTRL__CORE_DCLK_DIV BIT(4) + #define RK3568_VP_DSP_CTRL__OUT_MODE GENMASK(3, 0) + ++#define RK3588_VP_CLK_CTRL__DCLK_OUT_DIV GENMASK(3, 2) ++#define RK3588_VP_CLK_CTRL__DCLK_CORE_DIV GENMASK(1, 0) ++ + #define RK3568_VP_POST_SCL_CTRL__VSCALEDOWN BIT(1) + #define RK3568_VP_POST_SCL_CTRL__HSCALEDOWN BIT(0) + +@@ -382,11 +429,37 @@ enum dst_factor_mode { + #define RK3568_SYS_DSP_INFACE_EN_HDMI BIT(1) + #define RK3568_SYS_DSP_INFACE_EN_RGB BIT(0) + ++#define RK3588_SYS_DSP_INFACE_EN_MIPI1_MUX GENMASK(22, 21) ++#define RK3588_SYS_DSP_INFACE_EN_MIPI0_MUX GENMASK(20, 20) ++#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI1_MUX GENMASK(19, 18) ++#define RK3588_SYS_DSP_INFACE_EN_EDP_HDMI0_MUX GENMASK(17, 16) ++#define RK3588_SYS_DSP_INFACE_EN_DP1_MUX GENMASK(15, 14) ++#define RK3588_SYS_DSP_INFACE_EN_DP0_MUX GENMASK(13, 12) ++#define RK3588_SYS_DSP_INFACE_EN_DPI GENMASK(9, 8) ++#define RK3588_SYS_DSP_INFACE_EN_MIPI1 BIT(7) ++#define RK3588_SYS_DSP_INFACE_EN_MIPI0 BIT(6) ++#define RK3588_SYS_DSP_INFACE_EN_HDMI1 BIT(5) ++#define RK3588_SYS_DSP_INFACE_EN_EDP1 BIT(4) ++#define RK3588_SYS_DSP_INFACE_EN_HDMI0 BIT(3) ++#define RK3588_SYS_DSP_INFACE_EN_EDP0 BIT(2) ++#define RK3588_SYS_DSP_INFACE_EN_DP1 BIT(1) ++#define RK3588_SYS_DSP_INFACE_EN_DP0 BIT(0) ++ ++#define RK3588_DSP_IF_MIPI1_PCLK_DIV GENMASK(27, 26) ++#define RK3588_DSP_IF_MIPI0_PCLK_DIV GENMASK(25, 24) ++#define RK3588_DSP_IF_EDP_HDMI1_PCLK_DIV GENMASK(22, 22) ++#define RK3588_DSP_IF_EDP_HDMI1_DCLK_DIV GENMASK(21, 20) ++#define RK3588_DSP_IF_EDP_HDMI0_PCLK_DIV GENMASK(18, 18) ++#define RK3588_DSP_IF_EDP_HDMI0_DCLK_DIV GENMASK(17, 16) ++ + #define RK3568_DSP_IF_POL__MIPI_PIN_POL GENMASK(19, 16) + #define RK3568_DSP_IF_POL__EDP_PIN_POL GENMASK(15, 12) + #define RK3568_DSP_IF_POL__HDMI_PIN_POL GENMASK(7, 4) + #define RK3568_DSP_IF_POL__RGB_LVDS_PIN_POL GENMASK(3, 0) + ++#define RK3588_DSP_IF_POL__DP1_PIN_POL GENMASK(14, 12) ++#define RK3588_DSP_IF_POL__DP0_PIN_POL GENMASK(10, 8) ++ + #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2_PHASE_LOCK BIT(5) + #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2 BIT(4) + +@@ -408,8 +481,12 @@ enum dst_factor_mode { + #define RK3568_OVL_PORT_SEL__SEL_PORT GENMASK(31, 16) + #define RK3568_OVL_PORT_SEL__SMART1 GENMASK(31, 30) + #define RK3568_OVL_PORT_SEL__SMART0 GENMASK(29, 28) ++#define RK3588_OVL_PORT_SEL__ESMART3 GENMASK(31, 30) ++#define RK3588_OVL_PORT_SEL__ESMART2 GENMASK(29, 28) + #define RK3568_OVL_PORT_SEL__ESMART1 GENMASK(27, 26) + #define RK3568_OVL_PORT_SEL__ESMART0 GENMASK(25, 24) ++#define RK3588_OVL_PORT_SEL__CLUSTER3 GENMASK(23, 22) ++#define RK3588_OVL_PORT_SEL__CLUSTER2 GENMASK(21, 20) + #define RK3568_OVL_PORT_SEL__CLUSTER1 GENMASK(19, 18) + #define RK3568_OVL_PORT_SEL__CLUSTER0 GENMASK(17, 16) + #define RK3568_OVL_PORT_SET__PORT2_MUX GENMASK(11, 8) +@@ -422,6 +499,10 @@ enum dst_factor_mode { + #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_1 GENMASK(15, 8) + #define RK3568_CLUSTER_DLY_NUM__CLUSTER0_0 GENMASK(7, 0) + ++#define RK3568_CLUSTER_WIN_CTRL0__WIN0_EN BIT(0) ++ ++#define RK3568_SMART_REGION0_CTRL__WIN0_EN BIT(0) ++ + #define RK3568_SMART_DLY_NUM__SMART1 GENMASK(31, 24) + #define RK3568_SMART_DLY_NUM__SMART0 GENMASK(23, 16) + #define RK3568_SMART_DLY_NUM__ESMART1 GENMASK(15, 8) +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -34,6 +34,30 @@ static 
const uint32_t formats_cluster[] + DRM_FORMAT_Y210, /* yuv422_10bit non-Linear mode only */ + }; + ++static const uint32_t formats_esmart[] = { ++ DRM_FORMAT_XRGB8888, ++ DRM_FORMAT_ARGB8888, ++ DRM_FORMAT_XBGR8888, ++ DRM_FORMAT_ABGR8888, ++ DRM_FORMAT_RGB888, ++ DRM_FORMAT_BGR888, ++ DRM_FORMAT_RGB565, ++ DRM_FORMAT_BGR565, ++ DRM_FORMAT_NV12, /* yuv420_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV21, /* yvu420_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV16, /* yuv422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV61, /* yvu422_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV20, /* yuv422_10bit linear mode, 2 plane, no padding */ ++ DRM_FORMAT_NV24, /* yuv444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV42, /* yvu444_8bit linear mode, 2 plane */ ++ DRM_FORMAT_NV30, /* yuv444_10bit linear mode, 2 plane, no padding */ ++ DRM_FORMAT_NV15, /* yuv420_10bit linear mode, 2 plane, no padding */ ++ DRM_FORMAT_YVYU, /* yuv422_8bit[YVYU] linear mode */ ++ DRM_FORMAT_VYUY, /* yuv422_8bit[VYUY] linear mode */ ++ DRM_FORMAT_YUYV, /* yuv422_8bit[YUYV] linear mode */ ++ DRM_FORMAT_UYVY, /* yuv422_8bit[UYVY] linear mode */ ++}; ++ + static const uint32_t formats_rk356x_esmart[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, +@@ -236,7 +260,188 @@ static const struct vop2_win_data rk3568 + }, + }; + ++static const struct vop2_video_port_data rk3588_vop_video_ports[] = { ++ { ++ .id = 0, ++ .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .gamma_lut_len = 1024, ++ .cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */ ++ .max_output = { 4096, 2304 }, ++ /* hdr2sdr sdr2hdr hdr2hdr sdr2sdr */ ++ .pre_scan_max_dly = { 76, 65, 65, 54 }, ++ .offset = 0xc00, ++ }, { ++ .id = 1, ++ .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .gamma_lut_len = 1024, ++ .cubic_lut_len = 729, /* 9x9x9 */ ++ .max_output = { 4096, 2304 }, ++ .pre_scan_max_dly = { 76, 65, 65, 54 }, ++ .offset = 0xd00, ++ }, { ++ .id = 2, ++ .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .gamma_lut_len = 1024, ++ .cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */ ++ .max_output = { 4096, 2304 }, ++ .pre_scan_max_dly = { 52, 52, 52, 52 }, ++ .offset = 0xe00, ++ }, { ++ .id = 3, ++ .gamma_lut_len = 1024, ++ .max_output = { 2048, 1536 }, ++ .pre_scan_max_dly = { 52, 52, 52, 52 }, ++ .offset = 0xf00, ++ }, ++}; ++ ++/* ++ * rk3588 vop with 4 cluster, 4 esmart win. ++ * Every cluster can work as 4K win or split into two win. ++ * All win in cluster support AFBCD. ++ * ++ * Every esmart win and smart win support 4 Multi-region. ++ * ++ * Scale filter mode: ++ * ++ * * Cluster: bicubic for horizontal scale up, others use bilinear ++ * * ESmart: ++ * * nearest-neighbor/bilinear/bicubic for scale up ++ * * nearest-neighbor/bilinear/average for scale down ++ * ++ * AXI Read ID assignment: ++ * Two AXI bus: ++ * AXI0 is a read/write bus with a higher performance. ++ * AXI1 is a read only bus. ++ * ++ * Every window on a AXI bus must assigned two unique ++ * read id(yrgb_id/uv_id, valid id are 0x1~0xe). 
++ * ++ * AXI0: ++ * Cluster0/1, Esmart0/1, WriteBack ++ * ++ * AXI 1: ++ * Cluster2/3, Esmart2/3 ++ * ++ */ ++static const struct vop2_win_data rk3588_vop_win_data[] = { ++ { ++ .name = "Cluster0-win0", ++ .phys_id = ROCKCHIP_VOP2_CLUSTER0, ++ .base = 0x1000, ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), ++ .format_modifiers = format_modifiers_afbc, ++ .layer_sel_id = 0, ++ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | ++ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, ++ .max_upscale_factor = 4, ++ .max_downscale_factor = 4, ++ .dly = { 4, 26, 29 }, ++ .type = DRM_PLANE_TYPE_PRIMARY, ++ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, ++ }, { ++ .name = "Cluster1-win0", ++ .phys_id = ROCKCHIP_VOP2_CLUSTER1, ++ .base = 0x1200, ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), ++ .format_modifiers = format_modifiers_afbc, ++ .layer_sel_id = 1, ++ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | ++ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_PRIMARY, ++ .max_upscale_factor = 4, ++ .max_downscale_factor = 4, ++ .dly = { 4, 26, 29 }, ++ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, ++ }, { ++ .name = "Cluster2-win0", ++ .phys_id = ROCKCHIP_VOP2_CLUSTER2, ++ .base = 0x1400, ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), ++ .format_modifiers = format_modifiers_afbc, ++ .layer_sel_id = 4, ++ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | ++ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_PRIMARY, ++ .max_upscale_factor = 4, ++ .max_downscale_factor = 4, ++ .dly = { 4, 26, 29 }, ++ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, ++ }, { ++ .name = "Cluster3-win0", ++ .phys_id = ROCKCHIP_VOP2_CLUSTER3, ++ .base = 0x1600, ++ .formats = formats_cluster, ++ .nformats = ARRAY_SIZE(formats_cluster), ++ .format_modifiers = format_modifiers_afbc, ++ .layer_sel_id = 5, ++ .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | ++ DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_PRIMARY, ++ .max_upscale_factor = 4, ++ .max_downscale_factor = 4, ++ .dly = { 4, 26, 29 }, ++ .feature = WIN_FEATURE_AFBDC | WIN_FEATURE_CLUSTER, ++ }, { ++ .name = "Esmart0-win0", ++ .phys_id = ROCKCHIP_VOP2_ESMART0, ++ .formats = formats_esmart, ++ .nformats = ARRAY_SIZE(formats_esmart), ++ .format_modifiers = format_modifiers, ++ .base = 0x1800, ++ .layer_sel_id = 2, ++ .supported_rotations = DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_OVERLAY, ++ .max_upscale_factor = 8, ++ .max_downscale_factor = 8, ++ .dly = { 23, 45, 48 }, ++ }, { ++ .name = "Esmart1-win0", ++ .phys_id = ROCKCHIP_VOP2_ESMART1, ++ .formats = formats_esmart, ++ .nformats = ARRAY_SIZE(formats_esmart), ++ .format_modifiers = format_modifiers, ++ .base = 0x1a00, ++ .layer_sel_id = 3, ++ .supported_rotations = DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_OVERLAY, ++ .max_upscale_factor = 8, ++ .max_downscale_factor = 8, ++ .dly = { 23, 45, 48 }, ++ }, { ++ .name = "Esmart2-win0", ++ .phys_id = ROCKCHIP_VOP2_ESMART2, ++ .base = 0x1c00, ++ .formats = formats_esmart, ++ .nformats = ARRAY_SIZE(formats_esmart), ++ .format_modifiers = format_modifiers, ++ .layer_sel_id = 6, ++ .supported_rotations = DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_OVERLAY, ++ .max_upscale_factor = 8, ++ .max_downscale_factor = 8, ++ .dly = { 23, 45, 48 }, ++ }, { ++ .name = "Esmart3-win0", ++ .phys_id = ROCKCHIP_VOP2_ESMART3, ++ .formats = formats_esmart, ++ .nformats = 
ARRAY_SIZE(formats_esmart), ++ .format_modifiers = format_modifiers, ++ .base = 0x1e00, ++ .layer_sel_id = 7, ++ .supported_rotations = DRM_MODE_REFLECT_Y, ++ .type = DRM_PLANE_TYPE_OVERLAY, ++ .max_upscale_factor = 8, ++ .max_downscale_factor = 8, ++ .dly = { 23, 45, 48 }, ++ }, ++}; ++ + static const struct vop2_data rk3566_vop = { ++ .feature = VOP2_FEATURE_HAS_SYS_GRF, + .nr_vps = 3, + .max_input = { 4096, 2304 }, + .max_output = { 4096, 2304 }, +@@ -247,6 +452,7 @@ static const struct vop2_data rk3566_vop + }; + + static const struct vop2_data rk3568_vop = { ++ .feature = VOP2_FEATURE_HAS_SYS_GRF, + .nr_vps = 3, + .max_input = { 4096, 2304 }, + .max_output = { 4096, 2304 }, +@@ -256,6 +462,18 @@ static const struct vop2_data rk3568_vop + .soc_id = 3568, + }; + ++static const struct vop2_data rk3588_vop = { ++ .feature = VOP2_FEATURE_HAS_SYS_GRF | VOP2_FEATURE_HAS_VO1_GRF | ++ VOP2_FEATURE_HAS_VOP_GRF | VOP2_FEATURE_HAS_SYS_PMU, ++ .nr_vps = 4, ++ .max_input = { 4096, 4320 }, ++ .max_output = { 4096, 4320 }, ++ .vp = rk3588_vop_video_ports, ++ .win = rk3588_vop_win_data, ++ .win_size = ARRAY_SIZE(rk3588_vop_win_data), ++ .soc_id = 3588, ++}; ++ + static const struct of_device_id vop2_dt_match[] = { + { + .compatible = "rockchip,rk3566-vop", +@@ -264,6 +482,9 @@ static const struct of_device_id vop2_dt + .compatible = "rockchip,rk3568-vop", + .data = &rk3568_vop, + }, { ++ .compatible = "rockchip,rk3588-vop", ++ .data = &rk3588_vop ++ }, { + }, + }; + MODULE_DEVICE_TABLE(of, vop2_dt_match); diff --git a/patches-6.6/033-29-v6.8-drm-rockchip-vop2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch b/patches-6.6/033-29-v6.8-drm-rockchip-vop2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch new file mode 100644 index 0000000..1db6bda --- /dev/null +++ b/patches-6.6/033-29-v6.8-drm-rockchip-vop2-rename-VOP_FEATURE_OUTPUT_10BIT-to.patch @@ -0,0 +1,80 @@ +From 9d7fe7704d534c2d043aff2987f10671a8b4373d Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Mon, 11 Dec 2023 19:59:31 +0800 +Subject: [PATCH] drm/rockchip: vop2: rename VOP_FEATURE_OUTPUT_10BIT to + VOP2_VP_FEATURE_OUTPUT_10BIT + +VOP2 has multiple independent video ports with different +feature, so rename VOP_FEATURE_OUTPUT_10BIT to +VOP2_VP_FEATURE_OUTPUT_10BIT for more clearly meaning. 
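+
+A condensed sketch of the resulting split, drawn only from the hunks in this
+series (an illustration, not an exhaustive list of users): the SoC-wide bits
+live in struct vop2_data and keep the plain VOP2_FEATURE_ prefix, while the
+per-video-port bits live in struct vop2_video_port_data and gain the
+VOP2_VP_ prefix:
+
+	/* SoC-wide capability, tested against vop2_data->feature */
+	if (vop2_data->feature & VOP2_FEATURE_HAS_SYS_GRF)
+		vop2->sys_grf = syscon_regmap_lookup_by_phandle(dev->of_node,
+								"rockchip,grf");
+
+	/* per-video-port capability, tested against vp_data->feature */
+	if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
+	    !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT))
+		out_mode = ROCKCHIP_OUT_MODE_P888;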
+ +Signed-off-by: Andy Yan +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231211115931.1785495-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 2 +- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.h | 2 +- + drivers/gpu/drm/rockchip/rockchip_vop2_reg.c | 8 ++++---- + 3 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1997,7 +1997,7 @@ static void vop2_crtc_atomic_enable(stru + return; + + if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && +- !(vp_data->feature & VOP_FEATURE_OUTPUT_10BIT)) ++ !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT)) + out_mode = ROCKCHIP_OUT_MODE_P888; + else + out_mode = vcstate->output_mode; +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +@@ -11,7 +11,7 @@ + #include + #include "rockchip_drm_vop.h" + +-#define VOP_FEATURE_OUTPUT_10BIT BIT(0) ++#define VOP2_VP_FEATURE_OUTPUT_10BIT BIT(0) + + #define VOP2_FEATURE_HAS_SYS_GRF BIT(0) + #define VOP2_FEATURE_HAS_VO0_GRF BIT(1) +--- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c ++++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +@@ -136,7 +136,7 @@ static const uint64_t format_modifiers_a + static const struct vop2_video_port_data rk3568_vop_video_ports[] = { + { + .id = 0, +- .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 9 * 9 * 9, + .max_output = { 4096, 2304 }, +@@ -263,7 +263,7 @@ static const struct vop2_win_data rk3568 + static const struct vop2_video_port_data rk3588_vop_video_ports[] = { + { + .id = 0, +- .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 9 * 9 * 9, /* 9x9x9 */ + .max_output = { 4096, 2304 }, +@@ -272,7 +272,7 @@ static const struct vop2_video_port_data + .offset = 0xc00, + }, { + .id = 1, +- .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 729, /* 9x9x9 */ + .max_output = { 4096, 2304 }, +@@ -280,7 +280,7 @@ static const struct vop2_video_port_data + .offset = 0xd00, + }, { + .id = 2, +- .feature = VOP_FEATURE_OUTPUT_10BIT, ++ .feature = VOP2_VP_FEATURE_OUTPUT_10BIT, + .gamma_lut_len = 1024, + .cubic_lut_len = 17 * 17 * 17, /* 17x17x17 */ + .max_output = { 4096, 2304 }, diff --git a/patches-6.6/033-30-v6.8-drm-rockchip-vop2-Avoid-use-regmap_reinit_cache-at-runtim.patch b/patches-6.6/033-30-v6.8-drm-rockchip-vop2-Avoid-use-regmap_reinit_cache-at-runtim.patch new file mode 100644 index 0000000..9e691b0 --- /dev/null +++ b/patches-6.6/033-30-v6.8-drm-rockchip-vop2-Avoid-use-regmap_reinit_cache-at-runtim.patch @@ -0,0 +1,59 @@ +From 3ee348eb36f14e9303a7e9757efb91b0bbf3f7a9 Mon Sep 17 00:00:00 2001 +From: Andy Yan +Date: Sun, 17 Dec 2023 16:44:15 +0800 +Subject: [PATCH] drm/rockchip: vop2: Avoid use regmap_reinit_cache at runtime + +Marek Report a possible irq lock inversion dependency warning when +commit 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync() +to fix suspend/resume"") lands linux-next. + +I can reproduce this warning with: +CONFIG_PROVE_LOCKING=y +CONFIG_DEBUG_LOCKDEP=y + +It seems than when use regmap_reinit_cache at runtime whith Mark's +commit 3d59c22bbb8d ("drm/rockchip: vop2: Convert to use maple tree +register cache"), it will trigger a possible irq lock inversion dependency +warning. 
+ +One solution is switch back to REGCACHE_RBTREE, but it seems that +REGCACHE_MAPLE is the future, so I avoid using regmap_reinit_cache, +and drop all the regcache when vop is disabled, then we get a fresh +start at next enbable time. + +Fixes: 81a06f1d02e5 ("Revert "drm/rockchip: vop2: Use regcache_sync() to fix suspend/resume"") +Reported-by: Marek Szyprowski +Closes: https://lore.kernel.org/all/98a9f15d-30ac-47bf-9b93-3aa2c9900f7b@samsung.com/ +Signed-off-by: Andy Yan +Tested-by: Marek Szyprowski +[dropped the large kernel log of the lockdep report from the message] +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231217084415.2373043-1-andyshrk@163.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -961,12 +961,6 @@ static void vop2_enable(struct vop2 *vop + return; + } + +- ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config); +- if (ret) { +- drm_err(vop2->drm, "failed to reinit cache: %d\n", ret); +- return; +- } +- + if (vop2->data->soc_id == 3566) + vop2_writel(vop2, RK3568_OTP_WIN_EN, 1); + +@@ -998,6 +992,8 @@ static void vop2_disable(struct vop2 *vo + + pm_runtime_put_sync(vop2->dev); + ++ regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register); ++ + clk_disable_unprepare(vop2->pclk); + clk_disable_unprepare(vop2->aclk); + clk_disable_unprepare(vop2->hclk); diff --git a/patches-6.6/033-31-v6.8-drm-rockchip-vop2-clean-up-some-inconsistent-indenting.patch b/patches-6.6/033-31-v6.8-drm-rockchip-vop2-clean-up-some-inconsistent-indenting.patch new file mode 100644 index 0000000..6272c4e --- /dev/null +++ b/patches-6.6/033-31-v6.8-drm-rockchip-vop2-clean-up-some-inconsistent-indenting.patch @@ -0,0 +1,31 @@ +From f40e61eb538d35661d6dda1de92867954d776c4a Mon Sep 17 00:00:00 2001 +From: Jiapeng Chong +Date: Tue, 19 Dec 2023 14:26:35 +0800 +Subject: [PATCH] drm/rockchip: vop2: clean up some inconsistent indenting + +No functional modification involved. + +drivers/gpu/drm/rockchip/rockchip_drm_vop2.c:1708 rk3588_calc_cru_cfg() warn: inconsistent indenting. 
+ +Reported-by: Abaci Robot +Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=7778 +Signed-off-by: Jiapeng Chong +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20231219062635.100718-1-jiapeng.chong@linux.alibaba.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1703,8 +1703,8 @@ static unsigned long rk3588_calc_cru_cfg + * *if_pixclk_div = dclk_rate / if_pixclk_rate; + * *if_dclk_div = dclk_rate / if_dclk_rate; + */ +- *if_pixclk_div = 2; +- *if_dclk_div = 4; ++ *if_pixclk_div = 2; ++ *if_dclk_div = 4; + } else if (vop2_output_if_is_edp(id)) { + /* + * edp_pixclk = edp_dclk > dclk_core diff --git a/patches-6.6/033-32-v6.8-drm-rockchip-vop2-Drop-superfluous-include.patch b/patches-6.6/033-32-v6.8-drm-rockchip-vop2-Drop-superfluous-include.patch new file mode 100644 index 0000000..de34da6 --- /dev/null +++ b/patches-6.6/033-32-v6.8-drm-rockchip-vop2-Drop-superfluous-include.patch @@ -0,0 +1,25 @@ +From 38709af26c33e398c3292e96837ccfde41fd9e6b Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Thu, 4 Jan 2024 16:39:49 +0200 +Subject: [PATCH] drm/rockchip: vop2: Drop superfluous include + +The rockchip_drm_fb.h header contains just a single function which is +not directly used by the VOP2 driver. Drop the unnecessary include. + +Signed-off-by: Cristian Ciocaltea +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240104143951.85219-1-cristian.ciocaltea@collabora.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -35,7 +35,6 @@ + + #include "rockchip_drm_drv.h" + #include "rockchip_drm_gem.h" +-#include "rockchip_drm_fb.h" + #include "rockchip_drm_vop2.h" + #include "rockchip_rgb.h" + diff --git a/patches-6.6/033-33-v6.8-drm-rockchip-vop2-Drop-unused-if_dclk_rate-variable.patch b/patches-6.6/033-33-v6.8-drm-rockchip-vop2-Drop-unused-if_dclk_rate-variable.patch new file mode 100644 index 0000000..16c4ada --- /dev/null +++ b/patches-6.6/033-33-v6.8-drm-rockchip-vop2-Drop-unused-if_dclk_rate-variable.patch @@ -0,0 +1,47 @@ +From 196da3f3f76a46905f7daab29c56974f1aba9a7a Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Fri, 5 Jan 2024 19:40:06 +0200 +Subject: [PATCH] drm/rockchip: vop2: Drop unused if_dclk_rate variable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Commit 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588") +introduced a variable which ended up being unused: + +rockchip_drm_vop2.c:1688:23: warning: variable ‘if_dclk_rate’ set but not used [-Wunused-but-set-variable] + +This has been initially used as part of a formula to compute the clock +dividers, but eventually it has been replaced by static values. + +Drop the variable declaration and move its assignment to the comment +block, to serve as documentation of how the constants have been +generated. 
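+
+Worked through with the values visible in the hunk below (dclk_rate =
+v_pixclk, dclk_core_rate = v_pixclk >> 2, and K = 1 as initialised there),
+the retired assignment reduces to the hard-coded dividers; this is only a
+sketch of the arithmetic, not additional code in the patch:
+
+	if_pixclk_rate = (dclk_core_rate << 1) / K;	/* v_pixclk / 2 */
+	if_dclk_rate   = dclk_core_rate / K;		/* v_pixclk / 4 */
+	*if_pixclk_div = dclk_rate / if_pixclk_rate;	/* = 2 */
+	*if_dclk_div   = dclk_rate / if_dclk_rate;	/* = 4 */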
+ +Signed-off-by: Cristian Ciocaltea +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240105174007.98054-1-cristian.ciocaltea@collabora.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1682,7 +1682,6 @@ static unsigned long rk3588_calc_cru_cfg + unsigned long dclk_core_rate = v_pixclk >> 2; + unsigned long dclk_rate = v_pixclk; + unsigned long dclk_out_rate; +- unsigned long if_dclk_rate; + unsigned long if_pixclk_rate; + int K = 1; + +@@ -1697,8 +1696,8 @@ static unsigned long rk3588_calc_cru_cfg + } + + if_pixclk_rate = (dclk_core_rate << 1) / K; +- if_dclk_rate = dclk_core_rate / K; + /* ++ * if_dclk_rate = dclk_core_rate / K; + * *if_pixclk_div = dclk_rate / if_pixclk_rate; + * *if_dclk_div = dclk_rate / if_dclk_rate; + */ diff --git a/patches-6.6/033-34-v6.8-drm-rockchip-vop2-add-a-missing-unlock-in.patch b/patches-6.6/033-34-v6.8-drm-rockchip-vop2-add-a-missing-unlock-in.patch new file mode 100644 index 0000000..5316851 --- /dev/null +++ b/patches-6.6/033-34-v6.8-drm-rockchip-vop2-add-a-missing-unlock-in.patch @@ -0,0 +1,31 @@ +From b6ddaa63f728d26c12048aed76be99c24f435c41 Mon Sep 17 00:00:00 2001 +From: Harshit Mogalapalli +Date: Fri, 19 Jan 2024 11:08:40 -0800 +Subject: [PATCH] drm/rockchip: vop2: add a missing unlock in + vop2_crtc_atomic_enable() + +Unlock before returning on the error path. + +Fixes: 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588") +Signed-off-by: Harshit Mogalapalli +Reviewed-by: Sascha Hauer +Signed-off-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240119190841.1619443-1-harshit.m.mogalapalli@oracle.com +--- + drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1987,8 +1987,10 @@ static void vop2_crtc_atomic_enable(stru + clock = vop2_set_intf_mux(vp, rkencoder->crtc_endpoint_id, polflags); + } + +- if (!clock) ++ if (!clock) { ++ vop2_unlock(vop2); + return; ++ } + + if (vcstate->output_mode == ROCKCHIP_OUT_MODE_AAAA && + !(vp_data->feature & VOP2_VP_FEATURE_OUTPUT_10BIT)) diff --git a/patches-6.6/034-01-v6.7-drm-gpuvm-rename-struct-drm_gpuva_manager-to-struct.patch b/patches-6.6/034-01-v6.7-drm-gpuvm-rename-struct-drm_gpuva_manager-to-struct.patch new file mode 100644 index 0000000..72a2eb9 --- /dev/null +++ b/patches-6.6/034-01-v6.7-drm-gpuvm-rename-struct-drm_gpuva_manager-to-struct.patch @@ -0,0 +1,5137 @@ +From f72c2db47080523d5e0f3c20846c96ed31c35648 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 20 Sep 2023 16:42:34 +0200 +Subject: [PATCH] drm/gpuvm: rename struct drm_gpuva_manager to struct + drm_gpuvm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Rename struct drm_gpuva_manager to struct drm_gpuvm including +corresponding functions. This way the GPUVA manager's structures align +very well with the documentation of VM_BIND [1] and VM_BIND locking [2]. + +It also provides a better foundation for the naming of data structures +and functions introduced for implementing a common dma-resv per GPU-VM +including tracking of external and evicted objects in subsequent +patches. 
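+
+A minimal sketch of what the rename looks like at a call site, condensed from
+the drm_debugfs.c hunk further down (the same pattern is applied to the other
+drm_gpuva_manager symbols; helpers that are not part of the manager, such as
+drm_debugfs_gpuva_info(), only see their parameter change):
+
+	struct drm_gpuva_manager *mgr    ->  struct drm_gpuvm *gpuvm
+	drm_gpuva_for_each_va(va, mgr)   ->  drm_gpuvm_for_each_va(va, gpuvm)
+	drm_debugfs_gpuva_info(m, mgr)   ->  drm_debugfs_gpuva_info(m, gpuvm)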
+ +[1] Documentation/gpu/drm-vm-bind-async.rst +[2] Documentation/gpu/drm-vm-bind-locking.rst + +Cc: Thomas Hellström +Cc: Matthew Brost +Acked-by: Dave Airlie +Acked-by: Christian König +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20230920144343.64830-2-dakr@redhat.com +--- + drivers/gpu/drm/Makefile | 2 +- + drivers/gpu/drm/drm_debugfs.c | 16 +- + .../gpu/drm/{drm_gpuva_mgr.c => drm_gpuvm.c} | 404 +++++++++--------- + drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 28 +- + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 6 +- + include/drm/drm_debugfs.h | 6 +- + include/drm/{drm_gpuva_mgr.h => drm_gpuvm.h} | 155 ++++--- + 8 files changed, 309 insertions(+), 310 deletions(-) + rename drivers/gpu/drm/{drm_gpuva_mgr.c => drm_gpuvm.c} (78%) + rename include/drm/{drm_gpuva_mgr.h => drm_gpuvm.h} (78%) + +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -45,7 +45,7 @@ drm-y := \ + drm_vblank.o \ + drm_vblank_work.o \ + drm_vma_manager.o \ +- drm_gpuva_mgr.o \ ++ drm_gpuvm.o \ + drm_writeback.o + drm-$(CONFIG_DRM_LEGACY) += \ + drm_agpsupport.o \ +--- a/drivers/gpu/drm/drm_debugfs.c ++++ b/drivers/gpu/drm/drm_debugfs.c +@@ -40,7 +40,7 @@ + #include + #include + #include +-#include ++#include + + #include "drm_crtc_internal.h" + #include "drm_internal.h" +@@ -182,31 +182,31 @@ static const struct file_operations drm_ + /** + * drm_debugfs_gpuva_info - dump the given DRM GPU VA space + * @m: pointer to the &seq_file to write +- * @mgr: the &drm_gpuva_manager representing the GPU VA space ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space + * + * Dumps the GPU VA mappings of a given DRM GPU VA manager. + * + * For each DRM GPU VA space drivers should call this function from their + * &drm_info_list's show callback. + * +- * Returns: 0 on success, -ENODEV if the &mgr is not initialized ++ * Returns: 0 on success, -ENODEV if the &gpuvm is not initialized + */ + int drm_debugfs_gpuva_info(struct seq_file *m, +- struct drm_gpuva_manager *mgr) ++ struct drm_gpuvm *gpuvm) + { +- struct drm_gpuva *va, *kva = &mgr->kernel_alloc_node; ++ struct drm_gpuva *va, *kva = &gpuvm->kernel_alloc_node; + +- if (!mgr->name) ++ if (!gpuvm->name) + return -ENODEV; + + seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n", +- mgr->name, mgr->mm_start, mgr->mm_start + mgr->mm_range); ++ gpuvm->name, gpuvm->mm_start, gpuvm->mm_start + gpuvm->mm_range); + seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n", + kva->va.addr, kva->va.addr + kva->va.range); + seq_puts(m, "\n"); + seq_puts(m, " VAs | start | range | end | object | object offset\n"); + seq_puts(m, "-------------------------------------------------------------------------------------------------------------\n"); +- drm_gpuva_for_each_va(va, mgr) { ++ drm_gpuvm_for_each_va(va, gpuvm) { + if (unlikely(va == kva)) + continue; + +--- a/drivers/gpu/drm/drm_gpuva_mgr.c ++++ /dev/null +@@ -1,1723 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-only +-/* +- * Copyright (c) 2022 Red Hat. 
+- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +- * OTHER DEALINGS IN THE SOFTWARE. +- * +- * Authors: +- * Danilo Krummrich +- * +- */ +- +-#include +- +-#include +-#include +- +-/** +- * DOC: Overview +- * +- * The DRM GPU VA Manager, represented by struct drm_gpuva_manager keeps track +- * of a GPU's virtual address (VA) space and manages the corresponding virtual +- * mappings represented by &drm_gpuva objects. It also keeps track of the +- * mapping's backing &drm_gem_object buffers. +- * +- * &drm_gem_object buffers maintain a list of &drm_gpuva objects representing +- * all existent GPU VA mappings using this &drm_gem_object as backing buffer. +- * +- * GPU VAs can be flagged as sparse, such that drivers may use GPU VAs to also +- * keep track of sparse PTEs in order to support Vulkan 'Sparse Resources'. +- * +- * The GPU VA manager internally uses a rb-tree to manage the +- * &drm_gpuva mappings within a GPU's virtual address space. +- * +- * The &drm_gpuva_manager contains a special &drm_gpuva representing the +- * portion of VA space reserved by the kernel. This node is initialized together +- * with the GPU VA manager instance and removed when the GPU VA manager is +- * destroyed. +- * +- * In a typical application drivers would embed struct drm_gpuva_manager and +- * struct drm_gpuva within their own driver specific structures, there won't be +- * any memory allocations of its own nor memory allocations of &drm_gpuva +- * entries. +- * +- * The data structures needed to store &drm_gpuvas within the &drm_gpuva_manager +- * are contained within struct drm_gpuva already. Hence, for inserting +- * &drm_gpuva entries from within dma-fence signalling critical sections it is +- * enough to pre-allocate the &drm_gpuva structures. +- */ +- +-/** +- * DOC: Split and Merge +- * +- * Besides its capability to manage and represent a GPU VA space, the +- * &drm_gpuva_manager also provides functions to let the &drm_gpuva_manager +- * calculate a sequence of operations to satisfy a given map or unmap request. +- * +- * Therefore the DRM GPU VA manager provides an algorithm implementing splitting +- * and merging of existent GPU VA mappings with the ones that are requested to +- * be mapped or unmapped. This feature is required by the Vulkan API to +- * implement Vulkan 'Sparse Memory Bindings' - drivers UAPIs often refer to this +- * as VM BIND. 
+- * +- * Drivers can call drm_gpuva_sm_map() to receive a sequence of callbacks +- * containing map, unmap and remap operations for a given newly requested +- * mapping. The sequence of callbacks represents the set of operations to +- * execute in order to integrate the new mapping cleanly into the current state +- * of the GPU VA space. +- * +- * Depending on how the new GPU VA mapping intersects with the existent mappings +- * of the GPU VA space the &drm_gpuva_fn_ops callbacks contain an arbitrary +- * amount of unmap operations, a maximum of two remap operations and a single +- * map operation. The caller might receive no callback at all if no operation is +- * required, e.g. if the requested mapping already exists in the exact same way. +- * +- * The single map operation represents the original map operation requested by +- * the caller. +- * +- * &drm_gpuva_op_unmap contains a 'keep' field, which indicates whether the +- * &drm_gpuva to unmap is physically contiguous with the original mapping +- * request. Optionally, if 'keep' is set, drivers may keep the actual page table +- * entries for this &drm_gpuva, adding the missing page table entries only and +- * update the &drm_gpuva_manager's view of things accordingly. +- * +- * Drivers may do the same optimization, namely delta page table updates, also +- * for remap operations. This is possible since &drm_gpuva_op_remap consists of +- * one unmap operation and one or two map operations, such that drivers can +- * derive the page table update delta accordingly. +- * +- * Note that there can't be more than two existent mappings to split up, one at +- * the beginning and one at the end of the new mapping, hence there is a +- * maximum of two remap operations. +- * +- * Analogous to drm_gpuva_sm_map() drm_gpuva_sm_unmap() uses &drm_gpuva_fn_ops +- * to call back into the driver in order to unmap a range of GPU VA space. The +- * logic behind this function is way simpler though: For all existent mappings +- * enclosed by the given range unmap operations are created. For mappings which +- * are only partically located within the given range, remap operations are +- * created such that those mappings are split up and re-mapped partically. +- * +- * As an alternative to drm_gpuva_sm_map() and drm_gpuva_sm_unmap(), +- * drm_gpuva_sm_map_ops_create() and drm_gpuva_sm_unmap_ops_create() can be used +- * to directly obtain an instance of struct drm_gpuva_ops containing a list of +- * &drm_gpuva_op, which can be iterated with drm_gpuva_for_each_op(). This list +- * contains the &drm_gpuva_ops analogous to the callbacks one would receive when +- * calling drm_gpuva_sm_map() or drm_gpuva_sm_unmap(). While this way requires +- * more memory (to allocate the &drm_gpuva_ops), it provides drivers a way to +- * iterate the &drm_gpuva_op multiple times, e.g. once in a context where memory +- * allocations are possible (e.g. to allocate GPU page tables) and once in the +- * dma-fence signalling critical path. +- * +- * To update the &drm_gpuva_manager's view of the GPU VA space +- * drm_gpuva_insert() and drm_gpuva_remove() may be used. These functions can +- * safely be used from &drm_gpuva_fn_ops callbacks originating from +- * drm_gpuva_sm_map() or drm_gpuva_sm_unmap(). However, it might be more +- * convenient to use the provided helper functions drm_gpuva_map(), +- * drm_gpuva_remap() and drm_gpuva_unmap() instead. 
+- * +- * The following diagram depicts the basic relationships of existent GPU VA +- * mappings, a newly requested mapping and the resulting mappings as implemented +- * by drm_gpuva_sm_map() - it doesn't cover any arbitrary combinations of these. +- * +- * 1) Requested mapping is identical. Replace it, but indicate the backing PTEs +- * could be kept. +- * +- * :: +- * +- * 0 a 1 +- * old: |-----------| (bo_offset=n) +- * +- * 0 a 1 +- * req: |-----------| (bo_offset=n) +- * +- * 0 a 1 +- * new: |-----------| (bo_offset=n) +- * +- * +- * 2) Requested mapping is identical, except for the BO offset, hence replace +- * the mapping. +- * +- * :: +- * +- * 0 a 1 +- * old: |-----------| (bo_offset=n) +- * +- * 0 a 1 +- * req: |-----------| (bo_offset=m) +- * +- * 0 a 1 +- * new: |-----------| (bo_offset=m) +- * +- * +- * 3) Requested mapping is identical, except for the backing BO, hence replace +- * the mapping. +- * +- * :: +- * +- * 0 a 1 +- * old: |-----------| (bo_offset=n) +- * +- * 0 b 1 +- * req: |-----------| (bo_offset=n) +- * +- * 0 b 1 +- * new: |-----------| (bo_offset=n) +- * +- * +- * 4) Existent mapping is a left aligned subset of the requested one, hence +- * replace the existent one. +- * +- * :: +- * +- * 0 a 1 +- * old: |-----| (bo_offset=n) +- * +- * 0 a 2 +- * req: |-----------| (bo_offset=n) +- * +- * 0 a 2 +- * new: |-----------| (bo_offset=n) +- * +- * .. note:: +- * We expect to see the same result for a request with a different BO +- * and/or non-contiguous BO offset. +- * +- * +- * 5) Requested mapping's range is a left aligned subset of the existent one, +- * but backed by a different BO. Hence, map the requested mapping and split +- * the existent one adjusting its BO offset. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 0 b 1 +- * req: |-----| (bo_offset=n) +- * +- * 0 b 1 a' 2 +- * new: |-----|-----| (b.bo_offset=n, a.bo_offset=n+1) +- * +- * .. note:: +- * We expect to see the same result for a request with a different BO +- * and/or non-contiguous BO offset. +- * +- * +- * 6) Existent mapping is a superset of the requested mapping. Split it up, but +- * indicate that the backing PTEs could be kept. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 0 a 1 +- * req: |-----| (bo_offset=n) +- * +- * 0 a 1 a' 2 +- * new: |-----|-----| (a.bo_offset=n, a'.bo_offset=n+1) +- * +- * +- * 7) Requested mapping's range is a right aligned subset of the existent one, +- * but backed by a different BO. Hence, map the requested mapping and split +- * the existent one, without adjusting the BO offset. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 1 b 2 +- * req: |-----| (bo_offset=m) +- * +- * 0 a 1 b 2 +- * new: |-----|-----| (a.bo_offset=n,b.bo_offset=m) +- * +- * +- * 8) Existent mapping is a superset of the requested mapping. Split it up, but +- * indicate that the backing PTEs could be kept. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 1 a 2 +- * req: |-----| (bo_offset=n+1) +- * +- * 0 a' 1 a 2 +- * new: |-----|-----| (a'.bo_offset=n, a.bo_offset=n+1) +- * +- * +- * 9) Existent mapping is overlapped at the end by the requested mapping backed +- * by a different BO. Hence, map the requested mapping and split up the +- * existent one, without adjusting the BO offset. 
+- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 1 b 3 +- * req: |-----------| (bo_offset=m) +- * +- * 0 a 1 b 3 +- * new: |-----|-----------| (a.bo_offset=n,b.bo_offset=m) +- * +- * +- * 10) Existent mapping is overlapped by the requested mapping, both having the +- * same backing BO with a contiguous offset. Indicate the backing PTEs of +- * the old mapping could be kept. +- * +- * :: +- * +- * 0 a 2 +- * old: |-----------| (bo_offset=n) +- * +- * 1 a 3 +- * req: |-----------| (bo_offset=n+1) +- * +- * 0 a' 1 a 3 +- * new: |-----|-----------| (a'.bo_offset=n, a.bo_offset=n+1) +- * +- * +- * 11) Requested mapping's range is a centered subset of the existent one +- * having a different backing BO. Hence, map the requested mapping and split +- * up the existent one in two mappings, adjusting the BO offset of the right +- * one accordingly. +- * +- * :: +- * +- * 0 a 3 +- * old: |-----------------| (bo_offset=n) +- * +- * 1 b 2 +- * req: |-----| (bo_offset=m) +- * +- * 0 a 1 b 2 a' 3 +- * new: |-----|-----|-----| (a.bo_offset=n,b.bo_offset=m,a'.bo_offset=n+2) +- * +- * +- * 12) Requested mapping is a contiguous subset of the existent one. Split it +- * up, but indicate that the backing PTEs could be kept. +- * +- * :: +- * +- * 0 a 3 +- * old: |-----------------| (bo_offset=n) +- * +- * 1 a 2 +- * req: |-----| (bo_offset=n+1) +- * +- * 0 a' 1 a 2 a'' 3 +- * old: |-----|-----|-----| (a'.bo_offset=n, a.bo_offset=n+1, a''.bo_offset=n+2) +- * +- * +- * 13) Existent mapping is a right aligned subset of the requested one, hence +- * replace the existent one. +- * +- * :: +- * +- * 1 a 2 +- * old: |-----| (bo_offset=n+1) +- * +- * 0 a 2 +- * req: |-----------| (bo_offset=n) +- * +- * 0 a 2 +- * new: |-----------| (bo_offset=n) +- * +- * .. note:: +- * We expect to see the same result for a request with a different bo +- * and/or non-contiguous bo_offset. +- * +- * +- * 14) Existent mapping is a centered subset of the requested one, hence +- * replace the existent one. +- * +- * :: +- * +- * 1 a 2 +- * old: |-----| (bo_offset=n+1) +- * +- * 0 a 3 +- * req: |----------------| (bo_offset=n) +- * +- * 0 a 3 +- * new: |----------------| (bo_offset=n) +- * +- * .. note:: +- * We expect to see the same result for a request with a different bo +- * and/or non-contiguous bo_offset. +- * +- * +- * 15) Existent mappings is overlapped at the beginning by the requested mapping +- * backed by a different BO. Hence, map the requested mapping and split up +- * the existent one, adjusting its BO offset accordingly. +- * +- * :: +- * +- * 1 a 3 +- * old: |-----------| (bo_offset=n) +- * +- * 0 b 2 +- * req: |-----------| (bo_offset=m) +- * +- * 0 b 2 a' 3 +- * new: |-----------|-----| (b.bo_offset=m,a.bo_offset=n+2) +- */ +- +-/** +- * DOC: Locking +- * +- * Generally, the GPU VA manager does not take care of locking itself, it is +- * the drivers responsibility to take care about locking. Drivers might want to +- * protect the following operations: inserting, removing and iterating +- * &drm_gpuva objects as well as generating all kinds of operations, such as +- * split / merge or prefetch. +- * +- * The GPU VA manager also does not take care of the locking of the backing +- * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to +- * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively +- * a driver specific external lock. For the latter see also +- * drm_gem_gpuva_set_lock(). 
+- * +- * However, the GPU VA manager contains lockdep checks to ensure callers of its +- * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is +- * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink(). +- */ +- +-/** +- * DOC: Examples +- * +- * This section gives two examples on how to let the DRM GPUVA Manager generate +- * &drm_gpuva_op in order to satisfy a given map or unmap request and how to +- * make use of them. +- * +- * The below code is strictly limited to illustrate the generic usage pattern. +- * To maintain simplicitly, it doesn't make use of any abstractions for common +- * code, different (asyncronous) stages with fence signalling critical paths, +- * any other helpers or error handling in terms of freeing memory and dropping +- * previously taken locks. +- * +- * 1) Obtain a list of &drm_gpuva_op to create a new mapping:: +- * +- * // Allocates a new &drm_gpuva. +- * struct drm_gpuva * driver_gpuva_alloc(void); +- * +- * // Typically drivers would embedd the &drm_gpuva_manager and &drm_gpuva +- * // structure in individual driver structures and lock the dma-resv with +- * // drm_exec or similar helpers. +- * int driver_mapping_create(struct drm_gpuva_manager *mgr, +- * u64 addr, u64 range, +- * struct drm_gem_object *obj, u64 offset) +- * { +- * struct drm_gpuva_ops *ops; +- * struct drm_gpuva_op *op +- * +- * driver_lock_va_space(); +- * ops = drm_gpuva_sm_map_ops_create(mgr, addr, range, +- * obj, offset); +- * if (IS_ERR(ops)) +- * return PTR_ERR(ops); +- * +- * drm_gpuva_for_each_op(op, ops) { +- * struct drm_gpuva *va; +- * +- * switch (op->op) { +- * case DRM_GPUVA_OP_MAP: +- * va = driver_gpuva_alloc(); +- * if (!va) +- * ; // unwind previous VA space updates, +- * // free memory and unlock +- * +- * driver_vm_map(); +- * drm_gpuva_map(mgr, va, &op->map); +- * drm_gpuva_link(va); +- * +- * break; +- * case DRM_GPUVA_OP_REMAP: { +- * struct drm_gpuva *prev = NULL, *next = NULL; +- * +- * va = op->remap.unmap->va; +- * +- * if (op->remap.prev) { +- * prev = driver_gpuva_alloc(); +- * if (!prev) +- * ; // unwind previous VA space +- * // updates, free memory and +- * // unlock +- * } +- * +- * if (op->remap.next) { +- * next = driver_gpuva_alloc(); +- * if (!next) +- * ; // unwind previous VA space +- * // updates, free memory and +- * // unlock +- * } +- * +- * driver_vm_remap(); +- * drm_gpuva_remap(prev, next, &op->remap); +- * +- * drm_gpuva_unlink(va); +- * if (prev) +- * drm_gpuva_link(prev); +- * if (next) +- * drm_gpuva_link(next); +- * +- * break; +- * } +- * case DRM_GPUVA_OP_UNMAP: +- * va = op->unmap->va; +- * +- * driver_vm_unmap(); +- * drm_gpuva_unlink(va); +- * drm_gpuva_unmap(&op->unmap); +- * +- * break; +- * default: +- * break; +- * } +- * } +- * driver_unlock_va_space(); +- * +- * return 0; +- * } +- * +- * 2) Receive a callback for each &drm_gpuva_op to create a new mapping:: +- * +- * struct driver_context { +- * struct drm_gpuva_manager *mgr; +- * struct drm_gpuva *new_va; +- * struct drm_gpuva *prev_va; +- * struct drm_gpuva *next_va; +- * }; +- * +- * // ops to pass to drm_gpuva_manager_init() +- * static const struct drm_gpuva_fn_ops driver_gpuva_ops = { +- * .sm_step_map = driver_gpuva_map, +- * .sm_step_remap = driver_gpuva_remap, +- * .sm_step_unmap = driver_gpuva_unmap, +- * }; +- * +- * // Typically drivers would embedd the &drm_gpuva_manager and &drm_gpuva +- * // structure in individual driver structures and lock the dma-resv with +- * // drm_exec or similar helpers. 
+- * int driver_mapping_create(struct drm_gpuva_manager *mgr, +- * u64 addr, u64 range, +- * struct drm_gem_object *obj, u64 offset) +- * { +- * struct driver_context ctx; +- * struct drm_gpuva_ops *ops; +- * struct drm_gpuva_op *op; +- * int ret = 0; +- * +- * ctx.mgr = mgr; +- * +- * ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL); +- * ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL); +- * ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL); +- * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) { +- * ret = -ENOMEM; +- * goto out; +- * } +- * +- * driver_lock_va_space(); +- * ret = drm_gpuva_sm_map(mgr, &ctx, addr, range, obj, offset); +- * driver_unlock_va_space(); +- * +- * out: +- * kfree(ctx.new_va); +- * kfree(ctx.prev_va); +- * kfree(ctx.next_va); +- * return ret; +- * } +- * +- * int driver_gpuva_map(struct drm_gpuva_op *op, void *__ctx) +- * { +- * struct driver_context *ctx = __ctx; +- * +- * drm_gpuva_map(ctx->mgr, ctx->new_va, &op->map); +- * +- * drm_gpuva_link(ctx->new_va); +- * +- * // prevent the new GPUVA from being freed in +- * // driver_mapping_create() +- * ctx->new_va = NULL; +- * +- * return 0; +- * } +- * +- * int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx) +- * { +- * struct driver_context *ctx = __ctx; +- * +- * drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap); +- * +- * drm_gpuva_unlink(op->remap.unmap->va); +- * kfree(op->remap.unmap->va); +- * +- * if (op->remap.prev) { +- * drm_gpuva_link(ctx->prev_va); +- * ctx->prev_va = NULL; +- * } +- * +- * if (op->remap.next) { +- * drm_gpuva_link(ctx->next_va); +- * ctx->next_va = NULL; +- * } +- * +- * return 0; +- * } +- * +- * int driver_gpuva_unmap(struct drm_gpuva_op *op, void *__ctx) +- * { +- * drm_gpuva_unlink(op->unmap.va); +- * drm_gpuva_unmap(&op->unmap); +- * kfree(op->unmap.va); +- * +- * return 0; +- * } +- */ +- +-#define to_drm_gpuva(__node) container_of((__node), struct drm_gpuva, rb.node) +- +-#define GPUVA_START(node) ((node)->va.addr) +-#define GPUVA_LAST(node) ((node)->va.addr + (node)->va.range - 1) +- +-/* We do not actually use drm_gpuva_it_next(), tell the compiler to not complain +- * about this. 
+- */ +-INTERVAL_TREE_DEFINE(struct drm_gpuva, rb.node, u64, rb.__subtree_last, +- GPUVA_START, GPUVA_LAST, static __maybe_unused, +- drm_gpuva_it) +- +-static int __drm_gpuva_insert(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va); +-static void __drm_gpuva_remove(struct drm_gpuva *va); +- +-static bool +-drm_gpuva_check_overflow(u64 addr, u64 range) +-{ +- u64 end; +- +- return WARN(check_add_overflow(addr, range, &end), +- "GPUVA address limited to %zu bytes.\n", sizeof(end)); +-} +- +-static bool +-drm_gpuva_in_mm_range(struct drm_gpuva_manager *mgr, u64 addr, u64 range) +-{ +- u64 end = addr + range; +- u64 mm_start = mgr->mm_start; +- u64 mm_end = mm_start + mgr->mm_range; +- +- return addr >= mm_start && end <= mm_end; +-} +- +-static bool +-drm_gpuva_in_kernel_node(struct drm_gpuva_manager *mgr, u64 addr, u64 range) +-{ +- u64 end = addr + range; +- u64 kstart = mgr->kernel_alloc_node.va.addr; +- u64 krange = mgr->kernel_alloc_node.va.range; +- u64 kend = kstart + krange; +- +- return krange && addr < kend && kstart < end; +-} +- +-static bool +-drm_gpuva_range_valid(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range) +-{ +- return !drm_gpuva_check_overflow(addr, range) && +- drm_gpuva_in_mm_range(mgr, addr, range) && +- !drm_gpuva_in_kernel_node(mgr, addr, range); +-} +- +-/** +- * drm_gpuva_manager_init() - initialize a &drm_gpuva_manager +- * @mgr: pointer to the &drm_gpuva_manager to initialize +- * @name: the name of the GPU VA space +- * @start_offset: the start offset of the GPU VA space +- * @range: the size of the GPU VA space +- * @reserve_offset: the start of the kernel reserved GPU VA area +- * @reserve_range: the size of the kernel reserved GPU VA area +- * @ops: &drm_gpuva_fn_ops called on &drm_gpuva_sm_map / &drm_gpuva_sm_unmap +- * +- * The &drm_gpuva_manager must be initialized with this function before use. +- * +- * Note that @mgr must be cleared to 0 before calling this function. The given +- * &name is expected to be managed by the surrounding driver structures. +- */ +-void +-drm_gpuva_manager_init(struct drm_gpuva_manager *mgr, +- const char *name, +- u64 start_offset, u64 range, +- u64 reserve_offset, u64 reserve_range, +- const struct drm_gpuva_fn_ops *ops) +-{ +- mgr->rb.tree = RB_ROOT_CACHED; +- INIT_LIST_HEAD(&mgr->rb.list); +- +- drm_gpuva_check_overflow(start_offset, range); +- mgr->mm_start = start_offset; +- mgr->mm_range = range; +- +- mgr->name = name ? name : "unknown"; +- mgr->ops = ops; +- +- memset(&mgr->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); +- +- if (reserve_range) { +- mgr->kernel_alloc_node.va.addr = reserve_offset; +- mgr->kernel_alloc_node.va.range = reserve_range; +- +- if (likely(!drm_gpuva_check_overflow(reserve_offset, +- reserve_range))) +- __drm_gpuva_insert(mgr, &mgr->kernel_alloc_node); +- } +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_manager_init); +- +-/** +- * drm_gpuva_manager_destroy() - cleanup a &drm_gpuva_manager +- * @mgr: pointer to the &drm_gpuva_manager to clean up +- * +- * Note that it is a bug to call this function on a manager that still +- * holds GPU VA mappings. 
+- */ +-void +-drm_gpuva_manager_destroy(struct drm_gpuva_manager *mgr) +-{ +- mgr->name = NULL; +- +- if (mgr->kernel_alloc_node.va.range) +- __drm_gpuva_remove(&mgr->kernel_alloc_node); +- +- WARN(!RB_EMPTY_ROOT(&mgr->rb.tree.rb_root), +- "GPUVA tree is not empty, potentially leaking memory."); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_manager_destroy); +- +-static int +-__drm_gpuva_insert(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va) +-{ +- struct rb_node *node; +- struct list_head *head; +- +- if (drm_gpuva_it_iter_first(&mgr->rb.tree, +- GPUVA_START(va), +- GPUVA_LAST(va))) +- return -EEXIST; +- +- va->mgr = mgr; +- +- drm_gpuva_it_insert(va, &mgr->rb.tree); +- +- node = rb_prev(&va->rb.node); +- if (node) +- head = &(to_drm_gpuva(node))->rb.entry; +- else +- head = &mgr->rb.list; +- +- list_add(&va->rb.entry, head); +- +- return 0; +-} +- +-/** +- * drm_gpuva_insert() - insert a &drm_gpuva +- * @mgr: the &drm_gpuva_manager to insert the &drm_gpuva in +- * @va: the &drm_gpuva to insert +- * +- * Insert a &drm_gpuva with a given address and range into a +- * &drm_gpuva_manager. +- * +- * It is safe to use this function using the safe versions of iterating the GPU +- * VA space, such as drm_gpuva_for_each_va_safe() and +- * drm_gpuva_for_each_va_range_safe(). +- * +- * Returns: 0 on success, negative error code on failure. +- */ +-int +-drm_gpuva_insert(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va) +-{ +- u64 addr = va->va.addr; +- u64 range = va->va.range; +- +- if (unlikely(!drm_gpuva_range_valid(mgr, addr, range))) +- return -EINVAL; +- +- return __drm_gpuva_insert(mgr, va); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_insert); +- +-static void +-__drm_gpuva_remove(struct drm_gpuva *va) +-{ +- drm_gpuva_it_remove(va, &va->mgr->rb.tree); +- list_del_init(&va->rb.entry); +-} +- +-/** +- * drm_gpuva_remove() - remove a &drm_gpuva +- * @va: the &drm_gpuva to remove +- * +- * This removes the given &va from the underlaying tree. +- * +- * It is safe to use this function using the safe versions of iterating the GPU +- * VA space, such as drm_gpuva_for_each_va_safe() and +- * drm_gpuva_for_each_va_range_safe(). +- */ +-void +-drm_gpuva_remove(struct drm_gpuva *va) +-{ +- struct drm_gpuva_manager *mgr = va->mgr; +- +- if (unlikely(va == &mgr->kernel_alloc_node)) { +- WARN(1, "Can't destroy kernel reserved node.\n"); +- return; +- } +- +- __drm_gpuva_remove(va); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_remove); +- +-/** +- * drm_gpuva_link() - link a &drm_gpuva +- * @va: the &drm_gpuva to link +- * +- * This adds the given &va to the GPU VA list of the &drm_gem_object it is +- * associated with. +- * +- * This function expects the caller to protect the GEM's GPUVA list against +- * concurrent access using the GEMs dma_resv lock. +- */ +-void +-drm_gpuva_link(struct drm_gpuva *va) +-{ +- struct drm_gem_object *obj = va->gem.obj; +- +- if (unlikely(!obj)) +- return; +- +- drm_gem_gpuva_assert_lock_held(obj); +- +- list_add_tail(&va->gem.entry, &obj->gpuva.list); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_link); +- +-/** +- * drm_gpuva_unlink() - unlink a &drm_gpuva +- * @va: the &drm_gpuva to unlink +- * +- * This removes the given &va from the GPU VA list of the &drm_gem_object it is +- * associated with. +- * +- * This function expects the caller to protect the GEM's GPUVA list against +- * concurrent access using the GEMs dma_resv lock. 
+- */ +-void +-drm_gpuva_unlink(struct drm_gpuva *va) +-{ +- struct drm_gem_object *obj = va->gem.obj; +- +- if (unlikely(!obj)) +- return; +- +- drm_gem_gpuva_assert_lock_held(obj); +- +- list_del_init(&va->gem.entry); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_unlink); +- +-/** +- * drm_gpuva_find_first() - find the first &drm_gpuva in the given range +- * @mgr: the &drm_gpuva_manager to search in +- * @addr: the &drm_gpuvas address +- * @range: the &drm_gpuvas range +- * +- * Returns: the first &drm_gpuva within the given range +- */ +-struct drm_gpuva * +-drm_gpuva_find_first(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range) +-{ +- u64 last = addr + range - 1; +- +- return drm_gpuva_it_iter_first(&mgr->rb.tree, addr, last); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_find_first); +- +-/** +- * drm_gpuva_find() - find a &drm_gpuva +- * @mgr: the &drm_gpuva_manager to search in +- * @addr: the &drm_gpuvas address +- * @range: the &drm_gpuvas range +- * +- * Returns: the &drm_gpuva at a given &addr and with a given &range +- */ +-struct drm_gpuva * +-drm_gpuva_find(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range) +-{ +- struct drm_gpuva *va; +- +- va = drm_gpuva_find_first(mgr, addr, range); +- if (!va) +- goto out; +- +- if (va->va.addr != addr || +- va->va.range != range) +- goto out; +- +- return va; +- +-out: +- return NULL; +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_find); +- +-/** +- * drm_gpuva_find_prev() - find the &drm_gpuva before the given address +- * @mgr: the &drm_gpuva_manager to search in +- * @start: the given GPU VA's start address +- * +- * Find the adjacent &drm_gpuva before the GPU VA with given &start address. +- * +- * Note that if there is any free space between the GPU VA mappings no mapping +- * is returned. +- * +- * Returns: a pointer to the found &drm_gpuva or NULL if none was found +- */ +-struct drm_gpuva * +-drm_gpuva_find_prev(struct drm_gpuva_manager *mgr, u64 start) +-{ +- if (!drm_gpuva_range_valid(mgr, start - 1, 1)) +- return NULL; +- +- return drm_gpuva_it_iter_first(&mgr->rb.tree, start - 1, start); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_find_prev); +- +-/** +- * drm_gpuva_find_next() - find the &drm_gpuva after the given address +- * @mgr: the &drm_gpuva_manager to search in +- * @end: the given GPU VA's end address +- * +- * Find the adjacent &drm_gpuva after the GPU VA with given &end address. +- * +- * Note that if there is any free space between the GPU VA mappings no mapping +- * is returned. 
+- * +- * Returns: a pointer to the found &drm_gpuva or NULL if none was found +- */ +-struct drm_gpuva * +-drm_gpuva_find_next(struct drm_gpuva_manager *mgr, u64 end) +-{ +- if (!drm_gpuva_range_valid(mgr, end, 1)) +- return NULL; +- +- return drm_gpuva_it_iter_first(&mgr->rb.tree, end, end + 1); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_find_next); +- +-/** +- * drm_gpuva_interval_empty() - indicate whether a given interval of the VA space +- * is empty +- * @mgr: the &drm_gpuva_manager to check the range for +- * @addr: the start address of the range +- * @range: the range of the interval +- * +- * Returns: true if the interval is empty, false otherwise +- */ +-bool +-drm_gpuva_interval_empty(struct drm_gpuva_manager *mgr, u64 addr, u64 range) +-{ +- return !drm_gpuva_find_first(mgr, addr, range); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_interval_empty); +- +-/** +- * drm_gpuva_map() - helper to insert a &drm_gpuva according to a +- * &drm_gpuva_op_map +- * @mgr: the &drm_gpuva_manager +- * @va: the &drm_gpuva to insert +- * @op: the &drm_gpuva_op_map to initialize @va with +- * +- * Initializes the @va from the @op and inserts it into the given @mgr. +- */ +-void +-drm_gpuva_map(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va, +- struct drm_gpuva_op_map *op) +-{ +- drm_gpuva_init_from_op(va, op); +- drm_gpuva_insert(mgr, va); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_map); +- +-/** +- * drm_gpuva_remap() - helper to remap a &drm_gpuva according to a +- * &drm_gpuva_op_remap +- * @prev: the &drm_gpuva to remap when keeping the start of a mapping +- * @next: the &drm_gpuva to remap when keeping the end of a mapping +- * @op: the &drm_gpuva_op_remap to initialize @prev and @next with +- * +- * Removes the currently mapped &drm_gpuva and remaps it using @prev and/or +- * @next. +- */ +-void +-drm_gpuva_remap(struct drm_gpuva *prev, +- struct drm_gpuva *next, +- struct drm_gpuva_op_remap *op) +-{ +- struct drm_gpuva *curr = op->unmap->va; +- struct drm_gpuva_manager *mgr = curr->mgr; +- +- drm_gpuva_remove(curr); +- +- if (op->prev) { +- drm_gpuva_init_from_op(prev, op->prev); +- drm_gpuva_insert(mgr, prev); +- } +- +- if (op->next) { +- drm_gpuva_init_from_op(next, op->next); +- drm_gpuva_insert(mgr, next); +- } +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_remap); +- +-/** +- * drm_gpuva_unmap() - helper to remove a &drm_gpuva according to a +- * &drm_gpuva_op_unmap +- * @op: the &drm_gpuva_op_unmap specifying the &drm_gpuva to remove +- * +- * Removes the &drm_gpuva associated with the &drm_gpuva_op_unmap. 
+- */ +-void +-drm_gpuva_unmap(struct drm_gpuva_op_unmap *op) +-{ +- drm_gpuva_remove(op->va); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_unmap); +- +-static int +-op_map_cb(const struct drm_gpuva_fn_ops *fn, void *priv, +- u64 addr, u64 range, +- struct drm_gem_object *obj, u64 offset) +-{ +- struct drm_gpuva_op op = {}; +- +- op.op = DRM_GPUVA_OP_MAP; +- op.map.va.addr = addr; +- op.map.va.range = range; +- op.map.gem.obj = obj; +- op.map.gem.offset = offset; +- +- return fn->sm_step_map(&op, priv); +-} +- +-static int +-op_remap_cb(const struct drm_gpuva_fn_ops *fn, void *priv, +- struct drm_gpuva_op_map *prev, +- struct drm_gpuva_op_map *next, +- struct drm_gpuva_op_unmap *unmap) +-{ +- struct drm_gpuva_op op = {}; +- struct drm_gpuva_op_remap *r; +- +- op.op = DRM_GPUVA_OP_REMAP; +- r = &op.remap; +- r->prev = prev; +- r->next = next; +- r->unmap = unmap; +- +- return fn->sm_step_remap(&op, priv); +-} +- +-static int +-op_unmap_cb(const struct drm_gpuva_fn_ops *fn, void *priv, +- struct drm_gpuva *va, bool merge) +-{ +- struct drm_gpuva_op op = {}; +- +- op.op = DRM_GPUVA_OP_UNMAP; +- op.unmap.va = va; +- op.unmap.keep = merge; +- +- return fn->sm_step_unmap(&op, priv); +-} +- +-static int +-__drm_gpuva_sm_map(struct drm_gpuva_manager *mgr, +- const struct drm_gpuva_fn_ops *ops, void *priv, +- u64 req_addr, u64 req_range, +- struct drm_gem_object *req_obj, u64 req_offset) +-{ +- struct drm_gpuva *va, *next; +- u64 req_end = req_addr + req_range; +- int ret; +- +- if (unlikely(!drm_gpuva_range_valid(mgr, req_addr, req_range))) +- return -EINVAL; +- +- drm_gpuva_for_each_va_range_safe(va, next, mgr, req_addr, req_end) { +- struct drm_gem_object *obj = va->gem.obj; +- u64 offset = va->gem.offset; +- u64 addr = va->va.addr; +- u64 range = va->va.range; +- u64 end = addr + range; +- bool merge = !!va->gem.obj; +- +- if (addr == req_addr) { +- merge &= obj == req_obj && +- offset == req_offset; +- +- if (end == req_end) { +- ret = op_unmap_cb(ops, priv, va, merge); +- if (ret) +- return ret; +- break; +- } +- +- if (end < req_end) { +- ret = op_unmap_cb(ops, priv, va, merge); +- if (ret) +- return ret; +- continue; +- } +- +- if (end > req_end) { +- struct drm_gpuva_op_map n = { +- .va.addr = req_end, +- .va.range = range - req_range, +- .gem.obj = obj, +- .gem.offset = offset + req_range, +- }; +- struct drm_gpuva_op_unmap u = { +- .va = va, +- .keep = merge, +- }; +- +- ret = op_remap_cb(ops, priv, NULL, &n, &u); +- if (ret) +- return ret; +- break; +- } +- } else if (addr < req_addr) { +- u64 ls_range = req_addr - addr; +- struct drm_gpuva_op_map p = { +- .va.addr = addr, +- .va.range = ls_range, +- .gem.obj = obj, +- .gem.offset = offset, +- }; +- struct drm_gpuva_op_unmap u = { .va = va }; +- +- merge &= obj == req_obj && +- offset + ls_range == req_offset; +- u.keep = merge; +- +- if (end == req_end) { +- ret = op_remap_cb(ops, priv, &p, NULL, &u); +- if (ret) +- return ret; +- break; +- } +- +- if (end < req_end) { +- ret = op_remap_cb(ops, priv, &p, NULL, &u); +- if (ret) +- return ret; +- continue; +- } +- +- if (end > req_end) { +- struct drm_gpuva_op_map n = { +- .va.addr = req_end, +- .va.range = end - req_end, +- .gem.obj = obj, +- .gem.offset = offset + ls_range + +- req_range, +- }; +- +- ret = op_remap_cb(ops, priv, &p, &n, &u); +- if (ret) +- return ret; +- break; +- } +- } else if (addr > req_addr) { +- merge &= obj == req_obj && +- offset == req_offset + +- (addr - req_addr); +- +- if (end == req_end) { +- ret = op_unmap_cb(ops, priv, va, merge); +- if (ret) +- return ret; +- 
break; +- } +- +- if (end < req_end) { +- ret = op_unmap_cb(ops, priv, va, merge); +- if (ret) +- return ret; +- continue; +- } +- +- if (end > req_end) { +- struct drm_gpuva_op_map n = { +- .va.addr = req_end, +- .va.range = end - req_end, +- .gem.obj = obj, +- .gem.offset = offset + req_end - addr, +- }; +- struct drm_gpuva_op_unmap u = { +- .va = va, +- .keep = merge, +- }; +- +- ret = op_remap_cb(ops, priv, NULL, &n, &u); +- if (ret) +- return ret; +- break; +- } +- } +- } +- +- return op_map_cb(ops, priv, +- req_addr, req_range, +- req_obj, req_offset); +-} +- +-static int +-__drm_gpuva_sm_unmap(struct drm_gpuva_manager *mgr, +- const struct drm_gpuva_fn_ops *ops, void *priv, +- u64 req_addr, u64 req_range) +-{ +- struct drm_gpuva *va, *next; +- u64 req_end = req_addr + req_range; +- int ret; +- +- if (unlikely(!drm_gpuva_range_valid(mgr, req_addr, req_range))) +- return -EINVAL; +- +- drm_gpuva_for_each_va_range_safe(va, next, mgr, req_addr, req_end) { +- struct drm_gpuva_op_map prev = {}, next = {}; +- bool prev_split = false, next_split = false; +- struct drm_gem_object *obj = va->gem.obj; +- u64 offset = va->gem.offset; +- u64 addr = va->va.addr; +- u64 range = va->va.range; +- u64 end = addr + range; +- +- if (addr < req_addr) { +- prev.va.addr = addr; +- prev.va.range = req_addr - addr; +- prev.gem.obj = obj; +- prev.gem.offset = offset; +- +- prev_split = true; +- } +- +- if (end > req_end) { +- next.va.addr = req_end; +- next.va.range = end - req_end; +- next.gem.obj = obj; +- next.gem.offset = offset + (req_end - addr); +- +- next_split = true; +- } +- +- if (prev_split || next_split) { +- struct drm_gpuva_op_unmap unmap = { .va = va }; +- +- ret = op_remap_cb(ops, priv, +- prev_split ? &prev : NULL, +- next_split ? &next : NULL, +- &unmap); +- if (ret) +- return ret; +- } else { +- ret = op_unmap_cb(ops, priv, va, false); +- if (ret) +- return ret; +- } +- } +- +- return 0; +-} +- +-/** +- * drm_gpuva_sm_map() - creates the &drm_gpuva_op split/merge steps +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @req_addr: the start address of the new mapping +- * @req_range: the range of the new mapping +- * @req_obj: the &drm_gem_object to map +- * @req_offset: the offset within the &drm_gem_object +- * @priv: pointer to a driver private data structure +- * +- * This function iterates the given range of the GPU VA space. It utilizes the +- * &drm_gpuva_fn_ops to call back into the driver providing the split and merge +- * steps. +- * +- * Drivers may use these callbacks to update the GPU VA space right away within +- * the callback. In case the driver decides to copy and store the operations for +- * later processing neither this function nor &drm_gpuva_sm_unmap is allowed to +- * be called before the &drm_gpuva_manager's view of the GPU VA space was +- * updated with the previous set of operations. To update the +- * &drm_gpuva_manager's view of the GPU VA space drm_gpuva_insert(), +- * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be +- * used. +- * +- * A sequence of callbacks can contain map, unmap and remap operations, but +- * the sequence of callbacks might also be empty if no operation is required, +- * e.g. if the requested mapping already exists in the exact same way. +- * +- * There can be an arbitrary amount of unmap operations, a maximum of two remap +- * operations and a single map operation. The latter one represents the original +- * map operation requested by the caller. 
+- * +- * Returns: 0 on success or a negative error code +- */ +-int +-drm_gpuva_sm_map(struct drm_gpuva_manager *mgr, void *priv, +- u64 req_addr, u64 req_range, +- struct drm_gem_object *req_obj, u64 req_offset) +-{ +- const struct drm_gpuva_fn_ops *ops = mgr->ops; +- +- if (unlikely(!(ops && ops->sm_step_map && +- ops->sm_step_remap && +- ops->sm_step_unmap))) +- return -EINVAL; +- +- return __drm_gpuva_sm_map(mgr, ops, priv, +- req_addr, req_range, +- req_obj, req_offset); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_sm_map); +- +-/** +- * drm_gpuva_sm_unmap() - creates the &drm_gpuva_ops to split on unmap +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @priv: pointer to a driver private data structure +- * @req_addr: the start address of the range to unmap +- * @req_range: the range of the mappings to unmap +- * +- * This function iterates the given range of the GPU VA space. It utilizes the +- * &drm_gpuva_fn_ops to call back into the driver providing the operations to +- * unmap and, if required, split existent mappings. +- * +- * Drivers may use these callbacks to update the GPU VA space right away within +- * the callback. In case the driver decides to copy and store the operations for +- * later processing neither this function nor &drm_gpuva_sm_map is allowed to be +- * called before the &drm_gpuva_manager's view of the GPU VA space was updated +- * with the previous set of operations. To update the &drm_gpuva_manager's view +- * of the GPU VA space drm_gpuva_insert(), drm_gpuva_destroy_locked() and/or +- * drm_gpuva_destroy_unlocked() should be used. +- * +- * A sequence of callbacks can contain unmap and remap operations, depending on +- * whether there are actual overlapping mappings to split. +- * +- * There can be an arbitrary amount of unmap operations and a maximum of two +- * remap operations. 
+- * +- * Returns: 0 on success or a negative error code +- */ +-int +-drm_gpuva_sm_unmap(struct drm_gpuva_manager *mgr, void *priv, +- u64 req_addr, u64 req_range) +-{ +- const struct drm_gpuva_fn_ops *ops = mgr->ops; +- +- if (unlikely(!(ops && ops->sm_step_remap && +- ops->sm_step_unmap))) +- return -EINVAL; +- +- return __drm_gpuva_sm_unmap(mgr, ops, priv, +- req_addr, req_range); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_sm_unmap); +- +-static struct drm_gpuva_op * +-gpuva_op_alloc(struct drm_gpuva_manager *mgr) +-{ +- const struct drm_gpuva_fn_ops *fn = mgr->ops; +- struct drm_gpuva_op *op; +- +- if (fn && fn->op_alloc) +- op = fn->op_alloc(); +- else +- op = kzalloc(sizeof(*op), GFP_KERNEL); +- +- if (unlikely(!op)) +- return NULL; +- +- return op; +-} +- +-static void +-gpuva_op_free(struct drm_gpuva_manager *mgr, +- struct drm_gpuva_op *op) +-{ +- const struct drm_gpuva_fn_ops *fn = mgr->ops; +- +- if (fn && fn->op_free) +- fn->op_free(op); +- else +- kfree(op); +-} +- +-static int +-drm_gpuva_sm_step(struct drm_gpuva_op *__op, +- void *priv) +-{ +- struct { +- struct drm_gpuva_manager *mgr; +- struct drm_gpuva_ops *ops; +- } *args = priv; +- struct drm_gpuva_manager *mgr = args->mgr; +- struct drm_gpuva_ops *ops = args->ops; +- struct drm_gpuva_op *op; +- +- op = gpuva_op_alloc(mgr); +- if (unlikely(!op)) +- goto err; +- +- memcpy(op, __op, sizeof(*op)); +- +- if (op->op == DRM_GPUVA_OP_REMAP) { +- struct drm_gpuva_op_remap *__r = &__op->remap; +- struct drm_gpuva_op_remap *r = &op->remap; +- +- r->unmap = kmemdup(__r->unmap, sizeof(*r->unmap), +- GFP_KERNEL); +- if (unlikely(!r->unmap)) +- goto err_free_op; +- +- if (__r->prev) { +- r->prev = kmemdup(__r->prev, sizeof(*r->prev), +- GFP_KERNEL); +- if (unlikely(!r->prev)) +- goto err_free_unmap; +- } +- +- if (__r->next) { +- r->next = kmemdup(__r->next, sizeof(*r->next), +- GFP_KERNEL); +- if (unlikely(!r->next)) +- goto err_free_prev; +- } +- } +- +- list_add_tail(&op->entry, &ops->list); +- +- return 0; +- +-err_free_unmap: +- kfree(op->remap.unmap); +-err_free_prev: +- kfree(op->remap.prev); +-err_free_op: +- gpuva_op_free(mgr, op); +-err: +- return -ENOMEM; +-} +- +-static const struct drm_gpuva_fn_ops gpuva_list_ops = { +- .sm_step_map = drm_gpuva_sm_step, +- .sm_step_remap = drm_gpuva_sm_step, +- .sm_step_unmap = drm_gpuva_sm_step, +-}; +- +-/** +- * drm_gpuva_sm_map_ops_create() - creates the &drm_gpuva_ops to split and merge +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @req_addr: the start address of the new mapping +- * @req_range: the range of the new mapping +- * @req_obj: the &drm_gem_object to map +- * @req_offset: the offset within the &drm_gem_object +- * +- * This function creates a list of operations to perform splitting and merging +- * of existent mapping(s) with the newly requested one. +- * +- * The list can be iterated with &drm_gpuva_for_each_op and must be processed +- * in the given order. It can contain map, unmap and remap operations, but it +- * also can be empty if no operation is required, e.g. if the requested mapping +- * already exists is the exact same way. +- * +- * There can be an arbitrary amount of unmap operations, a maximum of two remap +- * operations and a single map operation. The latter one represents the original +- * map operation requested by the caller. +- * +- * Note that before calling this function again with another mapping request it +- * is necessary to update the &drm_gpuva_manager's view of the GPU VA space. 
The +- * previously obtained operations must be either processed or abandoned. To +- * update the &drm_gpuva_manager's view of the GPU VA space drm_gpuva_insert(), +- * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be +- * used. +- * +- * After the caller finished processing the returned &drm_gpuva_ops, they must +- * be freed with &drm_gpuva_ops_free. +- * +- * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure +- */ +-struct drm_gpuva_ops * +-drm_gpuva_sm_map_ops_create(struct drm_gpuva_manager *mgr, +- u64 req_addr, u64 req_range, +- struct drm_gem_object *req_obj, u64 req_offset) +-{ +- struct drm_gpuva_ops *ops; +- struct { +- struct drm_gpuva_manager *mgr; +- struct drm_gpuva_ops *ops; +- } args; +- int ret; +- +- ops = kzalloc(sizeof(*ops), GFP_KERNEL); +- if (unlikely(!ops)) +- return ERR_PTR(-ENOMEM); +- +- INIT_LIST_HEAD(&ops->list); +- +- args.mgr = mgr; +- args.ops = ops; +- +- ret = __drm_gpuva_sm_map(mgr, &gpuva_list_ops, &args, +- req_addr, req_range, +- req_obj, req_offset); +- if (ret) +- goto err_free_ops; +- +- return ops; +- +-err_free_ops: +- drm_gpuva_ops_free(mgr, ops); +- return ERR_PTR(ret); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_sm_map_ops_create); +- +-/** +- * drm_gpuva_sm_unmap_ops_create() - creates the &drm_gpuva_ops to split on +- * unmap +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @req_addr: the start address of the range to unmap +- * @req_range: the range of the mappings to unmap +- * +- * This function creates a list of operations to perform unmapping and, if +- * required, splitting of the mappings overlapping the unmap range. +- * +- * The list can be iterated with &drm_gpuva_for_each_op and must be processed +- * in the given order. It can contain unmap and remap operations, depending on +- * whether there are actual overlapping mappings to split. +- * +- * There can be an arbitrary amount of unmap operations and a maximum of two +- * remap operations. +- * +- * Note that before calling this function again with another range to unmap it +- * is necessary to update the &drm_gpuva_manager's view of the GPU VA space. The +- * previously obtained operations must be processed or abandoned. To update the +- * &drm_gpuva_manager's view of the GPU VA space drm_gpuva_insert(), +- * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be +- * used. +- * +- * After the caller finished processing the returned &drm_gpuva_ops, they must +- * be freed with &drm_gpuva_ops_free. 
+- * +- * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure +- */ +-struct drm_gpuva_ops * +-drm_gpuva_sm_unmap_ops_create(struct drm_gpuva_manager *mgr, +- u64 req_addr, u64 req_range) +-{ +- struct drm_gpuva_ops *ops; +- struct { +- struct drm_gpuva_manager *mgr; +- struct drm_gpuva_ops *ops; +- } args; +- int ret; +- +- ops = kzalloc(sizeof(*ops), GFP_KERNEL); +- if (unlikely(!ops)) +- return ERR_PTR(-ENOMEM); +- +- INIT_LIST_HEAD(&ops->list); +- +- args.mgr = mgr; +- args.ops = ops; +- +- ret = __drm_gpuva_sm_unmap(mgr, &gpuva_list_ops, &args, +- req_addr, req_range); +- if (ret) +- goto err_free_ops; +- +- return ops; +- +-err_free_ops: +- drm_gpuva_ops_free(mgr, ops); +- return ERR_PTR(ret); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_sm_unmap_ops_create); +- +-/** +- * drm_gpuva_prefetch_ops_create() - creates the &drm_gpuva_ops to prefetch +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @addr: the start address of the range to prefetch +- * @range: the range of the mappings to prefetch +- * +- * This function creates a list of operations to perform prefetching. +- * +- * The list can be iterated with &drm_gpuva_for_each_op and must be processed +- * in the given order. It can contain prefetch operations. +- * +- * There can be an arbitrary amount of prefetch operations. +- * +- * After the caller finished processing the returned &drm_gpuva_ops, they must +- * be freed with &drm_gpuva_ops_free. +- * +- * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure +- */ +-struct drm_gpuva_ops * +-drm_gpuva_prefetch_ops_create(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range) +-{ +- struct drm_gpuva_ops *ops; +- struct drm_gpuva_op *op; +- struct drm_gpuva *va; +- u64 end = addr + range; +- int ret; +- +- ops = kzalloc(sizeof(*ops), GFP_KERNEL); +- if (!ops) +- return ERR_PTR(-ENOMEM); +- +- INIT_LIST_HEAD(&ops->list); +- +- drm_gpuva_for_each_va_range(va, mgr, addr, end) { +- op = gpuva_op_alloc(mgr); +- if (!op) { +- ret = -ENOMEM; +- goto err_free_ops; +- } +- +- op->op = DRM_GPUVA_OP_PREFETCH; +- op->prefetch.va = va; +- list_add_tail(&op->entry, &ops->list); +- } +- +- return ops; +- +-err_free_ops: +- drm_gpuva_ops_free(mgr, ops); +- return ERR_PTR(ret); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_prefetch_ops_create); +- +-/** +- * drm_gpuva_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM +- * @mgr: the &drm_gpuva_manager representing the GPU VA space +- * @obj: the &drm_gem_object to unmap +- * +- * This function creates a list of operations to perform unmapping for every +- * GPUVA attached to a GEM. +- * +- * The list can be iterated with &drm_gpuva_for_each_op and consists out of an +- * arbitrary amount of unmap operations. +- * +- * After the caller finished processing the returned &drm_gpuva_ops, they must +- * be freed with &drm_gpuva_ops_free. +- * +- * It is the callers responsibility to protect the GEMs GPUVA list against +- * concurrent access using the GEMs dma_resv lock. 
+- * +- * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure +- */ +-struct drm_gpuva_ops * +-drm_gpuva_gem_unmap_ops_create(struct drm_gpuva_manager *mgr, +- struct drm_gem_object *obj) +-{ +- struct drm_gpuva_ops *ops; +- struct drm_gpuva_op *op; +- struct drm_gpuva *va; +- int ret; +- +- drm_gem_gpuva_assert_lock_held(obj); +- +- ops = kzalloc(sizeof(*ops), GFP_KERNEL); +- if (!ops) +- return ERR_PTR(-ENOMEM); +- +- INIT_LIST_HEAD(&ops->list); +- +- drm_gem_for_each_gpuva(va, obj) { +- op = gpuva_op_alloc(mgr); +- if (!op) { +- ret = -ENOMEM; +- goto err_free_ops; +- } +- +- op->op = DRM_GPUVA_OP_UNMAP; +- op->unmap.va = va; +- list_add_tail(&op->entry, &ops->list); +- } +- +- return ops; +- +-err_free_ops: +- drm_gpuva_ops_free(mgr, ops); +- return ERR_PTR(ret); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_gem_unmap_ops_create); +- +-/** +- * drm_gpuva_ops_free() - free the given &drm_gpuva_ops +- * @mgr: the &drm_gpuva_manager the ops were created for +- * @ops: the &drm_gpuva_ops to free +- * +- * Frees the given &drm_gpuva_ops structure including all the ops associated +- * with it. +- */ +-void +-drm_gpuva_ops_free(struct drm_gpuva_manager *mgr, +- struct drm_gpuva_ops *ops) +-{ +- struct drm_gpuva_op *op, *next; +- +- drm_gpuva_for_each_op_safe(op, next, ops) { +- list_del(&op->entry); +- +- if (op->op == DRM_GPUVA_OP_REMAP) { +- kfree(op->remap.prev); +- kfree(op->remap.next); +- kfree(op->remap.unmap); +- } +- +- gpuva_op_free(mgr, op); +- } +- +- kfree(ops); +-} +-EXPORT_SYMBOL_GPL(drm_gpuva_ops_free); +--- /dev/null ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -0,0 +1,1723 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (c) 2022 Red Hat. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: ++ * Danilo Krummrich ++ * ++ */ ++ ++#include ++ ++#include ++#include ++ ++/** ++ * DOC: Overview ++ * ++ * The DRM GPU VA Manager, represented by struct drm_gpuvm keeps track of a ++ * GPU's virtual address (VA) space and manages the corresponding virtual ++ * mappings represented by &drm_gpuva objects. It also keeps track of the ++ * mapping's backing &drm_gem_object buffers. ++ * ++ * &drm_gem_object buffers maintain a list of &drm_gpuva objects representing ++ * all existent GPU VA mappings using this &drm_gem_object as backing buffer. ++ * ++ * GPU VAs can be flagged as sparse, such that drivers may use GPU VAs to also ++ * keep track of sparse PTEs in order to support Vulkan 'Sparse Resources'. 
++ * ++ * The GPU VA manager internally uses a rb-tree to manage the ++ * &drm_gpuva mappings within a GPU's virtual address space. ++ * ++ * The &drm_gpuvm structure contains a special &drm_gpuva representing the ++ * portion of VA space reserved by the kernel. This node is initialized together ++ * with the GPU VA manager instance and removed when the GPU VA manager is ++ * destroyed. ++ * ++ * In a typical application drivers would embed struct drm_gpuvm and ++ * struct drm_gpuva within their own driver specific structures, there won't be ++ * any memory allocations of its own nor memory allocations of &drm_gpuva ++ * entries. ++ * ++ * The data structures needed to store &drm_gpuvas within the &drm_gpuvm are ++ * contained within struct drm_gpuva already. Hence, for inserting &drm_gpuva ++ * entries from within dma-fence signalling critical sections it is enough to ++ * pre-allocate the &drm_gpuva structures. ++ */ ++ ++/** ++ * DOC: Split and Merge ++ * ++ * Besides its capability to manage and represent a GPU VA space, the ++ * GPU VA manager also provides functions to let the &drm_gpuvm calculate a ++ * sequence of operations to satisfy a given map or unmap request. ++ * ++ * Therefore the DRM GPU VA manager provides an algorithm implementing splitting ++ * and merging of existent GPU VA mappings with the ones that are requested to ++ * be mapped or unmapped. This feature is required by the Vulkan API to ++ * implement Vulkan 'Sparse Memory Bindings' - drivers UAPIs often refer to this ++ * as VM BIND. ++ * ++ * Drivers can call drm_gpuvm_sm_map() to receive a sequence of callbacks ++ * containing map, unmap and remap operations for a given newly requested ++ * mapping. The sequence of callbacks represents the set of operations to ++ * execute in order to integrate the new mapping cleanly into the current state ++ * of the GPU VA space. ++ * ++ * Depending on how the new GPU VA mapping intersects with the existent mappings ++ * of the GPU VA space the &drm_gpuvm_ops callbacks contain an arbitrary amount ++ * of unmap operations, a maximum of two remap operations and a single map ++ * operation. The caller might receive no callback at all if no operation is ++ * required, e.g. if the requested mapping already exists in the exact same way. ++ * ++ * The single map operation represents the original map operation requested by ++ * the caller. ++ * ++ * &drm_gpuva_op_unmap contains a 'keep' field, which indicates whether the ++ * &drm_gpuva to unmap is physically contiguous with the original mapping ++ * request. Optionally, if 'keep' is set, drivers may keep the actual page table ++ * entries for this &drm_gpuva, adding the missing page table entries only and ++ * update the &drm_gpuvm's view of things accordingly. ++ * ++ * Drivers may do the same optimization, namely delta page table updates, also ++ * for remap operations. This is possible since &drm_gpuva_op_remap consists of ++ * one unmap operation and one or two map operations, such that drivers can ++ * derive the page table update delta accordingly. ++ * ++ * Note that there can't be more than two existent mappings to split up, one at ++ * the beginning and one at the end of the new mapping, hence there is a ++ * maximum of two remap operations. ++ * ++ * Analogous to drm_gpuvm_sm_map() drm_gpuvm_sm_unmap() uses &drm_gpuvm_ops to ++ * call back into the driver in order to unmap a range of GPU VA space. 
The ++ * logic behind this function is way simpler though: For all existent mappings ++ * enclosed by the given range unmap operations are created. For mappings which ++ * are only partically located within the given range, remap operations are ++ * created such that those mappings are split up and re-mapped partically. ++ * ++ * As an alternative to drm_gpuvm_sm_map() and drm_gpuvm_sm_unmap(), ++ * drm_gpuvm_sm_map_ops_create() and drm_gpuvm_sm_unmap_ops_create() can be used ++ * to directly obtain an instance of struct drm_gpuva_ops containing a list of ++ * &drm_gpuva_op, which can be iterated with drm_gpuva_for_each_op(). This list ++ * contains the &drm_gpuva_ops analogous to the callbacks one would receive when ++ * calling drm_gpuvm_sm_map() or drm_gpuvm_sm_unmap(). While this way requires ++ * more memory (to allocate the &drm_gpuva_ops), it provides drivers a way to ++ * iterate the &drm_gpuva_op multiple times, e.g. once in a context where memory ++ * allocations are possible (e.g. to allocate GPU page tables) and once in the ++ * dma-fence signalling critical path. ++ * ++ * To update the &drm_gpuvm's view of the GPU VA space drm_gpuva_insert() and ++ * drm_gpuva_remove() may be used. These functions can safely be used from ++ * &drm_gpuvm_ops callbacks originating from drm_gpuvm_sm_map() or ++ * drm_gpuvm_sm_unmap(). However, it might be more convenient to use the ++ * provided helper functions drm_gpuva_map(), drm_gpuva_remap() and ++ * drm_gpuva_unmap() instead. ++ * ++ * The following diagram depicts the basic relationships of existent GPU VA ++ * mappings, a newly requested mapping and the resulting mappings as implemented ++ * by drm_gpuvm_sm_map() - it doesn't cover any arbitrary combinations of these. ++ * ++ * 1) Requested mapping is identical. Replace it, but indicate the backing PTEs ++ * could be kept. ++ * ++ * :: ++ * ++ * 0 a 1 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 a 1 ++ * req: |-----------| (bo_offset=n) ++ * ++ * 0 a 1 ++ * new: |-----------| (bo_offset=n) ++ * ++ * ++ * 2) Requested mapping is identical, except for the BO offset, hence replace ++ * the mapping. ++ * ++ * :: ++ * ++ * 0 a 1 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 a 1 ++ * req: |-----------| (bo_offset=m) ++ * ++ * 0 a 1 ++ * new: |-----------| (bo_offset=m) ++ * ++ * ++ * 3) Requested mapping is identical, except for the backing BO, hence replace ++ * the mapping. ++ * ++ * :: ++ * ++ * 0 a 1 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 b 1 ++ * req: |-----------| (bo_offset=n) ++ * ++ * 0 b 1 ++ * new: |-----------| (bo_offset=n) ++ * ++ * ++ * 4) Existent mapping is a left aligned subset of the requested one, hence ++ * replace the existent one. ++ * ++ * :: ++ * ++ * 0 a 1 ++ * old: |-----| (bo_offset=n) ++ * ++ * 0 a 2 ++ * req: |-----------| (bo_offset=n) ++ * ++ * 0 a 2 ++ * new: |-----------| (bo_offset=n) ++ * ++ * .. note:: ++ * We expect to see the same result for a request with a different BO ++ * and/or non-contiguous BO offset. ++ * ++ * ++ * 5) Requested mapping's range is a left aligned subset of the existent one, ++ * but backed by a different BO. Hence, map the requested mapping and split ++ * the existent one adjusting its BO offset. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 b 1 ++ * req: |-----| (bo_offset=n) ++ * ++ * 0 b 1 a' 2 ++ * new: |-----|-----| (b.bo_offset=n, a.bo_offset=n+1) ++ * ++ * .. note:: ++ * We expect to see the same result for a request with a different BO ++ * and/or non-contiguous BO offset. 
++ * ++ * ++ * 6) Existent mapping is a superset of the requested mapping. Split it up, but ++ * indicate that the backing PTEs could be kept. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 a 1 ++ * req: |-----| (bo_offset=n) ++ * ++ * 0 a 1 a' 2 ++ * new: |-----|-----| (a.bo_offset=n, a'.bo_offset=n+1) ++ * ++ * ++ * 7) Requested mapping's range is a right aligned subset of the existent one, ++ * but backed by a different BO. Hence, map the requested mapping and split ++ * the existent one, without adjusting the BO offset. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 1 b 2 ++ * req: |-----| (bo_offset=m) ++ * ++ * 0 a 1 b 2 ++ * new: |-----|-----| (a.bo_offset=n,b.bo_offset=m) ++ * ++ * ++ * 8) Existent mapping is a superset of the requested mapping. Split it up, but ++ * indicate that the backing PTEs could be kept. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 1 a 2 ++ * req: |-----| (bo_offset=n+1) ++ * ++ * 0 a' 1 a 2 ++ * new: |-----|-----| (a'.bo_offset=n, a.bo_offset=n+1) ++ * ++ * ++ * 9) Existent mapping is overlapped at the end by the requested mapping backed ++ * by a different BO. Hence, map the requested mapping and split up the ++ * existent one, without adjusting the BO offset. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 1 b 3 ++ * req: |-----------| (bo_offset=m) ++ * ++ * 0 a 1 b 3 ++ * new: |-----|-----------| (a.bo_offset=n,b.bo_offset=m) ++ * ++ * ++ * 10) Existent mapping is overlapped by the requested mapping, both having the ++ * same backing BO with a contiguous offset. Indicate the backing PTEs of ++ * the old mapping could be kept. ++ * ++ * :: ++ * ++ * 0 a 2 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 1 a 3 ++ * req: |-----------| (bo_offset=n+1) ++ * ++ * 0 a' 1 a 3 ++ * new: |-----|-----------| (a'.bo_offset=n, a.bo_offset=n+1) ++ * ++ * ++ * 11) Requested mapping's range is a centered subset of the existent one ++ * having a different backing BO. Hence, map the requested mapping and split ++ * up the existent one in two mappings, adjusting the BO offset of the right ++ * one accordingly. ++ * ++ * :: ++ * ++ * 0 a 3 ++ * old: |-----------------| (bo_offset=n) ++ * ++ * 1 b 2 ++ * req: |-----| (bo_offset=m) ++ * ++ * 0 a 1 b 2 a' 3 ++ * new: |-----|-----|-----| (a.bo_offset=n,b.bo_offset=m,a'.bo_offset=n+2) ++ * ++ * ++ * 12) Requested mapping is a contiguous subset of the existent one. Split it ++ * up, but indicate that the backing PTEs could be kept. ++ * ++ * :: ++ * ++ * 0 a 3 ++ * old: |-----------------| (bo_offset=n) ++ * ++ * 1 a 2 ++ * req: |-----| (bo_offset=n+1) ++ * ++ * 0 a' 1 a 2 a'' 3 ++ * old: |-----|-----|-----| (a'.bo_offset=n, a.bo_offset=n+1, a''.bo_offset=n+2) ++ * ++ * ++ * 13) Existent mapping is a right aligned subset of the requested one, hence ++ * replace the existent one. ++ * ++ * :: ++ * ++ * 1 a 2 ++ * old: |-----| (bo_offset=n+1) ++ * ++ * 0 a 2 ++ * req: |-----------| (bo_offset=n) ++ * ++ * 0 a 2 ++ * new: |-----------| (bo_offset=n) ++ * ++ * .. note:: ++ * We expect to see the same result for a request with a different bo ++ * and/or non-contiguous bo_offset. ++ * ++ * ++ * 14) Existent mapping is a centered subset of the requested one, hence ++ * replace the existent one. ++ * ++ * :: ++ * ++ * 1 a 2 ++ * old: |-----| (bo_offset=n+1) ++ * ++ * 0 a 3 ++ * req: |----------------| (bo_offset=n) ++ * ++ * 0 a 3 ++ * new: |----------------| (bo_offset=n) ++ * ++ * .. 
note:: ++ * We expect to see the same result for a request with a different bo ++ * and/or non-contiguous bo_offset. ++ * ++ * ++ * 15) Existent mappings is overlapped at the beginning by the requested mapping ++ * backed by a different BO. Hence, map the requested mapping and split up ++ * the existent one, adjusting its BO offset accordingly. ++ * ++ * :: ++ * ++ * 1 a 3 ++ * old: |-----------| (bo_offset=n) ++ * ++ * 0 b 2 ++ * req: |-----------| (bo_offset=m) ++ * ++ * 0 b 2 a' 3 ++ * new: |-----------|-----| (b.bo_offset=m,a.bo_offset=n+2) ++ */ ++ ++/** ++ * DOC: Locking ++ * ++ * Generally, the GPU VA manager does not take care of locking itself, it is ++ * the drivers responsibility to take care about locking. Drivers might want to ++ * protect the following operations: inserting, removing and iterating ++ * &drm_gpuva objects as well as generating all kinds of operations, such as ++ * split / merge or prefetch. ++ * ++ * The GPU VA manager also does not take care of the locking of the backing ++ * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to ++ * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively ++ * a driver specific external lock. For the latter see also ++ * drm_gem_gpuva_set_lock(). ++ * ++ * However, the GPU VA manager contains lockdep checks to ensure callers of its ++ * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is ++ * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink(). ++ */ ++ ++/** ++ * DOC: Examples ++ * ++ * This section gives two examples on how to let the DRM GPUVA Manager generate ++ * &drm_gpuva_op in order to satisfy a given map or unmap request and how to ++ * make use of them. ++ * ++ * The below code is strictly limited to illustrate the generic usage pattern. ++ * To maintain simplicitly, it doesn't make use of any abstractions for common ++ * code, different (asyncronous) stages with fence signalling critical paths, ++ * any other helpers or error handling in terms of freeing memory and dropping ++ * previously taken locks. ++ * ++ * 1) Obtain a list of &drm_gpuva_op to create a new mapping:: ++ * ++ * // Allocates a new &drm_gpuva. ++ * struct drm_gpuva * driver_gpuva_alloc(void); ++ * ++ * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva ++ * // structure in individual driver structures and lock the dma-resv with ++ * // drm_exec or similar helpers. 
++ * int driver_mapping_create(struct drm_gpuvm *gpuvm, ++ * u64 addr, u64 range, ++ * struct drm_gem_object *obj, u64 offset) ++ * { ++ * struct drm_gpuva_ops *ops; ++ * struct drm_gpuva_op *op ++ * ++ * driver_lock_va_space(); ++ * ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range, ++ * obj, offset); ++ * if (IS_ERR(ops)) ++ * return PTR_ERR(ops); ++ * ++ * drm_gpuva_for_each_op(op, ops) { ++ * struct drm_gpuva *va; ++ * ++ * switch (op->op) { ++ * case DRM_GPUVA_OP_MAP: ++ * va = driver_gpuva_alloc(); ++ * if (!va) ++ * ; // unwind previous VA space updates, ++ * // free memory and unlock ++ * ++ * driver_vm_map(); ++ * drm_gpuva_map(gpuvm, va, &op->map); ++ * drm_gpuva_link(va); ++ * ++ * break; ++ * case DRM_GPUVA_OP_REMAP: { ++ * struct drm_gpuva *prev = NULL, *next = NULL; ++ * ++ * va = op->remap.unmap->va; ++ * ++ * if (op->remap.prev) { ++ * prev = driver_gpuva_alloc(); ++ * if (!prev) ++ * ; // unwind previous VA space ++ * // updates, free memory and ++ * // unlock ++ * } ++ * ++ * if (op->remap.next) { ++ * next = driver_gpuva_alloc(); ++ * if (!next) ++ * ; // unwind previous VA space ++ * // updates, free memory and ++ * // unlock ++ * } ++ * ++ * driver_vm_remap(); ++ * drm_gpuva_remap(prev, next, &op->remap); ++ * ++ * drm_gpuva_unlink(va); ++ * if (prev) ++ * drm_gpuva_link(prev); ++ * if (next) ++ * drm_gpuva_link(next); ++ * ++ * break; ++ * } ++ * case DRM_GPUVA_OP_UNMAP: ++ * va = op->unmap->va; ++ * ++ * driver_vm_unmap(); ++ * drm_gpuva_unlink(va); ++ * drm_gpuva_unmap(&op->unmap); ++ * ++ * break; ++ * default: ++ * break; ++ * } ++ * } ++ * driver_unlock_va_space(); ++ * ++ * return 0; ++ * } ++ * ++ * 2) Receive a callback for each &drm_gpuva_op to create a new mapping:: ++ * ++ * struct driver_context { ++ * struct drm_gpuvm *gpuvm; ++ * struct drm_gpuva *new_va; ++ * struct drm_gpuva *prev_va; ++ * struct drm_gpuva *next_va; ++ * }; ++ * ++ * // ops to pass to drm_gpuvm_init() ++ * static const struct drm_gpuvm_ops driver_gpuvm_ops = { ++ * .sm_step_map = driver_gpuva_map, ++ * .sm_step_remap = driver_gpuva_remap, ++ * .sm_step_unmap = driver_gpuva_unmap, ++ * }; ++ * ++ * // Typically drivers would embedd the &drm_gpuvm and &drm_gpuva ++ * // structure in individual driver structures and lock the dma-resv with ++ * // drm_exec or similar helpers. 
++ * int driver_mapping_create(struct drm_gpuvm *gpuvm, ++ * u64 addr, u64 range, ++ * struct drm_gem_object *obj, u64 offset) ++ * { ++ * struct driver_context ctx; ++ * struct drm_gpuva_ops *ops; ++ * struct drm_gpuva_op *op; ++ * int ret = 0; ++ * ++ * ctx.gpuvm = gpuvm; ++ * ++ * ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL); ++ * ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL); ++ * ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL); ++ * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) { ++ * ret = -ENOMEM; ++ * goto out; ++ * } ++ * ++ * driver_lock_va_space(); ++ * ret = drm_gpuvm_sm_map(gpuvm, &ctx, addr, range, obj, offset); ++ * driver_unlock_va_space(); ++ * ++ * out: ++ * kfree(ctx.new_va); ++ * kfree(ctx.prev_va); ++ * kfree(ctx.next_va); ++ * return ret; ++ * } ++ * ++ * int driver_gpuva_map(struct drm_gpuva_op *op, void *__ctx) ++ * { ++ * struct driver_context *ctx = __ctx; ++ * ++ * drm_gpuva_map(ctx->vm, ctx->new_va, &op->map); ++ * ++ * drm_gpuva_link(ctx->new_va); ++ * ++ * // prevent the new GPUVA from being freed in ++ * // driver_mapping_create() ++ * ctx->new_va = NULL; ++ * ++ * return 0; ++ * } ++ * ++ * int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx) ++ * { ++ * struct driver_context *ctx = __ctx; ++ * ++ * drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap); ++ * ++ * drm_gpuva_unlink(op->remap.unmap->va); ++ * kfree(op->remap.unmap->va); ++ * ++ * if (op->remap.prev) { ++ * drm_gpuva_link(ctx->prev_va); ++ * ctx->prev_va = NULL; ++ * } ++ * ++ * if (op->remap.next) { ++ * drm_gpuva_link(ctx->next_va); ++ * ctx->next_va = NULL; ++ * } ++ * ++ * return 0; ++ * } ++ * ++ * int driver_gpuva_unmap(struct drm_gpuva_op *op, void *__ctx) ++ * { ++ * drm_gpuva_unlink(op->unmap.va); ++ * drm_gpuva_unmap(&op->unmap); ++ * kfree(op->unmap.va); ++ * ++ * return 0; ++ * } ++ */ ++ ++#define to_drm_gpuva(__node) container_of((__node), struct drm_gpuva, rb.node) ++ ++#define GPUVA_START(node) ((node)->va.addr) ++#define GPUVA_LAST(node) ((node)->va.addr + (node)->va.range - 1) ++ ++/* We do not actually use drm_gpuva_it_next(), tell the compiler to not complain ++ * about this. 
++ */ ++INTERVAL_TREE_DEFINE(struct drm_gpuva, rb.node, u64, rb.__subtree_last, ++ GPUVA_START, GPUVA_LAST, static __maybe_unused, ++ drm_gpuva_it) ++ ++static int __drm_gpuva_insert(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va); ++static void __drm_gpuva_remove(struct drm_gpuva *va); ++ ++static bool ++drm_gpuvm_check_overflow(u64 addr, u64 range) ++{ ++ u64 end; ++ ++ return WARN(check_add_overflow(addr, range, &end), ++ "GPUVA address limited to %zu bytes.\n", sizeof(end)); ++} ++ ++static bool ++drm_gpuvm_in_mm_range(struct drm_gpuvm *gpuvm, u64 addr, u64 range) ++{ ++ u64 end = addr + range; ++ u64 mm_start = gpuvm->mm_start; ++ u64 mm_end = mm_start + gpuvm->mm_range; ++ ++ return addr >= mm_start && end <= mm_end; ++} ++ ++static bool ++drm_gpuvm_in_kernel_node(struct drm_gpuvm *gpuvm, u64 addr, u64 range) ++{ ++ u64 end = addr + range; ++ u64 kstart = gpuvm->kernel_alloc_node.va.addr; ++ u64 krange = gpuvm->kernel_alloc_node.va.range; ++ u64 kend = kstart + krange; ++ ++ return krange && addr < kend && kstart < end; ++} ++ ++static bool ++drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range) ++{ ++ return !drm_gpuvm_check_overflow(addr, range) && ++ drm_gpuvm_in_mm_range(gpuvm, addr, range) && ++ !drm_gpuvm_in_kernel_node(gpuvm, addr, range); ++} ++ ++/** ++ * drm_gpuvm_init() - initialize a &drm_gpuvm ++ * @gpuvm: pointer to the &drm_gpuvm to initialize ++ * @name: the name of the GPU VA space ++ * @start_offset: the start offset of the GPU VA space ++ * @range: the size of the GPU VA space ++ * @reserve_offset: the start of the kernel reserved GPU VA area ++ * @reserve_range: the size of the kernel reserved GPU VA area ++ * @ops: &drm_gpuvm_ops called on &drm_gpuvm_sm_map / &drm_gpuvm_sm_unmap ++ * ++ * The &drm_gpuvm must be initialized with this function before use. ++ * ++ * Note that @gpuvm must be cleared to 0 before calling this function. The given ++ * &name is expected to be managed by the surrounding driver structures. ++ */ ++void ++drm_gpuvm_init(struct drm_gpuvm *gpuvm, ++ const char *name, ++ u64 start_offset, u64 range, ++ u64 reserve_offset, u64 reserve_range, ++ const struct drm_gpuvm_ops *ops) ++{ ++ gpuvm->rb.tree = RB_ROOT_CACHED; ++ INIT_LIST_HEAD(&gpuvm->rb.list); ++ ++ drm_gpuvm_check_overflow(start_offset, range); ++ gpuvm->mm_start = start_offset; ++ gpuvm->mm_range = range; ++ ++ gpuvm->name = name ? name : "unknown"; ++ gpuvm->ops = ops; ++ ++ memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); ++ ++ if (reserve_range) { ++ gpuvm->kernel_alloc_node.va.addr = reserve_offset; ++ gpuvm->kernel_alloc_node.va.range = reserve_range; ++ ++ if (likely(!drm_gpuvm_check_overflow(reserve_offset, ++ reserve_range))) ++ __drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node); ++ } ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_init); ++ ++/** ++ * drm_gpuvm_destroy() - cleanup a &drm_gpuvm ++ * @gpuvm: pointer to the &drm_gpuvm to clean up ++ * ++ * Note that it is a bug to call this function on a manager that still ++ * holds GPU VA mappings. 
++ */ ++void ++drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) ++{ ++ gpuvm->name = NULL; ++ ++ if (gpuvm->kernel_alloc_node.va.range) ++ __drm_gpuva_remove(&gpuvm->kernel_alloc_node); ++ ++ WARN(!RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), ++ "GPUVA tree is not empty, potentially leaking memory."); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); ++ ++static int ++__drm_gpuva_insert(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va) ++{ ++ struct rb_node *node; ++ struct list_head *head; ++ ++ if (drm_gpuva_it_iter_first(&gpuvm->rb.tree, ++ GPUVA_START(va), ++ GPUVA_LAST(va))) ++ return -EEXIST; ++ ++ va->vm = gpuvm; ++ ++ drm_gpuva_it_insert(va, &gpuvm->rb.tree); ++ ++ node = rb_prev(&va->rb.node); ++ if (node) ++ head = &(to_drm_gpuva(node))->rb.entry; ++ else ++ head = &gpuvm->rb.list; ++ ++ list_add(&va->rb.entry, head); ++ ++ return 0; ++} ++ ++/** ++ * drm_gpuva_insert() - insert a &drm_gpuva ++ * @gpuvm: the &drm_gpuvm to insert the &drm_gpuva in ++ * @va: the &drm_gpuva to insert ++ * ++ * Insert a &drm_gpuva with a given address and range into a ++ * &drm_gpuvm. ++ * ++ * It is safe to use this function using the safe versions of iterating the GPU ++ * VA space, such as drm_gpuvm_for_each_va_safe() and ++ * drm_gpuvm_for_each_va_range_safe(). ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuva_insert(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va) ++{ ++ u64 addr = va->va.addr; ++ u64 range = va->va.range; ++ ++ if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range))) ++ return -EINVAL; ++ ++ return __drm_gpuva_insert(gpuvm, va); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_insert); ++ ++static void ++__drm_gpuva_remove(struct drm_gpuva *va) ++{ ++ drm_gpuva_it_remove(va, &va->vm->rb.tree); ++ list_del_init(&va->rb.entry); ++} ++ ++/** ++ * drm_gpuva_remove() - remove a &drm_gpuva ++ * @va: the &drm_gpuva to remove ++ * ++ * This removes the given &va from the underlaying tree. ++ * ++ * It is safe to use this function using the safe versions of iterating the GPU ++ * VA space, such as drm_gpuvm_for_each_va_safe() and ++ * drm_gpuvm_for_each_va_range_safe(). ++ */ ++void ++drm_gpuva_remove(struct drm_gpuva *va) ++{ ++ struct drm_gpuvm *gpuvm = va->vm; ++ ++ if (unlikely(va == &gpuvm->kernel_alloc_node)) { ++ WARN(1, "Can't destroy kernel reserved node.\n"); ++ return; ++ } ++ ++ __drm_gpuva_remove(va); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_remove); ++ ++/** ++ * drm_gpuva_link() - link a &drm_gpuva ++ * @va: the &drm_gpuva to link ++ * ++ * This adds the given &va to the GPU VA list of the &drm_gem_object it is ++ * associated with. ++ * ++ * This function expects the caller to protect the GEM's GPUVA list against ++ * concurrent access using the GEMs dma_resv lock. ++ */ ++void ++drm_gpuva_link(struct drm_gpuva *va) ++{ ++ struct drm_gem_object *obj = va->gem.obj; ++ ++ if (unlikely(!obj)) ++ return; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ ++ list_add_tail(&va->gem.entry, &obj->gpuva.list); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_link); ++ ++/** ++ * drm_gpuva_unlink() - unlink a &drm_gpuva ++ * @va: the &drm_gpuva to unlink ++ * ++ * This removes the given &va from the GPU VA list of the &drm_gem_object it is ++ * associated with. ++ * ++ * This function expects the caller to protect the GEM's GPUVA list against ++ * concurrent access using the GEMs dma_resv lock. 
++ */ ++void ++drm_gpuva_unlink(struct drm_gpuva *va) ++{ ++ struct drm_gem_object *obj = va->gem.obj; ++ ++ if (unlikely(!obj)) ++ return; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ ++ list_del_init(&va->gem.entry); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_unlink); ++ ++/** ++ * drm_gpuva_find_first() - find the first &drm_gpuva in the given range ++ * @gpuvm: the &drm_gpuvm to search in ++ * @addr: the &drm_gpuvas address ++ * @range: the &drm_gpuvas range ++ * ++ * Returns: the first &drm_gpuva within the given range ++ */ ++struct drm_gpuva * ++drm_gpuva_find_first(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range) ++{ ++ u64 last = addr + range - 1; ++ ++ return drm_gpuva_it_iter_first(&gpuvm->rb.tree, addr, last); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_find_first); ++ ++/** ++ * drm_gpuva_find() - find a &drm_gpuva ++ * @gpuvm: the &drm_gpuvm to search in ++ * @addr: the &drm_gpuvas address ++ * @range: the &drm_gpuvas range ++ * ++ * Returns: the &drm_gpuva at a given &addr and with a given &range ++ */ ++struct drm_gpuva * ++drm_gpuva_find(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range) ++{ ++ struct drm_gpuva *va; ++ ++ va = drm_gpuva_find_first(gpuvm, addr, range); ++ if (!va) ++ goto out; ++ ++ if (va->va.addr != addr || ++ va->va.range != range) ++ goto out; ++ ++ return va; ++ ++out: ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_find); ++ ++/** ++ * drm_gpuva_find_prev() - find the &drm_gpuva before the given address ++ * @gpuvm: the &drm_gpuvm to search in ++ * @start: the given GPU VA's start address ++ * ++ * Find the adjacent &drm_gpuva before the GPU VA with given &start address. ++ * ++ * Note that if there is any free space between the GPU VA mappings no mapping ++ * is returned. ++ * ++ * Returns: a pointer to the found &drm_gpuva or NULL if none was found ++ */ ++struct drm_gpuva * ++drm_gpuva_find_prev(struct drm_gpuvm *gpuvm, u64 start) ++{ ++ if (!drm_gpuvm_range_valid(gpuvm, start - 1, 1)) ++ return NULL; ++ ++ return drm_gpuva_it_iter_first(&gpuvm->rb.tree, start - 1, start); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_find_prev); ++ ++/** ++ * drm_gpuva_find_next() - find the &drm_gpuva after the given address ++ * @gpuvm: the &drm_gpuvm to search in ++ * @end: the given GPU VA's end address ++ * ++ * Find the adjacent &drm_gpuva after the GPU VA with given &end address. ++ * ++ * Note that if there is any free space between the GPU VA mappings no mapping ++ * is returned. 
++ * ++ * Returns: a pointer to the found &drm_gpuva or NULL if none was found ++ */ ++struct drm_gpuva * ++drm_gpuva_find_next(struct drm_gpuvm *gpuvm, u64 end) ++{ ++ if (!drm_gpuvm_range_valid(gpuvm, end, 1)) ++ return NULL; ++ ++ return drm_gpuva_it_iter_first(&gpuvm->rb.tree, end, end + 1); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_find_next); ++ ++/** ++ * drm_gpuvm_interval_empty() - indicate whether a given interval of the VA space ++ * is empty ++ * @gpuvm: the &drm_gpuvm to check the range for ++ * @addr: the start address of the range ++ * @range: the range of the interval ++ * ++ * Returns: true if the interval is empty, false otherwise ++ */ ++bool ++drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range) ++{ ++ return !drm_gpuva_find_first(gpuvm, addr, range); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_interval_empty); ++ ++/** ++ * drm_gpuva_map() - helper to insert a &drm_gpuva according to a ++ * &drm_gpuva_op_map ++ * @gpuvm: the &drm_gpuvm ++ * @va: the &drm_gpuva to insert ++ * @op: the &drm_gpuva_op_map to initialize @va with ++ * ++ * Initializes the @va from the @op and inserts it into the given @gpuvm. ++ */ ++void ++drm_gpuva_map(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va, ++ struct drm_gpuva_op_map *op) ++{ ++ drm_gpuva_init_from_op(va, op); ++ drm_gpuva_insert(gpuvm, va); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_map); ++ ++/** ++ * drm_gpuva_remap() - helper to remap a &drm_gpuva according to a ++ * &drm_gpuva_op_remap ++ * @prev: the &drm_gpuva to remap when keeping the start of a mapping ++ * @next: the &drm_gpuva to remap when keeping the end of a mapping ++ * @op: the &drm_gpuva_op_remap to initialize @prev and @next with ++ * ++ * Removes the currently mapped &drm_gpuva and remaps it using @prev and/or ++ * @next. ++ */ ++void ++drm_gpuva_remap(struct drm_gpuva *prev, ++ struct drm_gpuva *next, ++ struct drm_gpuva_op_remap *op) ++{ ++ struct drm_gpuva *curr = op->unmap->va; ++ struct drm_gpuvm *gpuvm = curr->vm; ++ ++ drm_gpuva_remove(curr); ++ ++ if (op->prev) { ++ drm_gpuva_init_from_op(prev, op->prev); ++ drm_gpuva_insert(gpuvm, prev); ++ } ++ ++ if (op->next) { ++ drm_gpuva_init_from_op(next, op->next); ++ drm_gpuva_insert(gpuvm, next); ++ } ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_remap); ++ ++/** ++ * drm_gpuva_unmap() - helper to remove a &drm_gpuva according to a ++ * &drm_gpuva_op_unmap ++ * @op: the &drm_gpuva_op_unmap specifying the &drm_gpuva to remove ++ * ++ * Removes the &drm_gpuva associated with the &drm_gpuva_op_unmap. 
++ */ ++void ++drm_gpuva_unmap(struct drm_gpuva_op_unmap *op) ++{ ++ drm_gpuva_remove(op->va); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_unmap); ++ ++static int ++op_map_cb(const struct drm_gpuvm_ops *fn, void *priv, ++ u64 addr, u64 range, ++ struct drm_gem_object *obj, u64 offset) ++{ ++ struct drm_gpuva_op op = {}; ++ ++ op.op = DRM_GPUVA_OP_MAP; ++ op.map.va.addr = addr; ++ op.map.va.range = range; ++ op.map.gem.obj = obj; ++ op.map.gem.offset = offset; ++ ++ return fn->sm_step_map(&op, priv); ++} ++ ++static int ++op_remap_cb(const struct drm_gpuvm_ops *fn, void *priv, ++ struct drm_gpuva_op_map *prev, ++ struct drm_gpuva_op_map *next, ++ struct drm_gpuva_op_unmap *unmap) ++{ ++ struct drm_gpuva_op op = {}; ++ struct drm_gpuva_op_remap *r; ++ ++ op.op = DRM_GPUVA_OP_REMAP; ++ r = &op.remap; ++ r->prev = prev; ++ r->next = next; ++ r->unmap = unmap; ++ ++ return fn->sm_step_remap(&op, priv); ++} ++ ++static int ++op_unmap_cb(const struct drm_gpuvm_ops *fn, void *priv, ++ struct drm_gpuva *va, bool merge) ++{ ++ struct drm_gpuva_op op = {}; ++ ++ op.op = DRM_GPUVA_OP_UNMAP; ++ op.unmap.va = va; ++ op.unmap.keep = merge; ++ ++ return fn->sm_step_unmap(&op, priv); ++} ++ ++static int ++__drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, ++ const struct drm_gpuvm_ops *ops, void *priv, ++ u64 req_addr, u64 req_range, ++ struct drm_gem_object *req_obj, u64 req_offset) ++{ ++ struct drm_gpuva *va, *next; ++ u64 req_end = req_addr + req_range; ++ int ret; ++ ++ if (unlikely(!drm_gpuvm_range_valid(gpuvm, req_addr, req_range))) ++ return -EINVAL; ++ ++ drm_gpuvm_for_each_va_range_safe(va, next, gpuvm, req_addr, req_end) { ++ struct drm_gem_object *obj = va->gem.obj; ++ u64 offset = va->gem.offset; ++ u64 addr = va->va.addr; ++ u64 range = va->va.range; ++ u64 end = addr + range; ++ bool merge = !!va->gem.obj; ++ ++ if (addr == req_addr) { ++ merge &= obj == req_obj && ++ offset == req_offset; ++ ++ if (end == req_end) { ++ ret = op_unmap_cb(ops, priv, va, merge); ++ if (ret) ++ return ret; ++ break; ++ } ++ ++ if (end < req_end) { ++ ret = op_unmap_cb(ops, priv, va, merge); ++ if (ret) ++ return ret; ++ continue; ++ } ++ ++ if (end > req_end) { ++ struct drm_gpuva_op_map n = { ++ .va.addr = req_end, ++ .va.range = range - req_range, ++ .gem.obj = obj, ++ .gem.offset = offset + req_range, ++ }; ++ struct drm_gpuva_op_unmap u = { ++ .va = va, ++ .keep = merge, ++ }; ++ ++ ret = op_remap_cb(ops, priv, NULL, &n, &u); ++ if (ret) ++ return ret; ++ break; ++ } ++ } else if (addr < req_addr) { ++ u64 ls_range = req_addr - addr; ++ struct drm_gpuva_op_map p = { ++ .va.addr = addr, ++ .va.range = ls_range, ++ .gem.obj = obj, ++ .gem.offset = offset, ++ }; ++ struct drm_gpuva_op_unmap u = { .va = va }; ++ ++ merge &= obj == req_obj && ++ offset + ls_range == req_offset; ++ u.keep = merge; ++ ++ if (end == req_end) { ++ ret = op_remap_cb(ops, priv, &p, NULL, &u); ++ if (ret) ++ return ret; ++ break; ++ } ++ ++ if (end < req_end) { ++ ret = op_remap_cb(ops, priv, &p, NULL, &u); ++ if (ret) ++ return ret; ++ continue; ++ } ++ ++ if (end > req_end) { ++ struct drm_gpuva_op_map n = { ++ .va.addr = req_end, ++ .va.range = end - req_end, ++ .gem.obj = obj, ++ .gem.offset = offset + ls_range + ++ req_range, ++ }; ++ ++ ret = op_remap_cb(ops, priv, &p, &n, &u); ++ if (ret) ++ return ret; ++ break; ++ } ++ } else if (addr > req_addr) { ++ merge &= obj == req_obj && ++ offset == req_offset + ++ (addr - req_addr); ++ ++ if (end == req_end) { ++ ret = op_unmap_cb(ops, priv, va, merge); ++ if (ret) ++ return ret; ++ break; ++ } ++ ++ 
if (end < req_end) { ++ ret = op_unmap_cb(ops, priv, va, merge); ++ if (ret) ++ return ret; ++ continue; ++ } ++ ++ if (end > req_end) { ++ struct drm_gpuva_op_map n = { ++ .va.addr = req_end, ++ .va.range = end - req_end, ++ .gem.obj = obj, ++ .gem.offset = offset + req_end - addr, ++ }; ++ struct drm_gpuva_op_unmap u = { ++ .va = va, ++ .keep = merge, ++ }; ++ ++ ret = op_remap_cb(ops, priv, NULL, &n, &u); ++ if (ret) ++ return ret; ++ break; ++ } ++ } ++ } ++ ++ return op_map_cb(ops, priv, ++ req_addr, req_range, ++ req_obj, req_offset); ++} ++ ++static int ++__drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, ++ const struct drm_gpuvm_ops *ops, void *priv, ++ u64 req_addr, u64 req_range) ++{ ++ struct drm_gpuva *va, *next; ++ u64 req_end = req_addr + req_range; ++ int ret; ++ ++ if (unlikely(!drm_gpuvm_range_valid(gpuvm, req_addr, req_range))) ++ return -EINVAL; ++ ++ drm_gpuvm_for_each_va_range_safe(va, next, gpuvm, req_addr, req_end) { ++ struct drm_gpuva_op_map prev = {}, next = {}; ++ bool prev_split = false, next_split = false; ++ struct drm_gem_object *obj = va->gem.obj; ++ u64 offset = va->gem.offset; ++ u64 addr = va->va.addr; ++ u64 range = va->va.range; ++ u64 end = addr + range; ++ ++ if (addr < req_addr) { ++ prev.va.addr = addr; ++ prev.va.range = req_addr - addr; ++ prev.gem.obj = obj; ++ prev.gem.offset = offset; ++ ++ prev_split = true; ++ } ++ ++ if (end > req_end) { ++ next.va.addr = req_end; ++ next.va.range = end - req_end; ++ next.gem.obj = obj; ++ next.gem.offset = offset + (req_end - addr); ++ ++ next_split = true; ++ } ++ ++ if (prev_split || next_split) { ++ struct drm_gpuva_op_unmap unmap = { .va = va }; ++ ++ ret = op_remap_cb(ops, priv, ++ prev_split ? &prev : NULL, ++ next_split ? &next : NULL, ++ &unmap); ++ if (ret) ++ return ret; ++ } else { ++ ret = op_unmap_cb(ops, priv, va, false); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * drm_gpuvm_sm_map() - creates the &drm_gpuva_op split/merge steps ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @req_addr: the start address of the new mapping ++ * @req_range: the range of the new mapping ++ * @req_obj: the &drm_gem_object to map ++ * @req_offset: the offset within the &drm_gem_object ++ * @priv: pointer to a driver private data structure ++ * ++ * This function iterates the given range of the GPU VA space. It utilizes the ++ * &drm_gpuvm_ops to call back into the driver providing the split and merge ++ * steps. ++ * ++ * Drivers may use these callbacks to update the GPU VA space right away within ++ * the callback. In case the driver decides to copy and store the operations for ++ * later processing neither this function nor &drm_gpuvm_sm_unmap is allowed to ++ * be called before the &drm_gpuvm's view of the GPU VA space was ++ * updated with the previous set of operations. To update the ++ * &drm_gpuvm's view of the GPU VA space drm_gpuva_insert(), ++ * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be ++ * used. ++ * ++ * A sequence of callbacks can contain map, unmap and remap operations, but ++ * the sequence of callbacks might also be empty if no operation is required, ++ * e.g. if the requested mapping already exists in the exact same way. ++ * ++ * There can be an arbitrary amount of unmap operations, a maximum of two remap ++ * operations and a single map operation. The latter one represents the original ++ * map operation requested by the caller. 
++ * ++ * Returns: 0 on success or a negative error code ++ */ ++int ++drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv, ++ u64 req_addr, u64 req_range, ++ struct drm_gem_object *req_obj, u64 req_offset) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ ++ if (unlikely(!(ops && ops->sm_step_map && ++ ops->sm_step_remap && ++ ops->sm_step_unmap))) ++ return -EINVAL; ++ ++ return __drm_gpuvm_sm_map(gpuvm, ops, priv, ++ req_addr, req_range, ++ req_obj, req_offset); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map); ++ ++/** ++ * drm_gpuvm_sm_unmap() - creates the &drm_gpuva_ops to split on unmap ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @priv: pointer to a driver private data structure ++ * @req_addr: the start address of the range to unmap ++ * @req_range: the range of the mappings to unmap ++ * ++ * This function iterates the given range of the GPU VA space. It utilizes the ++ * &drm_gpuvm_ops to call back into the driver providing the operations to ++ * unmap and, if required, split existent mappings. ++ * ++ * Drivers may use these callbacks to update the GPU VA space right away within ++ * the callback. In case the driver decides to copy and store the operations for ++ * later processing neither this function nor &drm_gpuvm_sm_map is allowed to be ++ * called before the &drm_gpuvm's view of the GPU VA space was updated ++ * with the previous set of operations. To update the &drm_gpuvm's view ++ * of the GPU VA space drm_gpuva_insert(), drm_gpuva_destroy_locked() and/or ++ * drm_gpuva_destroy_unlocked() should be used. ++ * ++ * A sequence of callbacks can contain unmap and remap operations, depending on ++ * whether there are actual overlapping mappings to split. ++ * ++ * There can be an arbitrary amount of unmap operations and a maximum of two ++ * remap operations. 
++ * ++ * Returns: 0 on success or a negative error code ++ */ ++int ++drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, void *priv, ++ u64 req_addr, u64 req_range) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ ++ if (unlikely(!(ops && ops->sm_step_remap && ++ ops->sm_step_unmap))) ++ return -EINVAL; ++ ++ return __drm_gpuvm_sm_unmap(gpuvm, ops, priv, ++ req_addr, req_range); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_sm_unmap); ++ ++static struct drm_gpuva_op * ++gpuva_op_alloc(struct drm_gpuvm *gpuvm) ++{ ++ const struct drm_gpuvm_ops *fn = gpuvm->ops; ++ struct drm_gpuva_op *op; ++ ++ if (fn && fn->op_alloc) ++ op = fn->op_alloc(); ++ else ++ op = kzalloc(sizeof(*op), GFP_KERNEL); ++ ++ if (unlikely(!op)) ++ return NULL; ++ ++ return op; ++} ++ ++static void ++gpuva_op_free(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva_op *op) ++{ ++ const struct drm_gpuvm_ops *fn = gpuvm->ops; ++ ++ if (fn && fn->op_free) ++ fn->op_free(op); ++ else ++ kfree(op); ++} ++ ++static int ++drm_gpuva_sm_step(struct drm_gpuva_op *__op, ++ void *priv) ++{ ++ struct { ++ struct drm_gpuvm *vm; ++ struct drm_gpuva_ops *ops; ++ } *args = priv; ++ struct drm_gpuvm *gpuvm = args->vm; ++ struct drm_gpuva_ops *ops = args->ops; ++ struct drm_gpuva_op *op; ++ ++ op = gpuva_op_alloc(gpuvm); ++ if (unlikely(!op)) ++ goto err; ++ ++ memcpy(op, __op, sizeof(*op)); ++ ++ if (op->op == DRM_GPUVA_OP_REMAP) { ++ struct drm_gpuva_op_remap *__r = &__op->remap; ++ struct drm_gpuva_op_remap *r = &op->remap; ++ ++ r->unmap = kmemdup(__r->unmap, sizeof(*r->unmap), ++ GFP_KERNEL); ++ if (unlikely(!r->unmap)) ++ goto err_free_op; ++ ++ if (__r->prev) { ++ r->prev = kmemdup(__r->prev, sizeof(*r->prev), ++ GFP_KERNEL); ++ if (unlikely(!r->prev)) ++ goto err_free_unmap; ++ } ++ ++ if (__r->next) { ++ r->next = kmemdup(__r->next, sizeof(*r->next), ++ GFP_KERNEL); ++ if (unlikely(!r->next)) ++ goto err_free_prev; ++ } ++ } ++ ++ list_add_tail(&op->entry, &ops->list); ++ ++ return 0; ++ ++err_free_unmap: ++ kfree(op->remap.unmap); ++err_free_prev: ++ kfree(op->remap.prev); ++err_free_op: ++ gpuva_op_free(gpuvm, op); ++err: ++ return -ENOMEM; ++} ++ ++static const struct drm_gpuvm_ops gpuvm_list_ops = { ++ .sm_step_map = drm_gpuva_sm_step, ++ .sm_step_remap = drm_gpuva_sm_step, ++ .sm_step_unmap = drm_gpuva_sm_step, ++}; ++ ++/** ++ * drm_gpuvm_sm_map_ops_create() - creates the &drm_gpuva_ops to split and merge ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @req_addr: the start address of the new mapping ++ * @req_range: the range of the new mapping ++ * @req_obj: the &drm_gem_object to map ++ * @req_offset: the offset within the &drm_gem_object ++ * ++ * This function creates a list of operations to perform splitting and merging ++ * of existent mapping(s) with the newly requested one. ++ * ++ * The list can be iterated with &drm_gpuva_for_each_op and must be processed ++ * in the given order. It can contain map, unmap and remap operations, but it ++ * also can be empty if no operation is required, e.g. if the requested mapping ++ * already exists is the exact same way. ++ * ++ * There can be an arbitrary amount of unmap operations, a maximum of two remap ++ * operations and a single map operation. The latter one represents the original ++ * map operation requested by the caller. ++ * ++ * Note that before calling this function again with another mapping request it ++ * is necessary to update the &drm_gpuvm's view of the GPU VA space. The ++ * previously obtained operations must be either processed or abandoned. 
To ++ * update the &drm_gpuvm's view of the GPU VA space drm_gpuva_insert(), ++ * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be ++ * used. ++ * ++ * After the caller finished processing the returned &drm_gpuva_ops, they must ++ * be freed with &drm_gpuva_ops_free. ++ * ++ * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure ++ */ ++struct drm_gpuva_ops * ++drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, ++ u64 req_addr, u64 req_range, ++ struct drm_gem_object *req_obj, u64 req_offset) ++{ ++ struct drm_gpuva_ops *ops; ++ struct { ++ struct drm_gpuvm *vm; ++ struct drm_gpuva_ops *ops; ++ } args; ++ int ret; ++ ++ ops = kzalloc(sizeof(*ops), GFP_KERNEL); ++ if (unlikely(!ops)) ++ return ERR_PTR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&ops->list); ++ ++ args.vm = gpuvm; ++ args.ops = ops; ++ ++ ret = __drm_gpuvm_sm_map(gpuvm, &gpuvm_list_ops, &args, ++ req_addr, req_range, ++ req_obj, req_offset); ++ if (ret) ++ goto err_free_ops; ++ ++ return ops; ++ ++err_free_ops: ++ drm_gpuva_ops_free(gpuvm, ops); ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map_ops_create); ++ ++/** ++ * drm_gpuvm_sm_unmap_ops_create() - creates the &drm_gpuva_ops to split on ++ * unmap ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @req_addr: the start address of the range to unmap ++ * @req_range: the range of the mappings to unmap ++ * ++ * This function creates a list of operations to perform unmapping and, if ++ * required, splitting of the mappings overlapping the unmap range. ++ * ++ * The list can be iterated with &drm_gpuva_for_each_op and must be processed ++ * in the given order. It can contain unmap and remap operations, depending on ++ * whether there are actual overlapping mappings to split. ++ * ++ * There can be an arbitrary amount of unmap operations and a maximum of two ++ * remap operations. ++ * ++ * Note that before calling this function again with another range to unmap it ++ * is necessary to update the &drm_gpuvm's view of the GPU VA space. The ++ * previously obtained operations must be processed or abandoned. To update the ++ * &drm_gpuvm's view of the GPU VA space drm_gpuva_insert(), ++ * drm_gpuva_destroy_locked() and/or drm_gpuva_destroy_unlocked() should be ++ * used. ++ * ++ * After the caller finished processing the returned &drm_gpuva_ops, they must ++ * be freed with &drm_gpuva_ops_free. ++ * ++ * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure ++ */ ++struct drm_gpuva_ops * ++drm_gpuvm_sm_unmap_ops_create(struct drm_gpuvm *gpuvm, ++ u64 req_addr, u64 req_range) ++{ ++ struct drm_gpuva_ops *ops; ++ struct { ++ struct drm_gpuvm *vm; ++ struct drm_gpuva_ops *ops; ++ } args; ++ int ret; ++ ++ ops = kzalloc(sizeof(*ops), GFP_KERNEL); ++ if (unlikely(!ops)) ++ return ERR_PTR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&ops->list); ++ ++ args.vm = gpuvm; ++ args.ops = ops; ++ ++ ret = __drm_gpuvm_sm_unmap(gpuvm, &gpuvm_list_ops, &args, ++ req_addr, req_range); ++ if (ret) ++ goto err_free_ops; ++ ++ return ops; ++ ++err_free_ops: ++ drm_gpuva_ops_free(gpuvm, ops); ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_sm_unmap_ops_create); ++ ++/** ++ * drm_gpuvm_prefetch_ops_create() - creates the &drm_gpuva_ops to prefetch ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @addr: the start address of the range to prefetch ++ * @range: the range of the mappings to prefetch ++ * ++ * This function creates a list of operations to perform prefetching. 
++ * ++ * The list can be iterated with &drm_gpuva_for_each_op and must be processed ++ * in the given order. It can contain prefetch operations. ++ * ++ * There can be an arbitrary amount of prefetch operations. ++ * ++ * After the caller finished processing the returned &drm_gpuva_ops, they must ++ * be freed with &drm_gpuva_ops_free. ++ * ++ * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure ++ */ ++struct drm_gpuva_ops * ++drm_gpuvm_prefetch_ops_create(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range) ++{ ++ struct drm_gpuva_ops *ops; ++ struct drm_gpuva_op *op; ++ struct drm_gpuva *va; ++ u64 end = addr + range; ++ int ret; ++ ++ ops = kzalloc(sizeof(*ops), GFP_KERNEL); ++ if (!ops) ++ return ERR_PTR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&ops->list); ++ ++ drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { ++ op = gpuva_op_alloc(gpuvm); ++ if (!op) { ++ ret = -ENOMEM; ++ goto err_free_ops; ++ } ++ ++ op->op = DRM_GPUVA_OP_PREFETCH; ++ op->prefetch.va = va; ++ list_add_tail(&op->entry, &ops->list); ++ } ++ ++ return ops; ++ ++err_free_ops: ++ drm_gpuva_ops_free(gpuvm, ops); ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); ++ ++/** ++ * drm_gpuvm_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM ++ * @gpuvm: the &drm_gpuvm representing the GPU VA space ++ * @obj: the &drm_gem_object to unmap ++ * ++ * This function creates a list of operations to perform unmapping for every ++ * GPUVA attached to a GEM. ++ * ++ * The list can be iterated with &drm_gpuva_for_each_op and consists out of an ++ * arbitrary amount of unmap operations. ++ * ++ * After the caller finished processing the returned &drm_gpuva_ops, they must ++ * be freed with &drm_gpuva_ops_free. ++ * ++ * It is the callers responsibility to protect the GEMs GPUVA list against ++ * concurrent access using the GEMs dma_resv lock. ++ * ++ * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure ++ */ ++struct drm_gpuva_ops * ++drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ struct drm_gpuva_ops *ops; ++ struct drm_gpuva_op *op; ++ struct drm_gpuva *va; ++ int ret; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ ++ ops = kzalloc(sizeof(*ops), GFP_KERNEL); ++ if (!ops) ++ return ERR_PTR(-ENOMEM); ++ ++ INIT_LIST_HEAD(&ops->list); ++ ++ drm_gem_for_each_gpuva(va, obj) { ++ op = gpuva_op_alloc(gpuvm); ++ if (!op) { ++ ret = -ENOMEM; ++ goto err_free_ops; ++ } ++ ++ op->op = DRM_GPUVA_OP_UNMAP; ++ op->unmap.va = va; ++ list_add_tail(&op->entry, &ops->list); ++ } ++ ++ return ops; ++ ++err_free_ops: ++ drm_gpuva_ops_free(gpuvm, ops); ++ return ERR_PTR(ret); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_gem_unmap_ops_create); ++ ++/** ++ * drm_gpuva_ops_free() - free the given &drm_gpuva_ops ++ * @gpuvm: the &drm_gpuvm the ops were created for ++ * @ops: the &drm_gpuva_ops to free ++ * ++ * Frees the given &drm_gpuva_ops structure including all the ops associated ++ * with it. 
++ */ ++void ++drm_gpuva_ops_free(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva_ops *ops) ++{ ++ struct drm_gpuva_op *op, *next; ++ ++ drm_gpuva_for_each_op_safe(op, next, ops) { ++ list_del(&op->entry); ++ ++ if (op->op == DRM_GPUVA_OP_REMAP) { ++ kfree(op->remap.prev); ++ kfree(op->remap.next); ++ kfree(op->remap.unmap); ++ } ++ ++ gpuva_op_free(gpuvm, op); ++ } ++ ++ kfree(ops); ++} ++EXPORT_SYMBOL_GPL(drm_gpuva_ops_free); +--- a/drivers/gpu/drm/nouveau/nouveau_exec.c ++++ b/drivers/gpu/drm/nouveau/nouveau_exec.c +@@ -107,7 +107,7 @@ nouveau_exec_job_submit(struct nouveau_j + drm_exec_until_all_locked(exec) { + struct drm_gpuva *va; + +- drm_gpuva_for_each_va(va, &uvmm->umgr) { ++ drm_gpuvm_for_each_va(va, &uvmm->umgr) { + if (unlikely(va == &uvmm->umgr.kernel_alloc_node)) + continue; + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -329,7 +329,7 @@ nouveau_uvma_region_create(struct nouvea + struct nouveau_uvma_region *reg; + int ret; + +- if (!drm_gpuva_interval_empty(&uvmm->umgr, addr, range)) ++ if (!drm_gpuvm_interval_empty(&uvmm->umgr, addr, range)) + return -ENOSPC; + + ret = nouveau_uvma_region_alloc(®); +@@ -384,7 +384,7 @@ nouveau_uvma_region_empty(struct nouveau + { + struct nouveau_uvmm *uvmm = reg->uvmm; + +- return drm_gpuva_interval_empty(&uvmm->umgr, ++ return drm_gpuvm_interval_empty(&uvmm->umgr, + reg->va.addr, + reg->va.range); + } +@@ -444,7 +444,7 @@ op_map_prepare_unwind(struct nouveau_uvm + static void + op_unmap_prepare_unwind(struct drm_gpuva *va) + { +- drm_gpuva_insert(va->mgr, va); ++ drm_gpuva_insert(va->vm, va); + } + + static void +@@ -1194,7 +1194,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + goto unwind_continue; + } + +- op->ops = drm_gpuva_sm_unmap_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr, + op->va.addr, + op->va.range); + if (IS_ERR(op->ops)) { +@@ -1240,7 +1240,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + } + } + +- op->ops = drm_gpuva_sm_map_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->umgr, + op->va.addr, + op->va.range, + op->gem.obj, +@@ -1264,7 +1264,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + break; + } + case OP_UNMAP: +- op->ops = drm_gpuva_sm_unmap_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr, + op->va.addr, + op->va.range); + if (IS_ERR(op->ops)) { +@@ -1836,11 +1836,11 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + uvmm->kernel_managed_addr = kernel_managed_addr; + uvmm->kernel_managed_size = kernel_managed_size; + +- drm_gpuva_manager_init(&uvmm->umgr, cli->name, +- NOUVEAU_VA_SPACE_START, +- NOUVEAU_VA_SPACE_END, +- kernel_managed_addr, kernel_managed_size, +- NULL); ++ drm_gpuvm_init(&uvmm->umgr, cli->name, ++ NOUVEAU_VA_SPACE_START, ++ NOUVEAU_VA_SPACE_END, ++ kernel_managed_addr, kernel_managed_size, ++ NULL); + + ret = nvif_vmm_ctor(&cli->mmu, "uvmm", + cli->vmm.vmm.object.oclass, RAW, +@@ -1855,7 +1855,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + return 0; + + out_free_gpuva_mgr: +- drm_gpuva_manager_destroy(&uvmm->umgr); ++ drm_gpuvm_destroy(&uvmm->umgr); + out_unlock: + mutex_unlock(&cli->mutex); + return ret; +@@ -1877,7 +1877,7 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + wait_event(entity->job.wq, list_empty(&entity->job.list.head)); + + nouveau_uvmm_lock(uvmm); +- drm_gpuva_for_each_va_safe(va, next, &uvmm->umgr) { ++ drm_gpuvm_for_each_va_safe(va, next, &uvmm->umgr) { + struct nouveau_uvma *uvma = uvma_from_va(va); + struct drm_gem_object *obj = va->gem.obj; + +@@ 
-1910,7 +1910,7 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + + mutex_lock(&cli->mutex); + nouveau_vmm_fini(&uvmm->vmm); +- drm_gpuva_manager_destroy(&uvmm->umgr); ++ drm_gpuvm_destroy(&uvmm->umgr); + mutex_unlock(&cli->mutex); + + dma_resv_fini(&uvmm->resv); +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -3,13 +3,13 @@ + #ifndef __NOUVEAU_UVMM_H__ + #define __NOUVEAU_UVMM_H__ + +-#include ++#include + + #include "nouveau_drv.h" + + struct nouveau_uvmm { + struct nouveau_vmm vmm; +- struct drm_gpuva_manager umgr; ++ struct drm_gpuvm umgr; + struct maple_tree region_mt; + struct mutex mutex; + struct dma_resv resv; +@@ -44,7 +44,7 @@ struct nouveau_uvma { + #define uvmm_from_mgr(x) container_of((x), struct nouveau_uvmm, umgr) + #define uvma_from_va(x) container_of((x), struct nouveau_uvma, va) + +-#define to_uvmm(x) uvmm_from_mgr((x)->va.mgr) ++#define to_uvmm(x) uvmm_from_mgr((x)->va.vm) + + struct nouveau_uvmm_bind_job { + struct nouveau_job base; +--- a/include/drm/drm_debugfs.h ++++ b/include/drm/drm_debugfs.h +@@ -35,7 +35,7 @@ + #include + #include + +-#include ++#include + + /** + * DRM_DEBUGFS_GPUVA_INFO - &drm_info_list entry to dump a GPU VA space +@@ -152,7 +152,7 @@ void drm_debugfs_add_files(struct drm_de + const struct drm_debugfs_info *files, int count); + + int drm_debugfs_gpuva_info(struct seq_file *m, +- struct drm_gpuva_manager *mgr); ++ struct drm_gpuvm *gpuvm); + #else + static inline void drm_debugfs_create_files(const struct drm_info_list *files, + int count, struct dentry *root, +@@ -176,7 +176,7 @@ static inline void drm_debugfs_add_files + {} + + static inline int drm_debugfs_gpuva_info(struct seq_file *m, +- struct drm_gpuva_manager *mgr) ++ struct drm_gpuvm *gpuvm) + { + return 0; + } +--- a/include/drm/drm_gpuva_mgr.h ++++ /dev/null +@@ -1,706 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0-only */ +- +-#ifndef __DRM_GPUVA_MGR_H__ +-#define __DRM_GPUVA_MGR_H__ +- +-/* +- * Copyright (c) 2022 Red Hat. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +- * OTHER DEALINGS IN THE SOFTWARE. +- */ +- +-#include +-#include +-#include +- +-#include +- +-struct drm_gpuva_manager; +-struct drm_gpuva_fn_ops; +- +-/** +- * enum drm_gpuva_flags - flags for struct drm_gpuva +- */ +-enum drm_gpuva_flags { +- /** +- * @DRM_GPUVA_INVALIDATED: +- * +- * Flag indicating that the &drm_gpuva's backing GEM is invalidated. 
+- */ +- DRM_GPUVA_INVALIDATED = (1 << 0), +- +- /** +- * @DRM_GPUVA_SPARSE: +- * +- * Flag indicating that the &drm_gpuva is a sparse mapping. +- */ +- DRM_GPUVA_SPARSE = (1 << 1), +- +- /** +- * @DRM_GPUVA_USERBITS: user defined bits +- */ +- DRM_GPUVA_USERBITS = (1 << 2), +-}; +- +-/** +- * struct drm_gpuva - structure to track a GPU VA mapping +- * +- * This structure represents a GPU VA mapping and is associated with a +- * &drm_gpuva_manager. +- * +- * Typically, this structure is embedded in bigger driver structures. +- */ +-struct drm_gpuva { +- /** +- * @mgr: the &drm_gpuva_manager this object is associated with +- */ +- struct drm_gpuva_manager *mgr; +- +- /** +- * @flags: the &drm_gpuva_flags for this mapping +- */ +- enum drm_gpuva_flags flags; +- +- /** +- * @va: structure containing the address and range of the &drm_gpuva +- */ +- struct { +- /** +- * @addr: the start address +- */ +- u64 addr; +- +- /* +- * @range: the range +- */ +- u64 range; +- } va; +- +- /** +- * @gem: structure containing the &drm_gem_object and it's offset +- */ +- struct { +- /** +- * @offset: the offset within the &drm_gem_object +- */ +- u64 offset; +- +- /** +- * @obj: the mapped &drm_gem_object +- */ +- struct drm_gem_object *obj; +- +- /** +- * @entry: the &list_head to attach this object to a &drm_gem_object +- */ +- struct list_head entry; +- } gem; +- +- /** +- * @rb: structure containing data to store &drm_gpuvas in a rb-tree +- */ +- struct { +- /** +- * @rb: the rb-tree node +- */ +- struct rb_node node; +- +- /** +- * @entry: The &list_head to additionally connect &drm_gpuvas +- * in the same order they appear in the interval tree. This is +- * useful to keep iterating &drm_gpuvas from a start node found +- * through the rb-tree while doing modifications on the rb-tree +- * itself. 
+- */ +- struct list_head entry; +- +- /** +- * @__subtree_last: needed by the interval tree, holding last-in-subtree +- */ +- u64 __subtree_last; +- } rb; +-}; +- +-int drm_gpuva_insert(struct drm_gpuva_manager *mgr, struct drm_gpuva *va); +-void drm_gpuva_remove(struct drm_gpuva *va); +- +-void drm_gpuva_link(struct drm_gpuva *va); +-void drm_gpuva_unlink(struct drm_gpuva *va); +- +-struct drm_gpuva *drm_gpuva_find(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range); +-struct drm_gpuva *drm_gpuva_find_first(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range); +-struct drm_gpuva *drm_gpuva_find_prev(struct drm_gpuva_manager *mgr, u64 start); +-struct drm_gpuva *drm_gpuva_find_next(struct drm_gpuva_manager *mgr, u64 end); +- +-bool drm_gpuva_interval_empty(struct drm_gpuva_manager *mgr, u64 addr, u64 range); +- +-static inline void drm_gpuva_init(struct drm_gpuva *va, u64 addr, u64 range, +- struct drm_gem_object *obj, u64 offset) +-{ +- va->va.addr = addr; +- va->va.range = range; +- va->gem.obj = obj; +- va->gem.offset = offset; +-} +- +-/** +- * drm_gpuva_invalidate() - sets whether the backing GEM of this &drm_gpuva is +- * invalidated +- * @va: the &drm_gpuva to set the invalidate flag for +- * @invalidate: indicates whether the &drm_gpuva is invalidated +- */ +-static inline void drm_gpuva_invalidate(struct drm_gpuva *va, bool invalidate) +-{ +- if (invalidate) +- va->flags |= DRM_GPUVA_INVALIDATED; +- else +- va->flags &= ~DRM_GPUVA_INVALIDATED; +-} +- +-/** +- * drm_gpuva_invalidated() - indicates whether the backing BO of this &drm_gpuva +- * is invalidated +- * @va: the &drm_gpuva to check +- */ +-static inline bool drm_gpuva_invalidated(struct drm_gpuva *va) +-{ +- return va->flags & DRM_GPUVA_INVALIDATED; +-} +- +-/** +- * struct drm_gpuva_manager - DRM GPU VA Manager +- * +- * The DRM GPU VA Manager keeps track of a GPU's virtual address space by using +- * &maple_tree structures. Typically, this structure is embedded in bigger +- * driver structures. +- * +- * Drivers can pass addresses and ranges in an arbitrary unit, e.g. bytes or +- * pages. +- * +- * There should be one manager instance per GPU virtual address space. 
+- */ +-struct drm_gpuva_manager { +- /** +- * @name: the name of the DRM GPU VA space +- */ +- const char *name; +- +- /** +- * @mm_start: start of the VA space +- */ +- u64 mm_start; +- +- /** +- * @mm_range: length of the VA space +- */ +- u64 mm_range; +- +- /** +- * @rb: structures to track &drm_gpuva entries +- */ +- struct { +- /** +- * @tree: the rb-tree to track GPU VA mappings +- */ +- struct rb_root_cached tree; +- +- /** +- * @list: the &list_head to track GPU VA mappings +- */ +- struct list_head list; +- } rb; +- +- /** +- * @kernel_alloc_node: +- * +- * &drm_gpuva representing the address space cutout reserved for +- * the kernel +- */ +- struct drm_gpuva kernel_alloc_node; +- +- /** +- * @ops: &drm_gpuva_fn_ops providing the split/merge steps to drivers +- */ +- const struct drm_gpuva_fn_ops *ops; +-}; +- +-void drm_gpuva_manager_init(struct drm_gpuva_manager *mgr, +- const char *name, +- u64 start_offset, u64 range, +- u64 reserve_offset, u64 reserve_range, +- const struct drm_gpuva_fn_ops *ops); +-void drm_gpuva_manager_destroy(struct drm_gpuva_manager *mgr); +- +-static inline struct drm_gpuva * +-__drm_gpuva_next(struct drm_gpuva *va) +-{ +- if (va && !list_is_last(&va->rb.entry, &va->mgr->rb.list)) +- return list_next_entry(va, rb.entry); +- +- return NULL; +-} +- +-/** +- * drm_gpuva_for_each_va_range() - iterate over a range of &drm_gpuvas +- * @va__: &drm_gpuva structure to assign to in each iteration step +- * @mgr__: &drm_gpuva_manager to walk over +- * @start__: starting offset, the first gpuva will overlap this +- * @end__: ending offset, the last gpuva will start before this (but may +- * overlap) +- * +- * This iterator walks over all &drm_gpuvas in the &drm_gpuva_manager that lie +- * between @start__ and @end__. It is implemented similarly to list_for_each(), +- * but is using the &drm_gpuva_manager's internal interval tree to accelerate +- * the search for the starting &drm_gpuva, and hence isn't safe against removal +- * of elements. It assumes that @end__ is within (or is the upper limit of) the +- * &drm_gpuva_manager. This iterator does not skip over the &drm_gpuva_manager's +- * @kernel_alloc_node. +- */ +-#define drm_gpuva_for_each_va_range(va__, mgr__, start__, end__) \ +- for (va__ = drm_gpuva_find_first((mgr__), (start__), (end__) - (start__)); \ +- va__ && (va__->va.addr < (end__)); \ +- va__ = __drm_gpuva_next(va__)) +- +-/** +- * drm_gpuva_for_each_va_range_safe() - safely iterate over a range of +- * &drm_gpuvas +- * @va__: &drm_gpuva to assign to in each iteration step +- * @next__: another &drm_gpuva to use as temporary storage +- * @mgr__: &drm_gpuva_manager to walk over +- * @start__: starting offset, the first gpuva will overlap this +- * @end__: ending offset, the last gpuva will start before this (but may +- * overlap) +- * +- * This iterator walks over all &drm_gpuvas in the &drm_gpuva_manager that lie +- * between @start__ and @end__. It is implemented similarly to +- * list_for_each_safe(), but is using the &drm_gpuva_manager's internal interval +- * tree to accelerate the search for the starting &drm_gpuva, and hence is safe +- * against removal of elements. It assumes that @end__ is within (or is the +- * upper limit of) the &drm_gpuva_manager. This iterator does not skip over the +- * &drm_gpuva_manager's @kernel_alloc_node. 
+- */ +-#define drm_gpuva_for_each_va_range_safe(va__, next__, mgr__, start__, end__) \ +- for (va__ = drm_gpuva_find_first((mgr__), (start__), (end__) - (start__)), \ +- next__ = __drm_gpuva_next(va__); \ +- va__ && (va__->va.addr < (end__)); \ +- va__ = next__, next__ = __drm_gpuva_next(va__)) +- +-/** +- * drm_gpuva_for_each_va() - iterate over all &drm_gpuvas +- * @va__: &drm_gpuva to assign to in each iteration step +- * @mgr__: &drm_gpuva_manager to walk over +- * +- * This iterator walks over all &drm_gpuva structures associated with the given +- * &drm_gpuva_manager. +- */ +-#define drm_gpuva_for_each_va(va__, mgr__) \ +- list_for_each_entry(va__, &(mgr__)->rb.list, rb.entry) +- +-/** +- * drm_gpuva_for_each_va_safe() - safely iterate over all &drm_gpuvas +- * @va__: &drm_gpuva to assign to in each iteration step +- * @next__: another &drm_gpuva to use as temporary storage +- * @mgr__: &drm_gpuva_manager to walk over +- * +- * This iterator walks over all &drm_gpuva structures associated with the given +- * &drm_gpuva_manager. It is implemented with list_for_each_entry_safe(), and +- * hence safe against the removal of elements. +- */ +-#define drm_gpuva_for_each_va_safe(va__, next__, mgr__) \ +- list_for_each_entry_safe(va__, next__, &(mgr__)->rb.list, rb.entry) +- +-/** +- * enum drm_gpuva_op_type - GPU VA operation type +- * +- * Operations to alter the GPU VA mappings tracked by the &drm_gpuva_manager. +- */ +-enum drm_gpuva_op_type { +- /** +- * @DRM_GPUVA_OP_MAP: the map op type +- */ +- DRM_GPUVA_OP_MAP, +- +- /** +- * @DRM_GPUVA_OP_REMAP: the remap op type +- */ +- DRM_GPUVA_OP_REMAP, +- +- /** +- * @DRM_GPUVA_OP_UNMAP: the unmap op type +- */ +- DRM_GPUVA_OP_UNMAP, +- +- /** +- * @DRM_GPUVA_OP_PREFETCH: the prefetch op type +- */ +- DRM_GPUVA_OP_PREFETCH, +-}; +- +-/** +- * struct drm_gpuva_op_map - GPU VA map operation +- * +- * This structure represents a single map operation generated by the +- * DRM GPU VA manager. +- */ +-struct drm_gpuva_op_map { +- /** +- * @va: structure containing address and range of a map +- * operation +- */ +- struct { +- /** +- * @addr: the base address of the new mapping +- */ +- u64 addr; +- +- /** +- * @range: the range of the new mapping +- */ +- u64 range; +- } va; +- +- /** +- * @gem: structure containing the &drm_gem_object and it's offset +- */ +- struct { +- /** +- * @offset: the offset within the &drm_gem_object +- */ +- u64 offset; +- +- /** +- * @obj: the &drm_gem_object to map +- */ +- struct drm_gem_object *obj; +- } gem; +-}; +- +-/** +- * struct drm_gpuva_op_unmap - GPU VA unmap operation +- * +- * This structure represents a single unmap operation generated by the +- * DRM GPU VA manager. +- */ +-struct drm_gpuva_op_unmap { +- /** +- * @va: the &drm_gpuva to unmap +- */ +- struct drm_gpuva *va; +- +- /** +- * @keep: +- * +- * Indicates whether this &drm_gpuva is physically contiguous with the +- * original mapping request. +- * +- * Optionally, if &keep is set, drivers may keep the actual page table +- * mappings for this &drm_gpuva, adding the missing page table entries +- * only and update the &drm_gpuva_manager accordingly. +- */ +- bool keep; +-}; +- +-/** +- * struct drm_gpuva_op_remap - GPU VA remap operation +- * +- * This represents a single remap operation generated by the DRM GPU VA manager. 
+- * +- * A remap operation is generated when an existing GPU VA mmapping is split up +- * by inserting a new GPU VA mapping or by partially unmapping existent +- * mapping(s), hence it consists of a maximum of two map and one unmap +- * operation. +- * +- * The @unmap operation takes care of removing the original existing mapping. +- * @prev is used to remap the preceding part, @next the subsequent part. +- * +- * If either a new mapping's start address is aligned with the start address +- * of the old mapping or the new mapping's end address is aligned with the +- * end address of the old mapping, either @prev or @next is NULL. +- * +- * Note, the reason for a dedicated remap operation, rather than arbitrary +- * unmap and map operations, is to give drivers the chance of extracting driver +- * specific data for creating the new mappings from the unmap operations's +- * &drm_gpuva structure which typically is embedded in larger driver specific +- * structures. +- */ +-struct drm_gpuva_op_remap { +- /** +- * @prev: the preceding part of a split mapping +- */ +- struct drm_gpuva_op_map *prev; +- +- /** +- * @next: the subsequent part of a split mapping +- */ +- struct drm_gpuva_op_map *next; +- +- /** +- * @unmap: the unmap operation for the original existing mapping +- */ +- struct drm_gpuva_op_unmap *unmap; +-}; +- +-/** +- * struct drm_gpuva_op_prefetch - GPU VA prefetch operation +- * +- * This structure represents a single prefetch operation generated by the +- * DRM GPU VA manager. +- */ +-struct drm_gpuva_op_prefetch { +- /** +- * @va: the &drm_gpuva to prefetch +- */ +- struct drm_gpuva *va; +-}; +- +-/** +- * struct drm_gpuva_op - GPU VA operation +- * +- * This structure represents a single generic operation. +- * +- * The particular type of the operation is defined by @op. +- */ +-struct drm_gpuva_op { +- /** +- * @entry: +- * +- * The &list_head used to distribute instances of this struct within +- * &drm_gpuva_ops. +- */ +- struct list_head entry; +- +- /** +- * @op: the type of the operation +- */ +- enum drm_gpuva_op_type op; +- +- union { +- /** +- * @map: the map operation +- */ +- struct drm_gpuva_op_map map; +- +- /** +- * @remap: the remap operation +- */ +- struct drm_gpuva_op_remap remap; +- +- /** +- * @unmap: the unmap operation +- */ +- struct drm_gpuva_op_unmap unmap; +- +- /** +- * @prefetch: the prefetch operation +- */ +- struct drm_gpuva_op_prefetch prefetch; +- }; +-}; +- +-/** +- * struct drm_gpuva_ops - wraps a list of &drm_gpuva_op +- */ +-struct drm_gpuva_ops { +- /** +- * @list: the &list_head +- */ +- struct list_head list; +-}; +- +-/** +- * drm_gpuva_for_each_op() - iterator to walk over &drm_gpuva_ops +- * @op: &drm_gpuva_op to assign in each iteration step +- * @ops: &drm_gpuva_ops to walk +- * +- * This iterator walks over all ops within a given list of operations. +- */ +-#define drm_gpuva_for_each_op(op, ops) list_for_each_entry(op, &(ops)->list, entry) +- +-/** +- * drm_gpuva_for_each_op_safe() - iterator to safely walk over &drm_gpuva_ops +- * @op: &drm_gpuva_op to assign in each iteration step +- * @next: &next &drm_gpuva_op to store the next step +- * @ops: &drm_gpuva_ops to walk +- * +- * This iterator walks over all ops within a given list of operations. It is +- * implemented with list_for_each_safe(), so save against removal of elements. 
+- */ +-#define drm_gpuva_for_each_op_safe(op, next, ops) \ +- list_for_each_entry_safe(op, next, &(ops)->list, entry) +- +-/** +- * drm_gpuva_for_each_op_from_reverse() - iterate backwards from the given point +- * @op: &drm_gpuva_op to assign in each iteration step +- * @ops: &drm_gpuva_ops to walk +- * +- * This iterator walks over all ops within a given list of operations beginning +- * from the given operation in reverse order. +- */ +-#define drm_gpuva_for_each_op_from_reverse(op, ops) \ +- list_for_each_entry_from_reverse(op, &(ops)->list, entry) +- +-/** +- * drm_gpuva_first_op() - returns the first &drm_gpuva_op from &drm_gpuva_ops +- * @ops: the &drm_gpuva_ops to get the fist &drm_gpuva_op from +- */ +-#define drm_gpuva_first_op(ops) \ +- list_first_entry(&(ops)->list, struct drm_gpuva_op, entry) +- +-/** +- * drm_gpuva_last_op() - returns the last &drm_gpuva_op from &drm_gpuva_ops +- * @ops: the &drm_gpuva_ops to get the last &drm_gpuva_op from +- */ +-#define drm_gpuva_last_op(ops) \ +- list_last_entry(&(ops)->list, struct drm_gpuva_op, entry) +- +-/** +- * drm_gpuva_prev_op() - previous &drm_gpuva_op in the list +- * @op: the current &drm_gpuva_op +- */ +-#define drm_gpuva_prev_op(op) list_prev_entry(op, entry) +- +-/** +- * drm_gpuva_next_op() - next &drm_gpuva_op in the list +- * @op: the current &drm_gpuva_op +- */ +-#define drm_gpuva_next_op(op) list_next_entry(op, entry) +- +-struct drm_gpuva_ops * +-drm_gpuva_sm_map_ops_create(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range, +- struct drm_gem_object *obj, u64 offset); +-struct drm_gpuva_ops * +-drm_gpuva_sm_unmap_ops_create(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range); +- +-struct drm_gpuva_ops * +-drm_gpuva_prefetch_ops_create(struct drm_gpuva_manager *mgr, +- u64 addr, u64 range); +- +-struct drm_gpuva_ops * +-drm_gpuva_gem_unmap_ops_create(struct drm_gpuva_manager *mgr, +- struct drm_gem_object *obj); +- +-void drm_gpuva_ops_free(struct drm_gpuva_manager *mgr, +- struct drm_gpuva_ops *ops); +- +-static inline void drm_gpuva_init_from_op(struct drm_gpuva *va, +- struct drm_gpuva_op_map *op) +-{ +- drm_gpuva_init(va, op->va.addr, op->va.range, +- op->gem.obj, op->gem.offset); +-} +- +-/** +- * struct drm_gpuva_fn_ops - callbacks for split/merge steps +- * +- * This structure defines the callbacks used by &drm_gpuva_sm_map and +- * &drm_gpuva_sm_unmap to provide the split/merge steps for map and unmap +- * operations to drivers. +- */ +-struct drm_gpuva_fn_ops { +- /** +- * @op_alloc: called when the &drm_gpuva_manager allocates +- * a struct drm_gpuva_op +- * +- * Some drivers may want to embed struct drm_gpuva_op into driver +- * specific structures. By implementing this callback drivers can +- * allocate memory accordingly. +- * +- * This callback is optional. +- */ +- struct drm_gpuva_op *(*op_alloc)(void); +- +- /** +- * @op_free: called when the &drm_gpuva_manager frees a +- * struct drm_gpuva_op +- * +- * Some drivers may want to embed struct drm_gpuva_op into driver +- * specific structures. By implementing this callback drivers can +- * free the previously allocated memory accordingly. +- * +- * This callback is optional. +- */ +- void (*op_free)(struct drm_gpuva_op *op); +- +- /** +- * @sm_step_map: called from &drm_gpuva_sm_map to finally insert the +- * mapping once all previous steps were completed +- * +- * The &priv pointer matches the one the driver passed to +- * &drm_gpuva_sm_map or &drm_gpuva_sm_unmap, respectively. +- * +- * Can be NULL if &drm_gpuva_sm_map is used. 
+- */ +- int (*sm_step_map)(struct drm_gpuva_op *op, void *priv); +- +- /** +- * @sm_step_remap: called from &drm_gpuva_sm_map and +- * &drm_gpuva_sm_unmap to split up an existent mapping +- * +- * This callback is called when existent mapping needs to be split up. +- * This is the case when either a newly requested mapping overlaps or +- * is enclosed by an existent mapping or a partial unmap of an existent +- * mapping is requested. +- * +- * The &priv pointer matches the one the driver passed to +- * &drm_gpuva_sm_map or &drm_gpuva_sm_unmap, respectively. +- * +- * Can be NULL if neither &drm_gpuva_sm_map nor &drm_gpuva_sm_unmap is +- * used. +- */ +- int (*sm_step_remap)(struct drm_gpuva_op *op, void *priv); +- +- /** +- * @sm_step_unmap: called from &drm_gpuva_sm_map and +- * &drm_gpuva_sm_unmap to unmap an existent mapping +- * +- * This callback is called when existent mapping needs to be unmapped. +- * This is the case when either a newly requested mapping encloses an +- * existent mapping or an unmap of an existent mapping is requested. +- * +- * The &priv pointer matches the one the driver passed to +- * &drm_gpuva_sm_map or &drm_gpuva_sm_unmap, respectively. +- * +- * Can be NULL if neither &drm_gpuva_sm_map nor &drm_gpuva_sm_unmap is +- * used. +- */ +- int (*sm_step_unmap)(struct drm_gpuva_op *op, void *priv); +-}; +- +-int drm_gpuva_sm_map(struct drm_gpuva_manager *mgr, void *priv, +- u64 addr, u64 range, +- struct drm_gem_object *obj, u64 offset); +- +-int drm_gpuva_sm_unmap(struct drm_gpuva_manager *mgr, void *priv, +- u64 addr, u64 range); +- +-void drm_gpuva_map(struct drm_gpuva_manager *mgr, +- struct drm_gpuva *va, +- struct drm_gpuva_op_map *op); +- +-void drm_gpuva_remap(struct drm_gpuva *prev, +- struct drm_gpuva *next, +- struct drm_gpuva_op_remap *op); +- +-void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op); +- +-#endif /* __DRM_GPUVA_MGR_H__ */ +--- /dev/null ++++ b/include/drm/drm_gpuvm.h +@@ -0,0 +1,705 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++ ++#ifndef __DRM_GPUVM_H__ ++#define __DRM_GPUVM_H__ ++ ++/* ++ * Copyright (c) 2022 Red Hat. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++ ++struct drm_gpuvm; ++struct drm_gpuvm_ops; ++ ++/** ++ * enum drm_gpuva_flags - flags for struct drm_gpuva ++ */ ++enum drm_gpuva_flags { ++ /** ++ * @DRM_GPUVA_INVALIDATED: ++ * ++ * Flag indicating that the &drm_gpuva's backing GEM is invalidated. 
++ */ ++ DRM_GPUVA_INVALIDATED = (1 << 0), ++ ++ /** ++ * @DRM_GPUVA_SPARSE: ++ * ++ * Flag indicating that the &drm_gpuva is a sparse mapping. ++ */ ++ DRM_GPUVA_SPARSE = (1 << 1), ++ ++ /** ++ * @DRM_GPUVA_USERBITS: user defined bits ++ */ ++ DRM_GPUVA_USERBITS = (1 << 2), ++}; ++ ++/** ++ * struct drm_gpuva - structure to track a GPU VA mapping ++ * ++ * This structure represents a GPU VA mapping and is associated with a ++ * &drm_gpuvm. ++ * ++ * Typically, this structure is embedded in bigger driver structures. ++ */ ++struct drm_gpuva { ++ /** ++ * @vm: the &drm_gpuvm this object is associated with ++ */ ++ struct drm_gpuvm *vm; ++ ++ /** ++ * @flags: the &drm_gpuva_flags for this mapping ++ */ ++ enum drm_gpuva_flags flags; ++ ++ /** ++ * @va: structure containing the address and range of the &drm_gpuva ++ */ ++ struct { ++ /** ++ * @addr: the start address ++ */ ++ u64 addr; ++ ++ /* ++ * @range: the range ++ */ ++ u64 range; ++ } va; ++ ++ /** ++ * @gem: structure containing the &drm_gem_object and it's offset ++ */ ++ struct { ++ /** ++ * @offset: the offset within the &drm_gem_object ++ */ ++ u64 offset; ++ ++ /** ++ * @obj: the mapped &drm_gem_object ++ */ ++ struct drm_gem_object *obj; ++ ++ /** ++ * @entry: the &list_head to attach this object to a &drm_gem_object ++ */ ++ struct list_head entry; ++ } gem; ++ ++ /** ++ * @rb: structure containing data to store &drm_gpuvas in a rb-tree ++ */ ++ struct { ++ /** ++ * @rb: the rb-tree node ++ */ ++ struct rb_node node; ++ ++ /** ++ * @entry: The &list_head to additionally connect &drm_gpuvas ++ * in the same order they appear in the interval tree. This is ++ * useful to keep iterating &drm_gpuvas from a start node found ++ * through the rb-tree while doing modifications on the rb-tree ++ * itself. 
++ */ ++ struct list_head entry; ++ ++ /** ++ * @__subtree_last: needed by the interval tree, holding last-in-subtree ++ */ ++ u64 __subtree_last; ++ } rb; ++}; ++ ++int drm_gpuva_insert(struct drm_gpuvm *gpuvm, struct drm_gpuva *va); ++void drm_gpuva_remove(struct drm_gpuva *va); ++ ++void drm_gpuva_link(struct drm_gpuva *va); ++void drm_gpuva_unlink(struct drm_gpuva *va); ++ ++struct drm_gpuva *drm_gpuva_find(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range); ++struct drm_gpuva *drm_gpuva_find_first(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range); ++struct drm_gpuva *drm_gpuva_find_prev(struct drm_gpuvm *gpuvm, u64 start); ++struct drm_gpuva *drm_gpuva_find_next(struct drm_gpuvm *gpuvm, u64 end); ++ ++static inline void drm_gpuva_init(struct drm_gpuva *va, u64 addr, u64 range, ++ struct drm_gem_object *obj, u64 offset) ++{ ++ va->va.addr = addr; ++ va->va.range = range; ++ va->gem.obj = obj; ++ va->gem.offset = offset; ++} ++ ++/** ++ * drm_gpuva_invalidate() - sets whether the backing GEM of this &drm_gpuva is ++ * invalidated ++ * @va: the &drm_gpuva to set the invalidate flag for ++ * @invalidate: indicates whether the &drm_gpuva is invalidated ++ */ ++static inline void drm_gpuva_invalidate(struct drm_gpuva *va, bool invalidate) ++{ ++ if (invalidate) ++ va->flags |= DRM_GPUVA_INVALIDATED; ++ else ++ va->flags &= ~DRM_GPUVA_INVALIDATED; ++} ++ ++/** ++ * drm_gpuva_invalidated() - indicates whether the backing BO of this &drm_gpuva ++ * is invalidated ++ * @va: the &drm_gpuva to check ++ */ ++static inline bool drm_gpuva_invalidated(struct drm_gpuva *va) ++{ ++ return va->flags & DRM_GPUVA_INVALIDATED; ++} ++ ++/** ++ * struct drm_gpuvm - DRM GPU VA Manager ++ * ++ * The DRM GPU VA Manager keeps track of a GPU's virtual address space by using ++ * &maple_tree structures. Typically, this structure is embedded in bigger ++ * driver structures. ++ * ++ * Drivers can pass addresses and ranges in an arbitrary unit, e.g. bytes or ++ * pages. ++ * ++ * There should be one manager instance per GPU virtual address space. 
++ */ ++struct drm_gpuvm { ++ /** ++ * @name: the name of the DRM GPU VA space ++ */ ++ const char *name; ++ ++ /** ++ * @mm_start: start of the VA space ++ */ ++ u64 mm_start; ++ ++ /** ++ * @mm_range: length of the VA space ++ */ ++ u64 mm_range; ++ ++ /** ++ * @rb: structures to track &drm_gpuva entries ++ */ ++ struct { ++ /** ++ * @tree: the rb-tree to track GPU VA mappings ++ */ ++ struct rb_root_cached tree; ++ ++ /** ++ * @list: the &list_head to track GPU VA mappings ++ */ ++ struct list_head list; ++ } rb; ++ ++ /** ++ * @kernel_alloc_node: ++ * ++ * &drm_gpuva representing the address space cutout reserved for ++ * the kernel ++ */ ++ struct drm_gpuva kernel_alloc_node; ++ ++ /** ++ * @ops: &drm_gpuvm_ops providing the split/merge steps to drivers ++ */ ++ const struct drm_gpuvm_ops *ops; ++}; ++ ++void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ u64 start_offset, u64 range, ++ u64 reserve_offset, u64 reserve_range, ++ const struct drm_gpuvm_ops *ops); ++void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); ++ ++bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); ++ ++static inline struct drm_gpuva * ++__drm_gpuva_next(struct drm_gpuva *va) ++{ ++ if (va && !list_is_last(&va->rb.entry, &va->vm->rb.list)) ++ return list_next_entry(va, rb.entry); ++ ++ return NULL; ++} ++ ++/** ++ * drm_gpuvm_for_each_va_range() - iterate over a range of &drm_gpuvas ++ * @va__: &drm_gpuva structure to assign to in each iteration step ++ * @gpuvm__: &drm_gpuvm to walk over ++ * @start__: starting offset, the first gpuva will overlap this ++ * @end__: ending offset, the last gpuva will start before this (but may ++ * overlap) ++ * ++ * This iterator walks over all &drm_gpuvas in the &drm_gpuvm that lie ++ * between @start__ and @end__. It is implemented similarly to list_for_each(), ++ * but is using the &drm_gpuvm's internal interval tree to accelerate ++ * the search for the starting &drm_gpuva, and hence isn't safe against removal ++ * of elements. It assumes that @end__ is within (or is the upper limit of) the ++ * &drm_gpuvm. This iterator does not skip over the &drm_gpuvm's ++ * @kernel_alloc_node. ++ */ ++#define drm_gpuvm_for_each_va_range(va__, gpuvm__, start__, end__) \ ++ for (va__ = drm_gpuva_find_first((gpuvm__), (start__), (end__) - (start__)); \ ++ va__ && (va__->va.addr < (end__)); \ ++ va__ = __drm_gpuva_next(va__)) ++ ++/** ++ * drm_gpuvm_for_each_va_range_safe() - safely iterate over a range of ++ * &drm_gpuvas ++ * @va__: &drm_gpuva to assign to in each iteration step ++ * @next__: another &drm_gpuva to use as temporary storage ++ * @gpuvm__: &drm_gpuvm to walk over ++ * @start__: starting offset, the first gpuva will overlap this ++ * @end__: ending offset, the last gpuva will start before this (but may ++ * overlap) ++ * ++ * This iterator walks over all &drm_gpuvas in the &drm_gpuvm that lie ++ * between @start__ and @end__. It is implemented similarly to ++ * list_for_each_safe(), but is using the &drm_gpuvm's internal interval ++ * tree to accelerate the search for the starting &drm_gpuva, and hence is safe ++ * against removal of elements. It assumes that @end__ is within (or is the ++ * upper limit of) the &drm_gpuvm. This iterator does not skip over the ++ * &drm_gpuvm's @kernel_alloc_node. 
++ */ ++#define drm_gpuvm_for_each_va_range_safe(va__, next__, gpuvm__, start__, end__) \ ++ for (va__ = drm_gpuva_find_first((gpuvm__), (start__), (end__) - (start__)), \ ++ next__ = __drm_gpuva_next(va__); \ ++ va__ && (va__->va.addr < (end__)); \ ++ va__ = next__, next__ = __drm_gpuva_next(va__)) ++ ++/** ++ * drm_gpuvm_for_each_va() - iterate over all &drm_gpuvas ++ * @va__: &drm_gpuva to assign to in each iteration step ++ * @gpuvm__: &drm_gpuvm to walk over ++ * ++ * This iterator walks over all &drm_gpuva structures associated with the given ++ * &drm_gpuvm. ++ */ ++#define drm_gpuvm_for_each_va(va__, gpuvm__) \ ++ list_for_each_entry(va__, &(gpuvm__)->rb.list, rb.entry) ++ ++/** ++ * drm_gpuvm_for_each_va_safe() - safely iterate over all &drm_gpuvas ++ * @va__: &drm_gpuva to assign to in each iteration step ++ * @next__: another &drm_gpuva to use as temporary storage ++ * @gpuvm__: &drm_gpuvm to walk over ++ * ++ * This iterator walks over all &drm_gpuva structures associated with the given ++ * &drm_gpuvm. It is implemented with list_for_each_entry_safe(), and ++ * hence safe against the removal of elements. ++ */ ++#define drm_gpuvm_for_each_va_safe(va__, next__, gpuvm__) \ ++ list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry) ++ ++/** ++ * enum drm_gpuva_op_type - GPU VA operation type ++ * ++ * Operations to alter the GPU VA mappings tracked by the &drm_gpuvm. ++ */ ++enum drm_gpuva_op_type { ++ /** ++ * @DRM_GPUVA_OP_MAP: the map op type ++ */ ++ DRM_GPUVA_OP_MAP, ++ ++ /** ++ * @DRM_GPUVA_OP_REMAP: the remap op type ++ */ ++ DRM_GPUVA_OP_REMAP, ++ ++ /** ++ * @DRM_GPUVA_OP_UNMAP: the unmap op type ++ */ ++ DRM_GPUVA_OP_UNMAP, ++ ++ /** ++ * @DRM_GPUVA_OP_PREFETCH: the prefetch op type ++ */ ++ DRM_GPUVA_OP_PREFETCH, ++}; ++ ++/** ++ * struct drm_gpuva_op_map - GPU VA map operation ++ * ++ * This structure represents a single map operation generated by the ++ * DRM GPU VA manager. ++ */ ++struct drm_gpuva_op_map { ++ /** ++ * @va: structure containing address and range of a map ++ * operation ++ */ ++ struct { ++ /** ++ * @addr: the base address of the new mapping ++ */ ++ u64 addr; ++ ++ /** ++ * @range: the range of the new mapping ++ */ ++ u64 range; ++ } va; ++ ++ /** ++ * @gem: structure containing the &drm_gem_object and it's offset ++ */ ++ struct { ++ /** ++ * @offset: the offset within the &drm_gem_object ++ */ ++ u64 offset; ++ ++ /** ++ * @obj: the &drm_gem_object to map ++ */ ++ struct drm_gem_object *obj; ++ } gem; ++}; ++ ++/** ++ * struct drm_gpuva_op_unmap - GPU VA unmap operation ++ * ++ * This structure represents a single unmap operation generated by the ++ * DRM GPU VA manager. ++ */ ++struct drm_gpuva_op_unmap { ++ /** ++ * @va: the &drm_gpuva to unmap ++ */ ++ struct drm_gpuva *va; ++ ++ /** ++ * @keep: ++ * ++ * Indicates whether this &drm_gpuva is physically contiguous with the ++ * original mapping request. ++ * ++ * Optionally, if &keep is set, drivers may keep the actual page table ++ * mappings for this &drm_gpuva, adding the missing page table entries ++ * only and update the &drm_gpuvm accordingly. ++ */ ++ bool keep; ++}; ++ ++/** ++ * struct drm_gpuva_op_remap - GPU VA remap operation ++ * ++ * This represents a single remap operation generated by the DRM GPU VA manager. ++ * ++ * A remap operation is generated when an existing GPU VA mmapping is split up ++ * by inserting a new GPU VA mapping or by partially unmapping existent ++ * mapping(s), hence it consists of a maximum of two map and one unmap ++ * operation. 
++ * ++ * The @unmap operation takes care of removing the original existing mapping. ++ * @prev is used to remap the preceding part, @next the subsequent part. ++ * ++ * If either a new mapping's start address is aligned with the start address ++ * of the old mapping or the new mapping's end address is aligned with the ++ * end address of the old mapping, either @prev or @next is NULL. ++ * ++ * Note, the reason for a dedicated remap operation, rather than arbitrary ++ * unmap and map operations, is to give drivers the chance of extracting driver ++ * specific data for creating the new mappings from the unmap operations's ++ * &drm_gpuva structure which typically is embedded in larger driver specific ++ * structures. ++ */ ++struct drm_gpuva_op_remap { ++ /** ++ * @prev: the preceding part of a split mapping ++ */ ++ struct drm_gpuva_op_map *prev; ++ ++ /** ++ * @next: the subsequent part of a split mapping ++ */ ++ struct drm_gpuva_op_map *next; ++ ++ /** ++ * @unmap: the unmap operation for the original existing mapping ++ */ ++ struct drm_gpuva_op_unmap *unmap; ++}; ++ ++/** ++ * struct drm_gpuva_op_prefetch - GPU VA prefetch operation ++ * ++ * This structure represents a single prefetch operation generated by the ++ * DRM GPU VA manager. ++ */ ++struct drm_gpuva_op_prefetch { ++ /** ++ * @va: the &drm_gpuva to prefetch ++ */ ++ struct drm_gpuva *va; ++}; ++ ++/** ++ * struct drm_gpuva_op - GPU VA operation ++ * ++ * This structure represents a single generic operation. ++ * ++ * The particular type of the operation is defined by @op. ++ */ ++struct drm_gpuva_op { ++ /** ++ * @entry: ++ * ++ * The &list_head used to distribute instances of this struct within ++ * &drm_gpuva_ops. ++ */ ++ struct list_head entry; ++ ++ /** ++ * @op: the type of the operation ++ */ ++ enum drm_gpuva_op_type op; ++ ++ union { ++ /** ++ * @map: the map operation ++ */ ++ struct drm_gpuva_op_map map; ++ ++ /** ++ * @remap: the remap operation ++ */ ++ struct drm_gpuva_op_remap remap; ++ ++ /** ++ * @unmap: the unmap operation ++ */ ++ struct drm_gpuva_op_unmap unmap; ++ ++ /** ++ * @prefetch: the prefetch operation ++ */ ++ struct drm_gpuva_op_prefetch prefetch; ++ }; ++}; ++ ++/** ++ * struct drm_gpuva_ops - wraps a list of &drm_gpuva_op ++ */ ++struct drm_gpuva_ops { ++ /** ++ * @list: the &list_head ++ */ ++ struct list_head list; ++}; ++ ++/** ++ * drm_gpuva_for_each_op() - iterator to walk over &drm_gpuva_ops ++ * @op: &drm_gpuva_op to assign in each iteration step ++ * @ops: &drm_gpuva_ops to walk ++ * ++ * This iterator walks over all ops within a given list of operations. ++ */ ++#define drm_gpuva_for_each_op(op, ops) list_for_each_entry(op, &(ops)->list, entry) ++ ++/** ++ * drm_gpuva_for_each_op_safe() - iterator to safely walk over &drm_gpuva_ops ++ * @op: &drm_gpuva_op to assign in each iteration step ++ * @next: &next &drm_gpuva_op to store the next step ++ * @ops: &drm_gpuva_ops to walk ++ * ++ * This iterator walks over all ops within a given list of operations. It is ++ * implemented with list_for_each_safe(), so save against removal of elements. ++ */ ++#define drm_gpuva_for_each_op_safe(op, next, ops) \ ++ list_for_each_entry_safe(op, next, &(ops)->list, entry) ++ ++/** ++ * drm_gpuva_for_each_op_from_reverse() - iterate backwards from the given point ++ * @op: &drm_gpuva_op to assign in each iteration step ++ * @ops: &drm_gpuva_ops to walk ++ * ++ * This iterator walks over all ops within a given list of operations beginning ++ * from the given operation in reverse order. 
++ */ ++#define drm_gpuva_for_each_op_from_reverse(op, ops) \ ++ list_for_each_entry_from_reverse(op, &(ops)->list, entry) ++ ++/** ++ * drm_gpuva_first_op() - returns the first &drm_gpuva_op from &drm_gpuva_ops ++ * @ops: the &drm_gpuva_ops to get the fist &drm_gpuva_op from ++ */ ++#define drm_gpuva_first_op(ops) \ ++ list_first_entry(&(ops)->list, struct drm_gpuva_op, entry) ++ ++/** ++ * drm_gpuva_last_op() - returns the last &drm_gpuva_op from &drm_gpuva_ops ++ * @ops: the &drm_gpuva_ops to get the last &drm_gpuva_op from ++ */ ++#define drm_gpuva_last_op(ops) \ ++ list_last_entry(&(ops)->list, struct drm_gpuva_op, entry) ++ ++/** ++ * drm_gpuva_prev_op() - previous &drm_gpuva_op in the list ++ * @op: the current &drm_gpuva_op ++ */ ++#define drm_gpuva_prev_op(op) list_prev_entry(op, entry) ++ ++/** ++ * drm_gpuva_next_op() - next &drm_gpuva_op in the list ++ * @op: the current &drm_gpuva_op ++ */ ++#define drm_gpuva_next_op(op) list_next_entry(op, entry) ++ ++struct drm_gpuva_ops * ++drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range, ++ struct drm_gem_object *obj, u64 offset); ++struct drm_gpuva_ops * ++drm_gpuvm_sm_unmap_ops_create(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range); ++ ++struct drm_gpuva_ops * ++drm_gpuvm_prefetch_ops_create(struct drm_gpuvm *gpuvm, ++ u64 addr, u64 range); ++ ++struct drm_gpuva_ops * ++drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj); ++ ++void drm_gpuva_ops_free(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva_ops *ops); ++ ++static inline void drm_gpuva_init_from_op(struct drm_gpuva *va, ++ struct drm_gpuva_op_map *op) ++{ ++ drm_gpuva_init(va, op->va.addr, op->va.range, ++ op->gem.obj, op->gem.offset); ++} ++ ++/** ++ * struct drm_gpuvm_ops - callbacks for split/merge steps ++ * ++ * This structure defines the callbacks used by &drm_gpuvm_sm_map and ++ * &drm_gpuvm_sm_unmap to provide the split/merge steps for map and unmap ++ * operations to drivers. ++ */ ++struct drm_gpuvm_ops { ++ /** ++ * @op_alloc: called when the &drm_gpuvm allocates ++ * a struct drm_gpuva_op ++ * ++ * Some drivers may want to embed struct drm_gpuva_op into driver ++ * specific structures. By implementing this callback drivers can ++ * allocate memory accordingly. ++ * ++ * This callback is optional. ++ */ ++ struct drm_gpuva_op *(*op_alloc)(void); ++ ++ /** ++ * @op_free: called when the &drm_gpuvm frees a ++ * struct drm_gpuva_op ++ * ++ * Some drivers may want to embed struct drm_gpuva_op into driver ++ * specific structures. By implementing this callback drivers can ++ * free the previously allocated memory accordingly. ++ * ++ * This callback is optional. ++ */ ++ void (*op_free)(struct drm_gpuva_op *op); ++ ++ /** ++ * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the ++ * mapping once all previous steps were completed ++ * ++ * The &priv pointer matches the one the driver passed to ++ * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. ++ * ++ * Can be NULL if &drm_gpuvm_sm_map is used. ++ */ ++ int (*sm_step_map)(struct drm_gpuva_op *op, void *priv); ++ ++ /** ++ * @sm_step_remap: called from &drm_gpuvm_sm_map and ++ * &drm_gpuvm_sm_unmap to split up an existent mapping ++ * ++ * This callback is called when existent mapping needs to be split up. ++ * This is the case when either a newly requested mapping overlaps or ++ * is enclosed by an existent mapping or a partial unmap of an existent ++ * mapping is requested. 
++ * ++ * The &priv pointer matches the one the driver passed to ++ * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. ++ * ++ * Can be NULL if neither &drm_gpuvm_sm_map nor &drm_gpuvm_sm_unmap is ++ * used. ++ */ ++ int (*sm_step_remap)(struct drm_gpuva_op *op, void *priv); ++ ++ /** ++ * @sm_step_unmap: called from &drm_gpuvm_sm_map and ++ * &drm_gpuvm_sm_unmap to unmap an existent mapping ++ * ++ * This callback is called when existent mapping needs to be unmapped. ++ * This is the case when either a newly requested mapping encloses an ++ * existent mapping or an unmap of an existent mapping is requested. ++ * ++ * The &priv pointer matches the one the driver passed to ++ * &drm_gpuvm_sm_map or &drm_gpuvm_sm_unmap, respectively. ++ * ++ * Can be NULL if neither &drm_gpuvm_sm_map nor &drm_gpuvm_sm_unmap is ++ * used. ++ */ ++ int (*sm_step_unmap)(struct drm_gpuva_op *op, void *priv); ++}; ++ ++int drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv, ++ u64 addr, u64 range, ++ struct drm_gem_object *obj, u64 offset); ++ ++int drm_gpuvm_sm_unmap(struct drm_gpuvm *gpuvm, void *priv, ++ u64 addr, u64 range); ++ ++void drm_gpuva_map(struct drm_gpuvm *gpuvm, ++ struct drm_gpuva *va, ++ struct drm_gpuva_op_map *op); ++ ++void drm_gpuva_remap(struct drm_gpuva *prev, ++ struct drm_gpuva *next, ++ struct drm_gpuva_op_remap *op); ++ ++void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op); ++ ++#endif /* __DRM_GPUVM_H__ */ diff --git a/patches-6.6/034-02-v6.7-drm-gpuvm-allow-building-as-module.patch b/patches-6.6/034-02-v6.7-drm-gpuvm-allow-building-as-module.patch new file mode 100644 index 0000000..6dba2d3 --- /dev/null +++ b/patches-6.6/034-02-v6.7-drm-gpuvm-allow-building-as-module.patch @@ -0,0 +1,78 @@ +From fe7acaa727e135621c062caa2d6d3ad4ad0b0185 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 20 Sep 2023 16:42:35 +0200 +Subject: [PATCH] drm/gpuvm: allow building as module +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently, the DRM GPUVM does not have any core dependencies preventing +a module build. + +Also, new features from subsequent patches require helpers (namely +drm_exec) which can be built as module. 
+ +Reviewed-by: Christian König +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20230920144343.64830-3-dakr@redhat.com +--- + drivers/gpu/drm/Kconfig | 7 +++++++ + drivers/gpu/drm/Makefile | 2 +- + drivers/gpu/drm/drm_gpuvm.c | 3 +++ + drivers/gpu/drm/nouveau/Kconfig | 1 + + 4 files changed, 12 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/Kconfig ++++ b/drivers/gpu/drm/Kconfig +@@ -217,6 +217,13 @@ config DRM_EXEC + help + Execution context for command submissions + ++config DRM_GPUVM ++ tristate ++ depends on DRM ++ help ++ GPU-VM representation providing helpers to manage a GPUs virtual ++ address space ++ + config DRM_BUDDY + tristate + depends on DRM +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -45,7 +45,6 @@ drm-y := \ + drm_vblank.o \ + drm_vblank_work.o \ + drm_vma_manager.o \ +- drm_gpuvm.o \ + drm_writeback.o + drm-$(CONFIG_DRM_LEGACY) += \ + drm_agpsupport.o \ +@@ -81,6 +80,7 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRK + # + # + obj-$(CONFIG_DRM_EXEC) += drm_exec.o ++obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o + + obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1721,3 +1721,6 @@ drm_gpuva_ops_free(struct drm_gpuvm *gpu + kfree(ops); + } + EXPORT_SYMBOL_GPL(drm_gpuva_ops_free); ++ ++MODULE_DESCRIPTION("DRM GPUVM"); ++MODULE_LICENSE("GPL"); +--- a/drivers/gpu/drm/nouveau/Kconfig ++++ b/drivers/gpu/drm/nouveau/Kconfig +@@ -11,6 +11,7 @@ config DRM_NOUVEAU + select DRM_TTM + select DRM_TTM_HELPER + select DRM_EXEC ++ select DRM_GPUVM + select DRM_SCHED + select I2C + select I2C_ALGOBIT diff --git a/patches-6.6/034-03-v6.7-drm-nouveau-uvmm-rename-umgr-to-base-.patch b/patches-6.6/034-03-v6.7-drm-nouveau-uvmm-rename-umgr-to-base-.patch new file mode 100644 index 0000000..8576238 --- /dev/null +++ b/patches-6.6/034-03-v6.7-drm-nouveau-uvmm-rename-umgr-to-base-.patch @@ -0,0 +1,208 @@ +From 78f54469b871db5ba8ea49abd4e5994e97bd525b Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 20 Sep 2023 16:42:36 +0200 +Subject: [PATCH] drm/nouveau: uvmm: rename 'umgr' to 'base' + +Rename struct drm_gpuvm within struct nouveau_uvmm from 'umgr' to base. 
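+
+A minimal sketch of the embedding pattern behind this rename (the "my_uvmm"
+names below are illustrative, not nouveau code): the driver VM embeds
+struct drm_gpuvm as a member named 'base' and recovers its own structure
+with container_of(), exactly as the uvmm_from_gpuvm() helper below does.
+
+  #include <linux/container_of.h>
+  #include <drm/drm_gpuvm.h>
+
+  struct my_uvmm {
+          struct drm_gpuvm base;          /* generic GPU VA space state */
+          /* driver specific members follow */
+  };
+
+  static inline struct my_uvmm *to_my_uvmm(struct drm_gpuvm *gpuvm)
+  {
+          return container_of(gpuvm, struct my_uvmm, base);
+  }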
+ +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20230920144343.64830-4-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_debugfs.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_exec.c | 4 +-- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 32 +++++++++++------------ + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 6 ++--- + 4 files changed, 22 insertions(+), 22 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c ++++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c +@@ -231,7 +231,7 @@ nouveau_debugfs_gpuva(struct seq_file *m + continue; + + nouveau_uvmm_lock(uvmm); +- drm_debugfs_gpuva_info(m, &uvmm->umgr); ++ drm_debugfs_gpuva_info(m, &uvmm->base); + seq_puts(m, "\n"); + nouveau_debugfs_gpuva_regions(m, uvmm); + nouveau_uvmm_unlock(uvmm); +--- a/drivers/gpu/drm/nouveau/nouveau_exec.c ++++ b/drivers/gpu/drm/nouveau/nouveau_exec.c +@@ -107,8 +107,8 @@ nouveau_exec_job_submit(struct nouveau_j + drm_exec_until_all_locked(exec) { + struct drm_gpuva *va; + +- drm_gpuvm_for_each_va(va, &uvmm->umgr) { +- if (unlikely(va == &uvmm->umgr.kernel_alloc_node)) ++ drm_gpuvm_for_each_va(va, &uvmm->base) { ++ if (unlikely(va == &uvmm->base.kernel_alloc_node)) + continue; + + ret = drm_exec_prepare_obj(exec, va->gem.obj, 1); +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -329,7 +329,7 @@ nouveau_uvma_region_create(struct nouvea + struct nouveau_uvma_region *reg; + int ret; + +- if (!drm_gpuvm_interval_empty(&uvmm->umgr, addr, range)) ++ if (!drm_gpuvm_interval_empty(&uvmm->base, addr, range)) + return -ENOSPC; + + ret = nouveau_uvma_region_alloc(®); +@@ -384,7 +384,7 @@ nouveau_uvma_region_empty(struct nouveau + { + struct nouveau_uvmm *uvmm = reg->uvmm; + +- return drm_gpuvm_interval_empty(&uvmm->umgr, ++ return drm_gpuvm_interval_empty(&uvmm->base, + reg->va.addr, + reg->va.range); + } +@@ -589,7 +589,7 @@ op_map_prepare(struct nouveau_uvmm *uvmm + uvma->region = args->region; + uvma->kind = args->kind; + +- drm_gpuva_map(&uvmm->umgr, &uvma->va, op); ++ drm_gpuva_map(&uvmm->base, &uvma->va, op); + + /* Keep a reference until this uvma is destroyed. 
*/ + nouveau_uvma_gem_get(uvma); +@@ -1194,7 +1194,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + goto unwind_continue; + } + +- op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->base, + op->va.addr, + op->va.range); + if (IS_ERR(op->ops)) { +@@ -1205,7 +1205,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + ret = nouveau_uvmm_sm_unmap_prepare(uvmm, &op->new, + op->ops); + if (ret) { +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + op->ops = NULL; + op->reg = NULL; + goto unwind_continue; +@@ -1240,7 +1240,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + } + } + +- op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->base, + op->va.addr, + op->va.range, + op->gem.obj, +@@ -1256,7 +1256,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + op->va.range, + op->flags & 0xff); + if (ret) { +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + op->ops = NULL; + goto unwind_continue; + } +@@ -1264,7 +1264,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + break; + } + case OP_UNMAP: +- op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->umgr, ++ op->ops = drm_gpuvm_sm_unmap_ops_create(&uvmm->base, + op->va.addr, + op->va.range); + if (IS_ERR(op->ops)) { +@@ -1275,7 +1275,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + ret = nouveau_uvmm_sm_unmap_prepare(uvmm, &op->new, + op->ops); + if (ret) { +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + op->ops = NULL; + goto unwind_continue; + } +@@ -1404,7 +1404,7 @@ unwind: + break; + } + +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + op->ops = NULL; + op->reg = NULL; + } +@@ -1509,7 +1509,7 @@ nouveau_uvmm_bind_job_free_work_fn(struc + } + + if (!IS_ERR_OR_NULL(op->ops)) +- drm_gpuva_ops_free(&uvmm->umgr, op->ops); ++ drm_gpuva_ops_free(&uvmm->base, op->ops); + + if (obj) + drm_gem_object_put(obj); +@@ -1836,7 +1836,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + uvmm->kernel_managed_addr = kernel_managed_addr; + uvmm->kernel_managed_size = kernel_managed_size; + +- drm_gpuvm_init(&uvmm->umgr, cli->name, ++ drm_gpuvm_init(&uvmm->base, cli->name, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, + kernel_managed_addr, kernel_managed_size, +@@ -1855,7 +1855,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + return 0; + + out_free_gpuva_mgr: +- drm_gpuvm_destroy(&uvmm->umgr); ++ drm_gpuvm_destroy(&uvmm->base); + out_unlock: + mutex_unlock(&cli->mutex); + return ret; +@@ -1877,11 +1877,11 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + wait_event(entity->job.wq, list_empty(&entity->job.list.head)); + + nouveau_uvmm_lock(uvmm); +- drm_gpuvm_for_each_va_safe(va, next, &uvmm->umgr) { ++ drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) { + struct nouveau_uvma *uvma = uvma_from_va(va); + struct drm_gem_object *obj = va->gem.obj; + +- if (unlikely(va == &uvmm->umgr.kernel_alloc_node)) ++ if (unlikely(va == &uvmm->base.kernel_alloc_node)) + continue; + + drm_gpuva_remove(va); +@@ -1910,7 +1910,7 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + + mutex_lock(&cli->mutex); + nouveau_vmm_fini(&uvmm->vmm); +- drm_gpuvm_destroy(&uvmm->umgr); ++ drm_gpuvm_destroy(&uvmm->base); + mutex_unlock(&cli->mutex); + + dma_resv_fini(&uvmm->resv); +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -8,8 +8,8 @@ + #include "nouveau_drv.h" + + struct nouveau_uvmm { ++ struct drm_gpuvm base; + struct 
nouveau_vmm vmm; +- struct drm_gpuvm umgr; + struct maple_tree region_mt; + struct mutex mutex; + struct dma_resv resv; +@@ -41,10 +41,10 @@ struct nouveau_uvma { + u8 kind; + }; + +-#define uvmm_from_mgr(x) container_of((x), struct nouveau_uvmm, umgr) ++#define uvmm_from_gpuvm(x) container_of((x), struct nouveau_uvmm, base) + #define uvma_from_va(x) container_of((x), struct nouveau_uvma, va) + +-#define to_uvmm(x) uvmm_from_mgr((x)->va.vm) ++#define to_uvmm(x) uvmm_from_gpuvm((x)->va.vm) + + struct nouveau_uvmm_bind_job { + struct nouveau_job base; diff --git a/patches-6.6/034-04-v6.7-drm-gpuvm-Dual-licence-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch b/patches-6.6/034-04-v6.7-drm-gpuvm-Dual-licence-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch new file mode 100644 index 0000000..c1b38ad --- /dev/null +++ b/patches-6.6/034-04-v6.7-drm-gpuvm-Dual-licence-the-drm_gpuvm-code-GPL-2.0-OR-MIT.patch @@ -0,0 +1,45 @@ +From f7749a549b4f4db0c02e6b3d3800ea400dd76c12 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= +Date: Tue, 10 Oct 2023 16:27:24 +0200 +Subject: [PATCH] drm/gpuvm: Dual-licence the drm_gpuvm code GPL-2.0 OR MIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Dual-licence in order to make it possible for other non-GPL os'es +to re-implement the code. The use of EXPORT_SYMBOL_GPL() is intentionally +left untouched to prevent use of drm_gpuvm as a proxy for non-GPL drivers +to access GPL-only kernel symbols. + +Much of the ideas and algorithms used in the drm_gpuvm code is already +present in one way or another in MIT-licensed code. + +Cc: Danilo Krummrich +Cc: airlied@gmail.com +Cc: daniel@ffwll.ch +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Thomas Hellström +Acked-by: Danilo Krummrich +Reviewed-by: Francois Dugast +Link: https://patchwork.freedesktop.org/patch/msgid/20231010142725.8920-1-thomas.hellstrom@linux.intel.com +--- + drivers/gpu/drm/drm_gpuvm.c | 2 +- + include/drm/drm_gpuvm.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1,4 +1,4 @@ +-// SPDX-License-Identifier: GPL-2.0-only ++// SPDX-License-Identifier: GPL-2.0 OR MIT + /* + * Copyright (c) 2022 Red Hat. + * +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -1,4 +1,4 @@ +-/* SPDX-License-Identifier: GPL-2.0-only */ ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + + #ifndef __DRM_GPUVM_H__ + #define __DRM_GPUVM_H__ diff --git a/patches-6.6/034-05-v6.8-drm-gpuvm-convert-WARN-to-drm_WARN-variants.patch b/patches-6.6/034-05-v6.8-drm-gpuvm-convert-WARN-to-drm_WARN-variants.patch new file mode 100644 index 0000000..64e1719 --- /dev/null +++ b/patches-6.6/034-05-v6.8-drm-gpuvm-convert-WARN-to-drm_WARN-variants.patch @@ -0,0 +1,165 @@ +From 546ca4d35dccaca6613766ed36ccfb2b5bd63bfe Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:31 +0100 +Subject: [PATCH] drm/gpuvm: convert WARN() to drm_WARN() variants +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Use drm_WARN() and drm_WARN_ON() variants to indicate drivers the +context the failing VM resides in. 
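+
+A short usage sketch, illustrative only (the conditions below are made-up
+examples, not taken from this patch): once the VM carries a &drm_device
+pointer, warnings can be attributed to the failing device rather than being
+anonymous.
+
+  /* fragment; assumes a valid struct drm_gpuvm *gpuvm with gpuvm->drm set */
+  drm_WARN_ON(gpuvm->drm, range == 0);
+  drm_WARN(gpuvm->drm, addr + range < addr,
+           "GPUVA range wraps around the VA space\n");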
+ +Acked-by: Christian König +Reviewed-by: Boris Brezillon +Reviewed-by: Thomas Hellström +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-2-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 32 ++++++++++++++------------ + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 3 ++- + include/drm/drm_gpuvm.h | 7 ++++++ + 3 files changed, 26 insertions(+), 16 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -614,12 +614,12 @@ static int __drm_gpuva_insert(struct drm + static void __drm_gpuva_remove(struct drm_gpuva *va); + + static bool +-drm_gpuvm_check_overflow(u64 addr, u64 range) ++drm_gpuvm_check_overflow(struct drm_gpuvm *gpuvm, u64 addr, u64 range) + { + u64 end; + +- return WARN(check_add_overflow(addr, range, &end), +- "GPUVA address limited to %zu bytes.\n", sizeof(end)); ++ return drm_WARN(gpuvm->drm, check_add_overflow(addr, range, &end), ++ "GPUVA address limited to %zu bytes.\n", sizeof(end)); + } + + static bool +@@ -647,7 +647,7 @@ static bool + drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, + u64 addr, u64 range) + { +- return !drm_gpuvm_check_overflow(addr, range) && ++ return !drm_gpuvm_check_overflow(gpuvm, addr, range) && + drm_gpuvm_in_mm_range(gpuvm, addr, range) && + !drm_gpuvm_in_kernel_node(gpuvm, addr, range); + } +@@ -656,6 +656,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm * + * drm_gpuvm_init() - initialize a &drm_gpuvm + * @gpuvm: pointer to the &drm_gpuvm to initialize + * @name: the name of the GPU VA space ++ * @drm: the &drm_device this VM resides in + * @start_offset: the start offset of the GPU VA space + * @range: the size of the GPU VA space + * @reserve_offset: the start of the kernel reserved GPU VA area +@@ -668,8 +669,8 @@ drm_gpuvm_range_valid(struct drm_gpuvm * + * &name is expected to be managed by the surrounding driver structures. + */ + void +-drm_gpuvm_init(struct drm_gpuvm *gpuvm, +- const char *name, ++drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ struct drm_device *drm, + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops) +@@ -677,20 +678,20 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + gpuvm->rb.tree = RB_ROOT_CACHED; + INIT_LIST_HEAD(&gpuvm->rb.list); + +- drm_gpuvm_check_overflow(start_offset, range); +- gpuvm->mm_start = start_offset; +- gpuvm->mm_range = range; +- + gpuvm->name = name ? 
name : "unknown"; + gpuvm->ops = ops; ++ gpuvm->drm = drm; + +- memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); ++ drm_gpuvm_check_overflow(gpuvm, start_offset, range); ++ gpuvm->mm_start = start_offset; ++ gpuvm->mm_range = range; + ++ memset(&gpuvm->kernel_alloc_node, 0, sizeof(struct drm_gpuva)); + if (reserve_range) { + gpuvm->kernel_alloc_node.va.addr = reserve_offset; + gpuvm->kernel_alloc_node.va.range = reserve_range; + +- if (likely(!drm_gpuvm_check_overflow(reserve_offset, ++ if (likely(!drm_gpuvm_check_overflow(gpuvm, reserve_offset, + reserve_range))) + __drm_gpuva_insert(gpuvm, &gpuvm->kernel_alloc_node); + } +@@ -712,8 +713,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv + if (gpuvm->kernel_alloc_node.va.range) + __drm_gpuva_remove(&gpuvm->kernel_alloc_node); + +- WARN(!RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), +- "GPUVA tree is not empty, potentially leaking memory."); ++ drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), ++ "GPUVA tree is not empty, potentially leaking memory.\n"); + } + EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); + +@@ -795,7 +796,8 @@ drm_gpuva_remove(struct drm_gpuva *va) + struct drm_gpuvm *gpuvm = va->vm; + + if (unlikely(va == &gpuvm->kernel_alloc_node)) { +- WARN(1, "Can't destroy kernel reserved node.\n"); ++ drm_WARN(gpuvm->drm, 1, ++ "Can't destroy kernel reserved node.\n"); + return; + } + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1808,6 +1808,7 @@ int + nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, + u64 kernel_managed_addr, u64 kernel_managed_size) + { ++ struct drm_device *drm = cli->drm->dev; + int ret; + u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; + +@@ -1836,7 +1837,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + uvmm->kernel_managed_addr = kernel_managed_addr; + uvmm->kernel_managed_size = kernel_managed_size; + +- drm_gpuvm_init(&uvmm->base, cli->name, ++ drm_gpuvm_init(&uvmm->base, cli->name, drm, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, + kernel_managed_addr, kernel_managed_size, +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -29,6 +29,7 @@ + #include + #include + ++#include + #include + + struct drm_gpuvm; +@@ -202,6 +203,11 @@ struct drm_gpuvm { + const char *name; + + /** ++ * @drm: the &drm_device this VM lives in ++ */ ++ struct drm_device *drm; ++ ++ /** + * @mm_start: start of the VA space + */ + u64 mm_start; +@@ -241,6 +247,7 @@ struct drm_gpuvm { + }; + + void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ struct drm_device *drm, + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops); diff --git a/patches-6.6/034-06-v6.8-drm-gpuvm-export-drm_gpuvm_range_valid.patch b/patches-6.6/034-06-v6.8-drm-gpuvm-export-drm_gpuvm_range_valid.patch new file mode 100644 index 0000000..cead71a --- /dev/null +++ b/patches-6.6/034-06-v6.8-drm-gpuvm-export-drm_gpuvm_range_valid.patch @@ -0,0 +1,61 @@ +From 9297cfc9405bc6b60540b8b8aaf930b7e449e15a Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:33 +0100 +Subject: [PATCH] drm/gpuvm: export drm_gpuvm_range_valid() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Drivers may use this function to validate userspace requests in advance, +hence export it. 
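+
+A hedged sketch of the intended driver-side use (the function name and the
+page-alignment policy are illustrative, not from an existing driver): reject
+a userspace-supplied VA range up front, before building any map or unmap
+operations for it. The nouveau conversion in the following patch takes
+essentially this shape.
+
+  #include <linux/mm.h>
+  #include <drm/drm_gpuvm.h>
+
+  static int my_validate_va_range(struct drm_gpuvm *gpuvm, u64 addr, u64 range)
+  {
+          if ((addr | range) & ~PAGE_MASK)        /* driver policy: page aligned */
+                  return -EINVAL;
+
+          if (!drm_gpuvm_range_valid(gpuvm, addr, range))
+                  return -EINVAL;                 /* outside the managed VA space */
+
+          return 0;
+  }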
+ +Acked-by: Christian König +Reviewed-by: Thomas Hellström +Reviewed-by: Boris Brezillon +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-4-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 14 +++++++++++++- + include/drm/drm_gpuvm.h | 1 + + 2 files changed, 14 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -643,7 +643,18 @@ drm_gpuvm_in_kernel_node(struct drm_gpuv + return krange && addr < kend && kstart < end; + } + +-static bool ++/** ++ * drm_gpuvm_range_valid() - checks whether the given range is valid for the ++ * given &drm_gpuvm ++ * @gpuvm: the GPUVM to check the range for ++ * @addr: the base address ++ * @range: the range starting from the base address ++ * ++ * Checks whether the range is within the GPUVM's managed boundaries. ++ * ++ * Returns: true for a valid range, false otherwise ++ */ ++bool + drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, + u64 addr, u64 range) + { +@@ -651,6 +662,7 @@ drm_gpuvm_range_valid(struct drm_gpuvm * + drm_gpuvm_in_mm_range(gpuvm, addr, range) && + !drm_gpuvm_in_kernel_node(gpuvm, addr, range); + } ++EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid); + + /** + * drm_gpuvm_init() - initialize a &drm_gpuvm +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -253,6 +253,7 @@ void drm_gpuvm_init(struct drm_gpuvm *gp + const struct drm_gpuvm_ops *ops); + void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); + ++bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + + static inline struct drm_gpuva * diff --git a/patches-6.6/034-07-v6.8-drm-nouveau-make-use-of-drm_gpuvm_range_valid.patch b/patches-6.6/034-07-v6.8-drm-nouveau-make-use-of-drm_gpuvm_range_valid.patch new file mode 100644 index 0000000..3d790e6 --- /dev/null +++ b/patches-6.6/034-07-v6.8-drm-nouveau-make-use-of-drm_gpuvm_range_valid.patch @@ -0,0 +1,66 @@ +From b41e297abd2347075ec640daf0e5da576e3d7418 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:34 +0100 +Subject: [PATCH] drm/nouveau: make use of drm_gpuvm_range_valid() + +Use drm_gpuvm_range_valid() in order to validate userspace requests. 
+ +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-5-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 17 +---------------- + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 3 --- + 2 files changed, 1 insertion(+), 19 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -929,25 +929,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nou + static int + nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range) + { +- u64 end = addr + range; +- u64 kernel_managed_end = uvmm->kernel_managed_addr + +- uvmm->kernel_managed_size; +- + if (addr & ~PAGE_MASK) + return -EINVAL; + + if (range & ~PAGE_MASK) + return -EINVAL; + +- if (end <= addr) +- return -EINVAL; +- +- if (addr < NOUVEAU_VA_SPACE_START || +- end > NOUVEAU_VA_SPACE_END) +- return -EINVAL; +- +- if (addr < kernel_managed_end && +- end > uvmm->kernel_managed_addr) ++ if (!drm_gpuvm_range_valid(&uvmm->base, addr, range)) + return -EINVAL; + + return 0; +@@ -1834,9 +1822,6 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + goto out_unlock; + } + +- uvmm->kernel_managed_addr = kernel_managed_addr; +- uvmm->kernel_managed_size = kernel_managed_size; +- + drm_gpuvm_init(&uvmm->base, cli->name, drm, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -14,9 +14,6 @@ struct nouveau_uvmm { + struct mutex mutex; + struct dma_resv resv; + +- u64 kernel_managed_addr; +- u64 kernel_managed_size; +- + bool disabled; + }; + diff --git a/patches-6.6/034-08-v6.8-drm-gpuvm-add-common-dma-resv-per-struct-drm_gpuvm.patch b/patches-6.6/034-08-v6.8-drm-gpuvm-add-common-dma-resv-per-struct-drm_gpuvm.patch new file mode 100644 index 0000000..c562650 --- /dev/null +++ b/patches-6.6/034-08-v6.8-drm-gpuvm-add-common-dma-resv-per-struct-drm_gpuvm.patch @@ -0,0 +1,205 @@ +From bbe8458037e74b9887ba2f0f0b8084a13ade3a90 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:35 +0100 +Subject: [PATCH] drm/gpuvm: add common dma-resv per struct drm_gpuvm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Provide a common dma-resv for GEM objects not being used outside of this +GPU-VM. This is used in a subsequent patch to generalize dma-resv, +external and evicted object handling and GEM validation. + +Acked-by: Christian König +Reviewed-by: Boris Brezillon +Reviewed-by: Thomas Hellström +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-6-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 53 ++++++++++++++++++++++++++ + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 13 ++++++- + include/drm/drm_gpuvm.h | 33 ++++++++++++++++ + 3 files changed, 97 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -61,6 +61,15 @@ + * contained within struct drm_gpuva already. Hence, for inserting &drm_gpuva + * entries from within dma-fence signalling critical sections it is enough to + * pre-allocate the &drm_gpuva structures. ++ * ++ * &drm_gem_objects which are private to a single VM can share a common ++ * &dma_resv in order to improve locking efficiency (e.g. with &drm_exec). ++ * For this purpose drivers must pass a &drm_gem_object to drm_gpuvm_init(), in ++ * the following called 'resv object', which serves as the container of the ++ * GPUVM's shared &dma_resv. 
This resv object can be a driver specific ++ * &drm_gem_object, such as the &drm_gem_object containing the root page table, ++ * but it can also be a 'dummy' object, which can be allocated with ++ * drm_gpuvm_resv_object_alloc(). + */ + + /** +@@ -664,11 +673,49 @@ drm_gpuvm_range_valid(struct drm_gpuvm * + } + EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid); + ++static void ++drm_gpuvm_gem_object_free(struct drm_gem_object *obj) ++{ ++ drm_gem_object_release(obj); ++ kfree(obj); ++} ++ ++static const struct drm_gem_object_funcs drm_gpuvm_object_funcs = { ++ .free = drm_gpuvm_gem_object_free, ++}; ++ ++/** ++ * drm_gpuvm_resv_object_alloc() - allocate a dummy &drm_gem_object ++ * @drm: the drivers &drm_device ++ * ++ * Allocates a dummy &drm_gem_object which can be passed to drm_gpuvm_init() in ++ * order to serve as root GEM object providing the &drm_resv shared across ++ * &drm_gem_objects local to a single GPUVM. ++ * ++ * Returns: the &drm_gem_object on success, NULL on failure ++ */ ++struct drm_gem_object * ++drm_gpuvm_resv_object_alloc(struct drm_device *drm) ++{ ++ struct drm_gem_object *obj; ++ ++ obj = kzalloc(sizeof(*obj), GFP_KERNEL); ++ if (!obj) ++ return NULL; ++ ++ obj->funcs = &drm_gpuvm_object_funcs; ++ drm_gem_private_object_init(drm, obj, 0); ++ ++ return obj; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_alloc); ++ + /** + * drm_gpuvm_init() - initialize a &drm_gpuvm + * @gpuvm: pointer to the &drm_gpuvm to initialize + * @name: the name of the GPU VA space + * @drm: the &drm_device this VM resides in ++ * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv + * @start_offset: the start offset of the GPU VA space + * @range: the size of the GPU VA space + * @reserve_offset: the start of the kernel reserved GPU VA area +@@ -683,6 +730,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_range_valid) + void + drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + struct drm_device *drm, ++ struct drm_gem_object *r_obj, + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops) +@@ -693,6 +741,9 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + gpuvm->name = name ? name : "unknown"; + gpuvm->ops = ops; + gpuvm->drm = drm; ++ gpuvm->r_obj = r_obj; ++ ++ drm_gem_object_get(r_obj); + + drm_gpuvm_check_overflow(gpuvm, start_offset, range); + gpuvm->mm_start = start_offset; +@@ -727,6 +778,8 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv + + drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), + "GPUVA tree is not empty, potentially leaking memory.\n"); ++ ++ drm_gem_object_put(gpuvm->r_obj); + } + EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1797,8 +1797,9 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + u64 kernel_managed_addr, u64 kernel_managed_size) + { + struct drm_device *drm = cli->drm->dev; +- int ret; ++ struct drm_gem_object *r_obj; + u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; ++ int ret; + + mutex_init(&uvmm->mutex); + dma_resv_init(&uvmm->resv); +@@ -1822,11 +1823,19 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + goto out_unlock; + } + +- drm_gpuvm_init(&uvmm->base, cli->name, drm, ++ r_obj = drm_gpuvm_resv_object_alloc(drm); ++ if (!r_obj) { ++ ret = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, + kernel_managed_addr, kernel_managed_size, + NULL); ++ /* GPUVM takes care from here on. 
*/ ++ drm_gem_object_put(r_obj); + + ret = nvif_vmm_ctor(&cli->mmu, "uvmm", + cli->vmm.vmm.object.oclass, RAW, +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -244,10 +244,16 @@ struct drm_gpuvm { + * @ops: &drm_gpuvm_ops providing the split/merge steps to drivers + */ + const struct drm_gpuvm_ops *ops; ++ ++ /** ++ * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv. ++ */ ++ struct drm_gem_object *r_obj; + }; + + void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, + struct drm_device *drm, ++ struct drm_gem_object *r_obj, + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops); +@@ -256,6 +262,33 @@ void drm_gpuvm_destroy(struct drm_gpuvm + bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + ++struct drm_gem_object * ++drm_gpuvm_resv_object_alloc(struct drm_device *drm); ++ ++/** ++ * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv ++ * @gpuvm__: the &drm_gpuvm ++ * ++ * Returns: a pointer to the &drm_gpuvm's shared &dma_resv ++ */ ++#define drm_gpuvm_resv(gpuvm__) ((gpuvm__)->r_obj->resv) ++ ++/** ++ * drm_gpuvm_resv_obj() - returns the &drm_gem_object holding the &drm_gpuvm's ++ * &dma_resv ++ * @gpuvm__: the &drm_gpuvm ++ * ++ * Returns: a pointer to the &drm_gem_object holding the &drm_gpuvm's shared ++ * &dma_resv ++ */ ++#define drm_gpuvm_resv_obj(gpuvm__) ((gpuvm__)->r_obj) ++ ++#define drm_gpuvm_resv_held(gpuvm__) \ ++ dma_resv_held(drm_gpuvm_resv(gpuvm__)) ++ ++#define drm_gpuvm_resv_assert_held(gpuvm__) \ ++ dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) ++ + static inline struct drm_gpuva * + __drm_gpuva_next(struct drm_gpuva *va) + { diff --git a/patches-6.6/034-09-v6.8-drm-nouveau-make-use-of-the-GPUVM-s-shared-dma-resv.patch b/patches-6.6/034-09-v6.8-drm-nouveau-make-use-of-the-GPUVM-s-shared-dma-resv.patch new file mode 100644 index 0000000..90d3fec --- /dev/null +++ b/patches-6.6/034-09-v6.8-drm-nouveau-make-use-of-the-GPUVM-s-shared-dma-resv.patch @@ -0,0 +1,140 @@ +From 6118411428a393fb0868bad9025d71875418058b Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:36 +0100 +Subject: [PATCH] drm/nouveau: make use of the GPUVM's shared dma-resv + +DRM GEM objects private to a single GPUVM can use a shared dma-resv. +Make use of the shared dma-resv of GPUVM rather than a driver specific +one. + +The shared dma-resv originates from a "root" GEM object serving as +container for the dma-resv to make it compatible with drm_exec. + +In order to make sure the object proving the shared dma-resv can't be +freed up before the objects making use of it, let every such GEM object +take a reference on it. + +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-7-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_bo.c | 11 +++++++++-- + drivers/gpu/drm/nouveau/nouveau_bo.h | 5 +++++ + drivers/gpu/drm/nouveau/nouveau_gem.c | 10 ++++++++-- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 7 ++----- + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 1 - + 5 files changed, 24 insertions(+), 10 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_obj + * If nouveau_bo_new() allocated this buffer, the GEM object was never + * initialized, so don't attempt to release it. 
+ */ +- if (bo->base.dev) ++ if (bo->base.dev) { ++ /* Gem objects not being shared with other VMs get their ++ * dma_resv from a root GEM object. ++ */ ++ if (nvbo->no_share) ++ drm_gem_object_put(nvbo->r_obj); ++ + drm_gem_object_release(&bo->base); +- else ++ } else { + dma_resv_fini(&bo->base._resv); ++ } + + kfree(nvbo); + } +--- a/drivers/gpu/drm/nouveau/nouveau_bo.h ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.h +@@ -26,6 +26,11 @@ struct nouveau_bo { + struct list_head entry; + int pbbo_index; + bool validate_mapped; ++ ++ /* Root GEM object we derive the dma_resv of in case this BO is not ++ * shared between VMs. ++ */ ++ struct drm_gem_object *r_obj; + bool no_share; + + /* GPU address space is independent of CPU word size */ +--- a/drivers/gpu/drm/nouveau/nouveau_gem.c ++++ b/drivers/gpu/drm/nouveau/nouveau_gem.c +@@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_o + if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) + return 0; + +- if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv) ++ if (nvbo->no_share && uvmm && ++ drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv) + return -EPERM; + + ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); +@@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli, + if (unlikely(!uvmm)) + return -EINVAL; + +- resv = &uvmm->resv; ++ resv = drm_gpuvm_resv(&uvmm->base); + } + + if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART))) +@@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli, + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) + nvbo->valid_domains &= domain; + ++ if (nvbo->no_share) { ++ nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base); ++ drm_gem_object_get(nvbo->r_obj); ++ } ++ + *pnvbo = nvbo; + return 0; + } +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1802,7 +1802,6 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + int ret; + + mutex_init(&uvmm->mutex); +- dma_resv_init(&uvmm->resv); + mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); + +@@ -1842,14 +1841,14 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + kernel_managed_addr, kernel_managed_size, + NULL, 0, &cli->uvmm.vmm.vmm); + if (ret) +- goto out_free_gpuva_mgr; ++ goto out_gpuvm_fini; + + cli->uvmm.vmm.cli = cli; + mutex_unlock(&cli->mutex); + + return 0; + +-out_free_gpuva_mgr: ++out_gpuvm_fini: + drm_gpuvm_destroy(&uvmm->base); + out_unlock: + mutex_unlock(&cli->mutex); +@@ -1907,6 +1906,4 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + nouveau_vmm_fini(&uvmm->vmm); + drm_gpuvm_destroy(&uvmm->base); + mutex_unlock(&cli->mutex); +- +- dma_resv_fini(&uvmm->resv); + } +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -12,7 +12,6 @@ struct nouveau_uvmm { + struct nouveau_vmm vmm; + struct maple_tree region_mt; + struct mutex mutex; +- struct dma_resv resv; + + bool disabled; + }; diff --git a/patches-6.6/034-10-v6.8-drm-gpuvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch b/patches-6.6/034-10-v6.8-drm-gpuvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch new file mode 100644 index 0000000..fd95b76 --- /dev/null +++ b/patches-6.6/034-10-v6.8-drm-gpuvm-add-drm_gpuvm_flags-to-drm_gpuvm.patch @@ -0,0 +1,98 @@ +From 809ef191ee600e8bcbe2f8a769e00d2d54c16094 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:37 +0100 +Subject: [PATCH] drm/gpuvm: add drm_gpuvm_flags to drm_gpuvm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+Introduce flags for struct drm_gpuvm, this required by subsequent +commits. + +Acked-by: Christian König +Reviewed-by: Boris Brezillon +Reviewed-by: Thomas Hellström +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-8-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 3 +++ + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 2 +- + include/drm/drm_gpuvm.h | 16 ++++++++++++++++ + 3 files changed, 20 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -714,6 +714,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_ + * drm_gpuvm_init() - initialize a &drm_gpuvm + * @gpuvm: pointer to the &drm_gpuvm to initialize + * @name: the name of the GPU VA space ++ * @flags: the &drm_gpuvm_flags for this GPUVM + * @drm: the &drm_device this VM resides in + * @r_obj: the resv &drm_gem_object providing the GPUVM's common &dma_resv + * @start_offset: the start offset of the GPU VA space +@@ -729,6 +730,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_resv_object_ + */ + void + drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ enum drm_gpuvm_flags flags, + struct drm_device *drm, + struct drm_gem_object *r_obj, + u64 start_offset, u64 range, +@@ -739,6 +741,7 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + INIT_LIST_HEAD(&gpuvm->rb.list); + + gpuvm->name = name ? name : "unknown"; ++ gpuvm->flags = flags; + gpuvm->ops = ops; + gpuvm->drm = drm; + gpuvm->r_obj = r_obj; +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1828,7 +1828,7 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + goto out_unlock; + } + +- drm_gpuvm_init(&uvmm->base, cli->name, drm, r_obj, ++ drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, + kernel_managed_addr, kernel_managed_size, +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -185,6 +185,16 @@ static inline bool drm_gpuva_invalidated + } + + /** ++ * enum drm_gpuvm_flags - flags for struct drm_gpuvm ++ */ ++enum drm_gpuvm_flags { ++ /** ++ * @DRM_GPUVM_USERBITS: user defined bits ++ */ ++ DRM_GPUVM_USERBITS = BIT(0), ++}; ++ ++/** + * struct drm_gpuvm - DRM GPU VA Manager + * + * The DRM GPU VA Manager keeps track of a GPU's virtual address space by using +@@ -203,6 +213,11 @@ struct drm_gpuvm { + const char *name; + + /** ++ * @flags: the &drm_gpuvm_flags of this GPUVM ++ */ ++ enum drm_gpuvm_flags flags; ++ ++ /** + * @drm: the &drm_device this VM lives in + */ + struct drm_device *drm; +@@ -252,6 +267,7 @@ struct drm_gpuvm { + }; + + void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, ++ enum drm_gpuvm_flags flags, + struct drm_device *drm, + struct drm_gem_object *r_obj, + u64 start_offset, u64 range, diff --git a/patches-6.6/034-11-v6.8-drm-nouveau-separately-allocate-struct-nouveau_uvmm.patch b/patches-6.6/034-11-v6.8-drm-nouveau-separately-allocate-struct-nouveau_uvmm.patch new file mode 100644 index 0000000..7876d5d --- /dev/null +++ b/patches-6.6/034-11-v6.8-drm-nouveau-separately-allocate-struct-nouveau_uvmm.patch @@ -0,0 +1,219 @@ +From 266f7618e761c8a6aa89dbfe43cda1b69cdbbf14 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:38 +0100 +Subject: [PATCH] drm/nouveau: separately allocate struct nouveau_uvmm + +Allocate struct nouveau_uvmm separately in preparation for subsequent +commits introducing reference counting for struct drm_gpuvm. 
+ +While at it, get rid of nouveau_uvmm_init() as indirection of +nouveau_uvmm_ioctl_vm_init() and perform some minor cleanups. + +Reviewed-by: Dave Airlie +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-9-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_drm.c | 5 +- + drivers/gpu/drm/nouveau/nouveau_drv.h | 10 ++-- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 63 +++++++++++++------------- + drivers/gpu/drm/nouveau/nouveau_uvmm.h | 4 -- + 4 files changed, 40 insertions(+), 42 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_drm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_drm.c +@@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cl + static void + nouveau_cli_fini(struct nouveau_cli *cli) + { ++ struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli); ++ + /* All our channels are dead now, which means all the fences they + * own are signalled, and all callback functions have been called. + * +@@ -199,7 +201,8 @@ nouveau_cli_fini(struct nouveau_cli *cli + WARN_ON(!list_empty(&cli->worker)); + + usif_client_fini(cli); +- nouveau_uvmm_fini(&cli->uvmm); ++ if (uvmm) ++ nouveau_uvmm_fini(uvmm); + nouveau_sched_entity_fini(&cli->sched_entity); + nouveau_vmm_fini(&cli->svm); + nouveau_vmm_fini(&cli->vmm); +--- a/drivers/gpu/drm/nouveau/nouveau_drv.h ++++ b/drivers/gpu/drm/nouveau/nouveau_drv.h +@@ -93,7 +93,10 @@ struct nouveau_cli { + struct nvif_mmu mmu; + struct nouveau_vmm vmm; + struct nouveau_vmm svm; +- struct nouveau_uvmm uvmm; ++ struct { ++ struct nouveau_uvmm *ptr; ++ bool disabled; ++ } uvmm; + + struct nouveau_sched_entity sched_entity; + +@@ -121,10 +124,7 @@ struct nouveau_cli_work { + static inline struct nouveau_uvmm * + nouveau_cli_uvmm(struct nouveau_cli *cli) + { +- if (!cli || !cli->uvmm.vmm.cli) +- return NULL; +- +- return &cli->uvmm; ++ return cli ? 
cli->uvmm.ptr : NULL; + } + + static inline struct nouveau_uvmm * +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1636,18 +1636,6 @@ err_free: + return ret; + } + +-int +-nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, +- void *data, +- struct drm_file *file_priv) +-{ +- struct nouveau_cli *cli = nouveau_cli(file_priv); +- struct drm_nouveau_vm_init *init = data; +- +- return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr, +- init->kernel_managed_size); +-} +- + static int + nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args) + { +@@ -1793,17 +1781,25 @@ nouveau_uvmm_bo_unmap_all(struct nouveau + } + + int +-nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, +- u64 kernel_managed_addr, u64 kernel_managed_size) ++nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, ++ void *data, ++ struct drm_file *file_priv) + { ++ struct nouveau_uvmm *uvmm; ++ struct nouveau_cli *cli = nouveau_cli(file_priv); + struct drm_device *drm = cli->drm->dev; + struct drm_gem_object *r_obj; +- u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; ++ struct drm_nouveau_vm_init *init = data; ++ u64 kernel_managed_end; + int ret; + +- mutex_init(&uvmm->mutex); +- mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); +- mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); ++ if (check_add_overflow(init->kernel_managed_addr, ++ init->kernel_managed_size, ++ &kernel_managed_end)) ++ return -EINVAL; ++ ++ if (kernel_managed_end > NOUVEAU_VA_SPACE_END) ++ return -EINVAL; + + mutex_lock(&cli->mutex); + +@@ -1812,44 +1808,49 @@ nouveau_uvmm_init(struct nouveau_uvmm *u + goto out_unlock; + } + +- if (kernel_managed_end <= kernel_managed_addr) { +- ret = -EINVAL; +- goto out_unlock; +- } +- +- if (kernel_managed_end > NOUVEAU_VA_SPACE_END) { +- ret = -EINVAL; ++ uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL); ++ if (!uvmm) { ++ ret = -ENOMEM; + goto out_unlock; + } + + r_obj = drm_gpuvm_resv_object_alloc(drm); + if (!r_obj) { ++ kfree(uvmm); + ret = -ENOMEM; + goto out_unlock; + } + ++ mutex_init(&uvmm->mutex); ++ mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); ++ mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); ++ + drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj, + NOUVEAU_VA_SPACE_START, + NOUVEAU_VA_SPACE_END, +- kernel_managed_addr, kernel_managed_size, ++ init->kernel_managed_addr, ++ init->kernel_managed_size, + NULL); + /* GPUVM takes care from here on. 
*/ + drm_gem_object_put(r_obj); + + ret = nvif_vmm_ctor(&cli->mmu, "uvmm", + cli->vmm.vmm.object.oclass, RAW, +- kernel_managed_addr, kernel_managed_size, +- NULL, 0, &cli->uvmm.vmm.vmm); ++ init->kernel_managed_addr, ++ init->kernel_managed_size, ++ NULL, 0, &uvmm->vmm.vmm); + if (ret) + goto out_gpuvm_fini; + +- cli->uvmm.vmm.cli = cli; ++ uvmm->vmm.cli = cli; ++ cli->uvmm.ptr = uvmm; + mutex_unlock(&cli->mutex); + + return 0; + + out_gpuvm_fini: + drm_gpuvm_destroy(&uvmm->base); ++ kfree(uvmm); + out_unlock: + mutex_unlock(&cli->mutex); + return ret; +@@ -1864,9 +1865,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + struct nouveau_sched_entity *entity = &cli->sched_entity; + struct drm_gpuva *va, *next; + +- if (!cli) +- return; +- + rmb(); /* for list_empty to work without lock */ + wait_event(entity->job.wq, list_empty(&entity->job.list.head)); + +@@ -1905,5 +1903,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + mutex_lock(&cli->mutex); + nouveau_vmm_fini(&uvmm->vmm); + drm_gpuvm_destroy(&uvmm->base); ++ kfree(uvmm); + mutex_unlock(&cli->mutex); + } +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h +@@ -12,8 +12,6 @@ struct nouveau_uvmm { + struct nouveau_vmm vmm; + struct maple_tree region_mt; + struct mutex mutex; +- +- bool disabled; + }; + + struct nouveau_uvma_region { +@@ -78,8 +76,6 @@ struct nouveau_uvmm_bind_job_args { + + #define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base) + +-int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, +- u64 kernel_managed_addr, u64 kernel_managed_size); + void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm); + + void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem); diff --git a/patches-6.6/034-12-v6.8-drm-gpuvm-reference-count-drm_gpuvm-structures.patch b/patches-6.6/034-12-v6.8-drm-gpuvm-reference-count-drm_gpuvm-structures.patch new file mode 100644 index 0000000..5c5d1d2 --- /dev/null +++ b/patches-6.6/034-12-v6.8-drm-gpuvm-reference-count-drm_gpuvm-structures.patch @@ -0,0 +1,221 @@ +From 8af72338dd81d1f8667e0240bd28f5fc98b3f20d Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:39 +0100 +Subject: [PATCH] drm/gpuvm: reference count drm_gpuvm structures +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Implement reference counting for struct drm_gpuvm. + +Acked-by: Christian König +Reviewed-by: Thomas Hellström +Reviewed-by: Boris Brezillon +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-10-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 56 +++++++++++++++++++++----- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 20 ++++++--- + include/drm/drm_gpuvm.h | 31 +++++++++++++- + 3 files changed, 90 insertions(+), 17 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -740,6 +740,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + gpuvm->rb.tree = RB_ROOT_CACHED; + INIT_LIST_HEAD(&gpuvm->rb.list); + ++ kref_init(&gpuvm->kref); ++ + gpuvm->name = name ? name : "unknown"; + gpuvm->flags = flags; + gpuvm->ops = ops; +@@ -764,15 +766,8 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + } + EXPORT_SYMBOL_GPL(drm_gpuvm_init); + +-/** +- * drm_gpuvm_destroy() - cleanup a &drm_gpuvm +- * @gpuvm: pointer to the &drm_gpuvm to clean up +- * +- * Note that it is a bug to call this function on a manager that still +- * holds GPU VA mappings. 
+- */ +-void +-drm_gpuvm_destroy(struct drm_gpuvm *gpuvm) ++static void ++drm_gpuvm_fini(struct drm_gpuvm *gpuvm) + { + gpuvm->name = NULL; + +@@ -784,7 +779,35 @@ drm_gpuvm_destroy(struct drm_gpuvm *gpuv + + drm_gem_object_put(gpuvm->r_obj); + } +-EXPORT_SYMBOL_GPL(drm_gpuvm_destroy); ++ ++static void ++drm_gpuvm_free(struct kref *kref) ++{ ++ struct drm_gpuvm *gpuvm = container_of(kref, struct drm_gpuvm, kref); ++ ++ drm_gpuvm_fini(gpuvm); ++ ++ if (drm_WARN_ON(gpuvm->drm, !gpuvm->ops->vm_free)) ++ return; ++ ++ gpuvm->ops->vm_free(gpuvm); ++} ++ ++/** ++ * drm_gpuvm_put() - drop a struct drm_gpuvm reference ++ * @gpuvm: the &drm_gpuvm to release the reference of ++ * ++ * This releases a reference to @gpuvm. ++ * ++ * This function may be called from atomic context. ++ */ ++void ++drm_gpuvm_put(struct drm_gpuvm *gpuvm) ++{ ++ if (gpuvm) ++ kref_put(&gpuvm->kref, drm_gpuvm_free); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_put); + + static int + __drm_gpuva_insert(struct drm_gpuvm *gpuvm, +@@ -833,11 +856,21 @@ drm_gpuva_insert(struct drm_gpuvm *gpuvm + { + u64 addr = va->va.addr; + u64 range = va->va.range; ++ int ret; + + if (unlikely(!drm_gpuvm_range_valid(gpuvm, addr, range))) + return -EINVAL; + +- return __drm_gpuva_insert(gpuvm, va); ++ ret = __drm_gpuva_insert(gpuvm, va); ++ if (likely(!ret)) ++ /* Take a reference of the GPUVM for the successfully inserted ++ * drm_gpuva. We can't take the reference in ++ * __drm_gpuva_insert() itself, since we don't want to increse ++ * the reference count for the GPUVM's kernel_alloc_node. ++ */ ++ drm_gpuvm_get(gpuvm); ++ ++ return ret; + } + EXPORT_SYMBOL_GPL(drm_gpuva_insert); + +@@ -870,6 +903,7 @@ drm_gpuva_remove(struct drm_gpuva *va) + } + + __drm_gpuva_remove(va); ++ drm_gpuvm_put(va->vm); + } + EXPORT_SYMBOL_GPL(drm_gpuva_remove); + +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -1780,6 +1780,18 @@ nouveau_uvmm_bo_unmap_all(struct nouveau + } + } + ++static void ++nouveau_uvmm_free(struct drm_gpuvm *gpuvm) ++{ ++ struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm); ++ ++ kfree(uvmm); ++} ++ ++static const struct drm_gpuvm_ops gpuvm_ops = { ++ .vm_free = nouveau_uvmm_free, ++}; ++ + int + nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, + void *data, +@@ -1830,7 +1842,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_de + NOUVEAU_VA_SPACE_END, + init->kernel_managed_addr, + init->kernel_managed_size, +- NULL); ++ &gpuvm_ops); + /* GPUVM takes care from here on. 
*/ + drm_gem_object_put(r_obj); + +@@ -1849,8 +1861,7 @@ nouveau_uvmm_ioctl_vm_init(struct drm_de + return 0; + + out_gpuvm_fini: +- drm_gpuvm_destroy(&uvmm->base); +- kfree(uvmm); ++ drm_gpuvm_put(&uvmm->base); + out_unlock: + mutex_unlock(&cli->mutex); + return ret; +@@ -1902,7 +1913,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *u + + mutex_lock(&cli->mutex); + nouveau_vmm_fini(&uvmm->vmm); +- drm_gpuvm_destroy(&uvmm->base); +- kfree(uvmm); ++ drm_gpuvm_put(&uvmm->base); + mutex_unlock(&cli->mutex); + } +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -248,6 +248,11 @@ struct drm_gpuvm { + } rb; + + /** ++ * @kref: reference count of this object ++ */ ++ struct kref kref; ++ ++ /** + * @kernel_alloc_node: + * + * &drm_gpuva representing the address space cutout reserved for +@@ -273,7 +278,23 @@ void drm_gpuvm_init(struct drm_gpuvm *gp + u64 start_offset, u64 range, + u64 reserve_offset, u64 reserve_range, + const struct drm_gpuvm_ops *ops); +-void drm_gpuvm_destroy(struct drm_gpuvm *gpuvm); ++ ++/** ++ * drm_gpuvm_get() - acquire a struct drm_gpuvm reference ++ * @gpuvm: the &drm_gpuvm to acquire the reference of ++ * ++ * This function acquires an additional reference to @gpuvm. It is illegal to ++ * call this without already holding a reference. No locks required. ++ */ ++static inline struct drm_gpuvm * ++drm_gpuvm_get(struct drm_gpuvm *gpuvm) ++{ ++ kref_get(&gpuvm->kref); ++ ++ return gpuvm; ++} ++ ++void drm_gpuvm_put(struct drm_gpuvm *gpuvm); + + bool drm_gpuvm_range_valid(struct drm_gpuvm *gpuvm, u64 addr, u64 range); + bool drm_gpuvm_interval_empty(struct drm_gpuvm *gpuvm, u64 addr, u64 range); +@@ -674,6 +695,14 @@ static inline void drm_gpuva_init_from_o + */ + struct drm_gpuvm_ops { + /** ++ * @vm_free: called when the last reference of a struct drm_gpuvm is ++ * dropped ++ * ++ * This callback is mandatory. ++ */ ++ void (*vm_free)(struct drm_gpuvm *gpuvm); ++ ++ /** + * @op_alloc: called when the &drm_gpuvm allocates + * a struct drm_gpuva_op + * diff --git a/patches-6.6/034-13-v6.8-drm-gpuvm-add-an-abstraction-for-a-VM-BO-combination.patch b/patches-6.6/034-13-v6.8-drm-gpuvm-add-an-abstraction-for-a-VM-BO-combination.patch new file mode 100644 index 0000000..37aa2f6 --- /dev/null +++ b/patches-6.6/034-13-v6.8-drm-gpuvm-add-an-abstraction-for-a-VM-BO-combination.patch @@ -0,0 +1,1036 @@ +From 94bc2249f08e141fb4aa120bfdc392c7a5e78211 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:40 +0100 +Subject: [PATCH] drm/gpuvm: add an abstraction for a VM / BO combination +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add an abstraction layer between the drm_gpuva mappings of a particular +drm_gem_object and this GEM object itself. The abstraction represents a +combination of a drm_gem_object and drm_gpuvm. The drm_gem_object holds +a list of drm_gpuvm_bo structures (the structure representing this +abstraction), while each drm_gpuvm_bo contains list of mappings of this +GEM object. + +This has multiple advantages: + +1) We can use the drm_gpuvm_bo structure to attach it to various lists + of the drm_gpuvm. This is useful for tracking external and evicted + objects per VM, which is introduced in subsequent patches. + +2) Finding mappings of a certain drm_gem_object mapped in a certain + drm_gpuvm becomes much cheaper. + +3) Drivers can derive and extend the structure to easily represent + driver specific states of a BO for a certain GPUVM. 
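+
+A minimal sketch of points 1) and 3) above (the "my_" names are illustrative
+only): a driver derives its own VM/BO state from &drm_gpuvm_bo and plugs the
+allocation into the vm_bo_alloc / vm_bo_free callbacks that
+drm_gpuvm_bo_create() and drm_gpuvm_bo_destroy() use when provided.
+
+  #include <linux/slab.h>
+  #include <drm/drm_gpuvm.h>
+
+  struct my_vm_bo {
+          struct drm_gpuvm_bo base;       /* the VM / BO combination itself */
+          struct list_head evict_link;    /* driver specific per-VM BO state */
+  };
+
+  static struct drm_gpuvm_bo *my_vm_bo_alloc(void)
+  {
+          struct my_vm_bo *vm_bo = kzalloc(sizeof(*vm_bo), GFP_KERNEL);
+
+          return vm_bo ? &vm_bo->base : NULL;
+  }
+
+  static void my_vm_bo_free(struct drm_gpuvm_bo *vm_bo)
+  {
+          kfree(container_of(vm_bo, struct my_vm_bo, base));
+  }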
+ +The idea of this abstraction was taken from amdgpu, hence the credit for +this idea goes to the developers of amdgpu. + +Cc: Christian König +Acked-by: Christian König +Reviewed-by: Thomas Hellström +Reviewed-by: Boris Brezillon +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-11-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 340 +++++++++++++++++++++---- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 63 +++-- + include/drm/drm_gem.h | 32 +-- + include/drm/drm_gpuvm.h | 185 +++++++++++++- + 4 files changed, 534 insertions(+), 86 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -70,6 +70,18 @@ + * &drm_gem_object, such as the &drm_gem_object containing the root page table, + * but it can also be a 'dummy' object, which can be allocated with + * drm_gpuvm_resv_object_alloc(). ++ * ++ * In order to connect a struct drm_gpuva its backing &drm_gem_object each ++ * &drm_gem_object maintains a list of &drm_gpuvm_bo structures, and each ++ * &drm_gpuvm_bo contains a list of &drm_gpuva structures. ++ * ++ * A &drm_gpuvm_bo is an abstraction that represents a combination of a ++ * &drm_gpuvm and a &drm_gem_object. Every such combination should be unique. ++ * This is ensured by the API through drm_gpuvm_bo_obtain() and ++ * drm_gpuvm_bo_obtain_prealloc() which first look into the corresponding ++ * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this ++ * particular combination. If not existent a new instance is created and linked ++ * to the &drm_gem_object. + */ + + /** +@@ -395,21 +407,28 @@ + /** + * DOC: Locking + * +- * Generally, the GPU VA manager does not take care of locking itself, it is +- * the drivers responsibility to take care about locking. Drivers might want to +- * protect the following operations: inserting, removing and iterating +- * &drm_gpuva objects as well as generating all kinds of operations, such as +- * split / merge or prefetch. +- * +- * The GPU VA manager also does not take care of the locking of the backing +- * &drm_gem_object buffers GPU VA lists by itself; drivers are responsible to +- * enforce mutual exclusion using either the GEMs dma_resv lock or alternatively +- * a driver specific external lock. For the latter see also +- * drm_gem_gpuva_set_lock(). +- * +- * However, the GPU VA manager contains lockdep checks to ensure callers of its +- * API hold the corresponding lock whenever the &drm_gem_objects GPU VA list is +- * accessed by functions such as drm_gpuva_link() or drm_gpuva_unlink(). ++ * In terms of managing &drm_gpuva entries DRM GPUVM does not take care of ++ * locking itself, it is the drivers responsibility to take care about locking. ++ * Drivers might want to protect the following operations: inserting, removing ++ * and iterating &drm_gpuva objects as well as generating all kinds of ++ * operations, such as split / merge or prefetch. ++ * ++ * DRM GPUVM also does not take care of the locking of the backing ++ * &drm_gem_object buffers GPU VA lists and &drm_gpuvm_bo abstractions by ++ * itself; drivers are responsible to enforce mutual exclusion using either the ++ * GEMs dma_resv lock or alternatively a driver specific external lock. For the ++ * latter see also drm_gem_gpuva_set_lock(). 
++ * ++ * However, DRM GPUVM contains lockdep checks to ensure callers of its API hold ++ * the corresponding lock whenever the &drm_gem_objects GPU VA list is accessed ++ * by functions such as drm_gpuva_link() or drm_gpuva_unlink(), but also ++ * drm_gpuvm_bo_obtain() and drm_gpuvm_bo_put(). ++ * ++ * The latter is required since on creation and destruction of a &drm_gpuvm_bo ++ * the &drm_gpuvm_bo is attached / removed from the &drm_gem_objects gpuva list. ++ * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and ++ * &drm_gem_object must be able to observe previous creations and destructions ++ * of &drm_gpuvm_bos in order to keep instances unique. + */ + + /** +@@ -439,6 +458,7 @@ + * { + * struct drm_gpuva_ops *ops; + * struct drm_gpuva_op *op ++ * struct drm_gpuvm_bo *vm_bo; + * + * driver_lock_va_space(); + * ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range, +@@ -446,6 +466,10 @@ + * if (IS_ERR(ops)) + * return PTR_ERR(ops); + * ++ * vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj); ++ * if (IS_ERR(vm_bo)) ++ * return PTR_ERR(vm_bo); ++ * + * drm_gpuva_for_each_op(op, ops) { + * struct drm_gpuva *va; + * +@@ -458,7 +482,7 @@ + * + * driver_vm_map(); + * drm_gpuva_map(gpuvm, va, &op->map); +- * drm_gpuva_link(va); ++ * drm_gpuva_link(va, vm_bo); + * + * break; + * case DRM_GPUVA_OP_REMAP: { +@@ -485,11 +509,11 @@ + * driver_vm_remap(); + * drm_gpuva_remap(prev, next, &op->remap); + * +- * drm_gpuva_unlink(va); + * if (prev) +- * drm_gpuva_link(prev); ++ * drm_gpuva_link(prev, va->vm_bo); + * if (next) +- * drm_gpuva_link(next); ++ * drm_gpuva_link(next, va->vm_bo); ++ * drm_gpuva_unlink(va); + * + * break; + * } +@@ -505,6 +529,7 @@ + * break; + * } + * } ++ * drm_gpuvm_bo_put(vm_bo); + * driver_unlock_va_space(); + * + * return 0; +@@ -514,6 +539,7 @@ + * + * struct driver_context { + * struct drm_gpuvm *gpuvm; ++ * struct drm_gpuvm_bo *vm_bo; + * struct drm_gpuva *new_va; + * struct drm_gpuva *prev_va; + * struct drm_gpuva *next_va; +@@ -534,6 +560,7 @@ + * struct drm_gem_object *obj, u64 offset) + * { + * struct driver_context ctx; ++ * struct drm_gpuvm_bo *vm_bo; + * struct drm_gpuva_ops *ops; + * struct drm_gpuva_op *op; + * int ret = 0; +@@ -543,16 +570,23 @@ + * ctx.new_va = kzalloc(sizeof(*ctx.new_va), GFP_KERNEL); + * ctx.prev_va = kzalloc(sizeof(*ctx.prev_va), GFP_KERNEL); + * ctx.next_va = kzalloc(sizeof(*ctx.next_va), GFP_KERNEL); +- * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va) { ++ * ctx.vm_bo = drm_gpuvm_bo_create(gpuvm, obj); ++ * if (!ctx.new_va || !ctx.prev_va || !ctx.next_va || !vm_bo) { + * ret = -ENOMEM; + * goto out; + * } + * ++ * // Typically protected with a driver specific GEM gpuva lock ++ * // used in the fence signaling path for drm_gpuva_link() and ++ * // drm_gpuva_unlink(), hence pre-allocate. 
++ * ctx.vm_bo = drm_gpuvm_bo_obtain_prealloc(ctx.vm_bo); ++ * + * driver_lock_va_space(); + * ret = drm_gpuvm_sm_map(gpuvm, &ctx, addr, range, obj, offset); + * driver_unlock_va_space(); + * + * out: ++ * drm_gpuvm_bo_put(ctx.vm_bo); + * kfree(ctx.new_va); + * kfree(ctx.prev_va); + * kfree(ctx.next_va); +@@ -565,7 +599,7 @@ + * + * drm_gpuva_map(ctx->vm, ctx->new_va, &op->map); + * +- * drm_gpuva_link(ctx->new_va); ++ * drm_gpuva_link(ctx->new_va, ctx->vm_bo); + * + * // prevent the new GPUVA from being freed in + * // driver_mapping_create() +@@ -577,22 +611,23 @@ + * int driver_gpuva_remap(struct drm_gpuva_op *op, void *__ctx) + * { + * struct driver_context *ctx = __ctx; ++ * struct drm_gpuva *va = op->remap.unmap->va; + * + * drm_gpuva_remap(ctx->prev_va, ctx->next_va, &op->remap); + * +- * drm_gpuva_unlink(op->remap.unmap->va); +- * kfree(op->remap.unmap->va); +- * + * if (op->remap.prev) { +- * drm_gpuva_link(ctx->prev_va); ++ * drm_gpuva_link(ctx->prev_va, va->vm_bo); + * ctx->prev_va = NULL; + * } + * + * if (op->remap.next) { +- * drm_gpuva_link(ctx->next_va); ++ * drm_gpuva_link(ctx->next_va, va->vm_bo); + * ctx->next_va = NULL; + * } + * ++ * drm_gpuva_unlink(va); ++ * kfree(va); ++ * + * return 0; + * } + * +@@ -809,6 +844,199 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) + } + EXPORT_SYMBOL_GPL(drm_gpuvm_put); + ++/** ++ * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo ++ * @gpuvm: The &drm_gpuvm the @obj is mapped in. ++ * @obj: The &drm_gem_object being mapped in the @gpuvm. ++ * ++ * If provided by the driver, this function uses the &drm_gpuvm_ops ++ * vm_bo_alloc() callback to allocate. ++ * ++ * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure ++ */ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ struct drm_gpuvm_bo *vm_bo; ++ ++ if (ops && ops->vm_bo_alloc) ++ vm_bo = ops->vm_bo_alloc(); ++ else ++ vm_bo = kzalloc(sizeof(*vm_bo), GFP_KERNEL); ++ ++ if (unlikely(!vm_bo)) ++ return NULL; ++ ++ vm_bo->vm = drm_gpuvm_get(gpuvm); ++ vm_bo->obj = obj; ++ drm_gem_object_get(obj); ++ ++ kref_init(&vm_bo->kref); ++ INIT_LIST_HEAD(&vm_bo->list.gpuva); ++ INIT_LIST_HEAD(&vm_bo->list.entry.gem); ++ ++ return vm_bo; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); ++ ++static void ++drm_gpuvm_bo_destroy(struct kref *kref) ++{ ++ struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo, ++ kref); ++ struct drm_gpuvm *gpuvm = vm_bo->vm; ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ struct drm_gem_object *obj = vm_bo->obj; ++ bool lock = !drm_gpuvm_resv_protected(gpuvm); ++ ++ if (!lock) ++ drm_gpuvm_resv_assert_held(gpuvm); ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ list_del(&vm_bo->list.entry.gem); ++ ++ if (ops && ops->vm_bo_free) ++ ops->vm_bo_free(vm_bo); ++ else ++ kfree(vm_bo); ++ ++ drm_gpuvm_put(gpuvm); ++ drm_gem_object_put(obj); ++} ++ ++/** ++ * drm_gpuvm_bo_put() - drop a struct drm_gpuvm_bo reference ++ * @vm_bo: the &drm_gpuvm_bo to release the reference of ++ * ++ * This releases a reference to @vm_bo. ++ * ++ * If the reference count drops to zero, the &gpuvm_bo is destroyed, which ++ * includes removing it from the GEMs gpuva list. Hence, if a call to this ++ * function can potentially let the reference count drop to zero the caller must ++ * hold the dma-resv or driver specific GEM gpuva lock. ++ * ++ * This function may only be called from non-atomic context. 
++ */ ++void ++drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo) ++{ ++ might_sleep(); ++ ++ if (vm_bo) ++ kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put); ++ ++static struct drm_gpuvm_bo * ++__drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ drm_gem_for_each_gpuvm_bo(vm_bo, obj) ++ if (vm_bo->vm == gpuvm) ++ return vm_bo; ++ ++ return NULL; ++} ++ ++/** ++ * drm_gpuvm_bo_find() - find the &drm_gpuvm_bo for the given ++ * &drm_gpuvm and &drm_gem_object ++ * @gpuvm: The &drm_gpuvm the @obj is mapped in. ++ * @obj: The &drm_gem_object being mapped in the @gpuvm. ++ * ++ * Find the &drm_gpuvm_bo representing the combination of the given ++ * &drm_gpuvm and &drm_gem_object. If found, increases the reference ++ * count of the &drm_gpuvm_bo accordingly. ++ * ++ * Returns: a pointer to the &drm_gpuvm_bo on success, NULL on failure ++ */ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ struct drm_gpuvm_bo *vm_bo = __drm_gpuvm_bo_find(gpuvm, obj); ++ ++ return vm_bo ? drm_gpuvm_bo_get(vm_bo) : NULL; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_find); ++ ++/** ++ * drm_gpuvm_bo_obtain() - obtains and instance of the &drm_gpuvm_bo for the ++ * given &drm_gpuvm and &drm_gem_object ++ * @gpuvm: The &drm_gpuvm the @obj is mapped in. ++ * @obj: The &drm_gem_object being mapped in the @gpuvm. ++ * ++ * Find the &drm_gpuvm_bo representing the combination of the given ++ * &drm_gpuvm and &drm_gem_object. If found, increases the reference ++ * count of the &drm_gpuvm_bo accordingly. If not found, allocates a new ++ * &drm_gpuvm_bo. ++ * ++ * A new &drm_gpuvm_bo is added to the GEMs gpuva list. ++ * ++ * Returns: a pointer to the &drm_gpuvm_bo on success, an ERR_PTR on failure ++ */ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ vm_bo = drm_gpuvm_bo_find(gpuvm, obj); ++ if (vm_bo) ++ return vm_bo; ++ ++ vm_bo = drm_gpuvm_bo_create(gpuvm, obj); ++ if (!vm_bo) ++ return ERR_PTR(-ENOMEM); ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ list_add_tail(&vm_bo->list.entry.gem, &obj->gpuva.list); ++ ++ return vm_bo; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain); ++ ++/** ++ * drm_gpuvm_bo_obtain_prealloc() - obtains and instance of the &drm_gpuvm_bo ++ * for the given &drm_gpuvm and &drm_gem_object ++ * @__vm_bo: A pre-allocated struct drm_gpuvm_bo. ++ * ++ * Find the &drm_gpuvm_bo representing the combination of the given ++ * &drm_gpuvm and &drm_gem_object. If found, increases the reference ++ * count of the found &drm_gpuvm_bo accordingly, while the @__vm_bo reference ++ * count is decreased. If not found @__vm_bo is returned without further ++ * increase of the reference count. ++ * ++ * A new &drm_gpuvm_bo is added to the GEMs gpuva list. 
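A minimal usage sketch, not taken from the patches themselves: assuming the usual <drm/drm_gpuvm.h> and <linux/dma-resv.h> headers and a hypothetical driver_map_one() helper, a driver's map path pairs drm_gpuvm_bo_obtain() with drm_gpuva_link() and then drops the obtain reference again via drm_gpuvm_bo_put().

static int driver_map_one(struct drm_gpuvm *gpuvm, struct drm_gpuva *va,
                          struct drm_gem_object *obj)
{
        struct drm_gpuvm_bo *vm_bo;

        /* The GEM's gpuva lock (its dma-resv by default) must be held. */
        dma_resv_lock(obj->resv, NULL);

        /* Returns the existing vm_bo for this (VM, BO) pair or creates one. */
        vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj);
        if (IS_ERR(vm_bo)) {
                dma_resv_unlock(obj->resv);
                return PTR_ERR(vm_bo);
        }

        /* Linking takes its own vm_bo reference ... */
        drm_gpuva_link(va, vm_bo);

        /* ... so the reference returned by drm_gpuvm_bo_obtain() is dropped. */
        drm_gpuvm_bo_put(vm_bo);
        dma_resv_unlock(obj->resv);

        return 0;
}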
++ * ++ * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing ++ * &drm_gpuvm_bo was found ++ */ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo) ++{ ++ struct drm_gpuvm *gpuvm = __vm_bo->vm; ++ struct drm_gem_object *obj = __vm_bo->obj; ++ struct drm_gpuvm_bo *vm_bo; ++ ++ vm_bo = drm_gpuvm_bo_find(gpuvm, obj); ++ if (vm_bo) { ++ drm_gpuvm_bo_put(__vm_bo); ++ return vm_bo; ++ } ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list); ++ ++ return __vm_bo; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc); ++ + static int + __drm_gpuva_insert(struct drm_gpuvm *gpuvm, + struct drm_gpuva *va) +@@ -910,24 +1138,33 @@ EXPORT_SYMBOL_GPL(drm_gpuva_remove); + /** + * drm_gpuva_link() - link a &drm_gpuva + * @va: the &drm_gpuva to link ++ * @vm_bo: the &drm_gpuvm_bo to add the &drm_gpuva to + * +- * This adds the given &va to the GPU VA list of the &drm_gem_object it is +- * associated with. ++ * This adds the given &va to the GPU VA list of the &drm_gpuvm_bo and the ++ * &drm_gpuvm_bo to the &drm_gem_object it is associated with. ++ * ++ * For every &drm_gpuva entry added to the &drm_gpuvm_bo an additional ++ * reference of the latter is taken. + * + * This function expects the caller to protect the GEM's GPUVA list against +- * concurrent access using the GEMs dma_resv lock. ++ * concurrent access using either the GEMs dma_resv lock or a driver specific ++ * lock set through drm_gem_gpuva_set_lock(). + */ + void +-drm_gpuva_link(struct drm_gpuva *va) ++drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo) + { + struct drm_gem_object *obj = va->gem.obj; ++ struct drm_gpuvm *gpuvm = va->vm; + + if (unlikely(!obj)) + return; + +- drm_gem_gpuva_assert_lock_held(obj); ++ drm_WARN_ON(gpuvm->drm, obj != vm_bo->obj); + +- list_add_tail(&va->gem.entry, &obj->gpuva.list); ++ va->vm_bo = drm_gpuvm_bo_get(vm_bo); ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ list_add_tail(&va->gem.entry, &vm_bo->list.gpuva); + } + EXPORT_SYMBOL_GPL(drm_gpuva_link); + +@@ -938,20 +1175,31 @@ EXPORT_SYMBOL_GPL(drm_gpuva_link); + * This removes the given &va from the GPU VA list of the &drm_gem_object it is + * associated with. + * ++ * This removes the given &va from the GPU VA list of the &drm_gpuvm_bo and ++ * the &drm_gpuvm_bo from the &drm_gem_object it is associated with in case ++ * this call unlinks the last &drm_gpuva from the &drm_gpuvm_bo. ++ * ++ * For every &drm_gpuva entry removed from the &drm_gpuvm_bo a reference of ++ * the latter is dropped. ++ * + * This function expects the caller to protect the GEM's GPUVA list against +- * concurrent access using the GEMs dma_resv lock. ++ * concurrent access using either the GEMs dma_resv lock or a driver specific ++ * lock set through drm_gem_gpuva_set_lock(). 
+ */ + void + drm_gpuva_unlink(struct drm_gpuva *va) + { + struct drm_gem_object *obj = va->gem.obj; ++ struct drm_gpuvm_bo *vm_bo = va->vm_bo; + + if (unlikely(!obj)) + return; + + drm_gem_gpuva_assert_lock_held(obj); +- + list_del_init(&va->gem.entry); ++ ++ va->vm_bo = NULL; ++ drm_gpuvm_bo_put(vm_bo); + } + EXPORT_SYMBOL_GPL(drm_gpuva_unlink); + +@@ -1096,10 +1344,10 @@ drm_gpuva_remap(struct drm_gpuva *prev, + struct drm_gpuva *next, + struct drm_gpuva_op_remap *op) + { +- struct drm_gpuva *curr = op->unmap->va; +- struct drm_gpuvm *gpuvm = curr->vm; ++ struct drm_gpuva *va = op->unmap->va; ++ struct drm_gpuvm *gpuvm = va->vm; + +- drm_gpuva_remove(curr); ++ drm_gpuva_remove(va); + + if (op->prev) { + drm_gpuva_init_from_op(prev, op->prev); +@@ -1741,9 +1989,8 @@ err_free_ops: + EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops_create); + + /** +- * drm_gpuvm_gem_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM +- * @gpuvm: the &drm_gpuvm representing the GPU VA space +- * @obj: the &drm_gem_object to unmap ++ * drm_gpuvm_bo_unmap_ops_create() - creates the &drm_gpuva_ops to unmap a GEM ++ * @vm_bo: the &drm_gpuvm_bo abstraction + * + * This function creates a list of operations to perform unmapping for every + * GPUVA attached to a GEM. +@@ -1760,15 +2007,14 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prefetch_ops + * Returns: a pointer to the &drm_gpuva_ops on success, an ERR_PTR on failure + */ + struct drm_gpuva_ops * +-drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, +- struct drm_gem_object *obj) ++drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo) + { + struct drm_gpuva_ops *ops; + struct drm_gpuva_op *op; + struct drm_gpuva *va; + int ret; + +- drm_gem_gpuva_assert_lock_held(obj); ++ drm_gem_gpuva_assert_lock_held(vm_bo->obj); + + ops = kzalloc(sizeof(*ops), GFP_KERNEL); + if (!ops) +@@ -1776,8 +2022,8 @@ drm_gpuvm_gem_unmap_ops_create(struct dr + + INIT_LIST_HEAD(&ops->list); + +- drm_gem_for_each_gpuva(va, obj) { +- op = gpuva_op_alloc(gpuvm); ++ drm_gpuvm_bo_for_each_va(va, vm_bo) { ++ op = gpuva_op_alloc(vm_bo->vm); + if (!op) { + ret = -ENOMEM; + goto err_free_ops; +@@ -1791,10 +2037,10 @@ drm_gpuvm_gem_unmap_ops_create(struct dr + return ops; + + err_free_ops: +- drm_gpuva_ops_free(gpuvm, ops); ++ drm_gpuva_ops_free(vm_bo->vm, ops); + return ERR_PTR(ret); + } +-EXPORT_SYMBOL_GPL(drm_gpuvm_gem_unmap_ops_create); ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_unmap_ops_create); + + /** + * drm_gpuva_ops_free() - free the given &drm_gpuva_ops +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -62,6 +62,8 @@ struct bind_job_op { + enum vm_bind_op op; + u32 flags; + ++ struct drm_gpuvm_bo *vm_bo; ++ + struct { + u64 addr; + u64 range; +@@ -1101,22 +1103,28 @@ bind_validate_region(struct nouveau_job + } + + static void +-bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) ++bind_link_gpuvas(struct bind_job_op *bop) + { ++ struct nouveau_uvma_prealloc *new = &bop->new; ++ struct drm_gpuvm_bo *vm_bo = bop->vm_bo; ++ struct drm_gpuva_ops *ops = bop->ops; + struct drm_gpuva_op *op; + + drm_gpuva_for_each_op(op, ops) { + switch (op->op) { + case DRM_GPUVA_OP_MAP: +- drm_gpuva_link(&new->map->va); ++ drm_gpuva_link(&new->map->va, vm_bo); + break; +- case DRM_GPUVA_OP_REMAP: ++ case DRM_GPUVA_OP_REMAP: { ++ struct drm_gpuva *va = op->remap.unmap->va; ++ + if (op->remap.prev) +- drm_gpuva_link(&new->prev->va); ++ drm_gpuva_link(&new->prev->va, va->vm_bo); + if (op->remap.next) +- drm_gpuva_link(&new->next->va); +- 
drm_gpuva_unlink(op->remap.unmap->va); ++ drm_gpuva_link(&new->next->va, va->vm_bo); ++ drm_gpuva_unlink(va); + break; ++ } + case DRM_GPUVA_OP_UNMAP: + drm_gpuva_unlink(op->unmap.va); + break; +@@ -1138,10 +1146,17 @@ nouveau_uvmm_bind_job_submit(struct nouv + + list_for_each_op(op, &bind_job->ops) { + if (op->op == OP_MAP) { +- op->gem.obj = drm_gem_object_lookup(job->file_priv, +- op->gem.handle); +- if (!op->gem.obj) ++ struct drm_gem_object *obj = op->gem.obj = ++ drm_gem_object_lookup(job->file_priv, ++ op->gem.handle); ++ if (!obj) + return -ENOENT; ++ ++ dma_resv_lock(obj->resv, NULL); ++ op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj); ++ dma_resv_unlock(obj->resv); ++ if (IS_ERR(op->vm_bo)) ++ return PTR_ERR(op->vm_bo); + } + + ret = bind_validate_op(job, op); +@@ -1352,7 +1367,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + case OP_UNMAP_SPARSE: + case OP_MAP: + case OP_UNMAP: +- bind_link_gpuvas(op->ops, &op->new); ++ bind_link_gpuvas(op); + break; + default: + break; +@@ -1499,6 +1514,12 @@ nouveau_uvmm_bind_job_free_work_fn(struc + if (!IS_ERR_OR_NULL(op->ops)) + drm_gpuva_ops_free(&uvmm->base, op->ops); + ++ if (!IS_ERR_OR_NULL(op->vm_bo)) { ++ dma_resv_lock(obj->resv, NULL); ++ drm_gpuvm_bo_put(op->vm_bo); ++ dma_resv_unlock(obj->resv); ++ } ++ + if (obj) + drm_gem_object_put(obj); + } +@@ -1752,15 +1773,18 @@ void + nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem) + { + struct drm_gem_object *obj = &nvbo->bo.base; ++ struct drm_gpuvm_bo *vm_bo; + struct drm_gpuva *va; + + dma_resv_assert_held(obj->resv); + +- drm_gem_for_each_gpuva(va, obj) { +- struct nouveau_uvma *uvma = uvma_from_va(va); ++ drm_gem_for_each_gpuvm_bo(vm_bo, obj) { ++ drm_gpuvm_bo_for_each_va(va, vm_bo) { ++ struct nouveau_uvma *uvma = uvma_from_va(va); + +- nouveau_uvma_map(uvma, mem); +- drm_gpuva_invalidate(va, false); ++ nouveau_uvma_map(uvma, mem); ++ drm_gpuva_invalidate(va, false); ++ } + } + } + +@@ -1768,15 +1792,18 @@ void + nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo) + { + struct drm_gem_object *obj = &nvbo->bo.base; ++ struct drm_gpuvm_bo *vm_bo; + struct drm_gpuva *va; + + dma_resv_assert_held(obj->resv); + +- drm_gem_for_each_gpuva(va, obj) { +- struct nouveau_uvma *uvma = uvma_from_va(va); ++ drm_gem_for_each_gpuvm_bo(vm_bo, obj) { ++ drm_gpuvm_bo_for_each_va(va, vm_bo) { ++ struct nouveau_uvma *uvma = uvma_from_va(va); + +- nouveau_uvma_unmap(uvma); +- drm_gpuva_invalidate(va, true); ++ nouveau_uvma_unmap(uvma); ++ drm_gpuva_invalidate(va, true); ++ } + } + } + +--- a/include/drm/drm_gem.h ++++ b/include/drm/drm_gem.h +@@ -584,7 +584,7 @@ static inline bool drm_gem_object_is_sha + * drm_gem_gpuva_init() - initialize the gpuva list of a GEM object + * @obj: the &drm_gem_object + * +- * This initializes the &drm_gem_object's &drm_gpuva list. ++ * This initializes the &drm_gem_object's &drm_gpuvm_bo list. + * + * Calling this function is only necessary for drivers intending to support the + * &drm_driver_feature DRIVER_GEM_GPUVA. 
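The same pattern in generic form, as a hedged sketch rather than driver code from the patch: with the vm_bo layer in place, walking every mapping of a GEM object across all VMs nests the two iterators, much like the nouveau_uvmm_bo_map_all() rework above (driver_invalidate_mappings() is a made-up name).

static void driver_invalidate_mappings(struct drm_gem_object *obj)
{
        struct drm_gpuvm_bo *vm_bo;
        struct drm_gpuva *va;

        dma_resv_assert_held(obj->resv);

        drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
                drm_gpuvm_bo_for_each_va(va, vm_bo) {
                        /* act on every mapping of @obj, in every VM */
                        drm_gpuva_invalidate(va, true);
                }
        }
}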
+@@ -597,28 +597,28 @@ static inline void drm_gem_gpuva_init(st + } + + /** +- * drm_gem_for_each_gpuva() - iternator to walk over a list of gpuvas +- * @entry__: &drm_gpuva structure to assign to in each iteration step +- * @obj__: the &drm_gem_object the &drm_gpuvas to walk are associated with ++ * drm_gem_for_each_gpuvm_bo() - iterator to walk over a list of &drm_gpuvm_bo ++ * @entry__: &drm_gpuvm_bo structure to assign to in each iteration step ++ * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with + * +- * This iterator walks over all &drm_gpuva structures associated with the +- * &drm_gpuva_manager. ++ * This iterator walks over all &drm_gpuvm_bo structures associated with the ++ * &drm_gem_object. + */ +-#define drm_gem_for_each_gpuva(entry__, obj__) \ +- list_for_each_entry(entry__, &(obj__)->gpuva.list, gem.entry) ++#define drm_gem_for_each_gpuvm_bo(entry__, obj__) \ ++ list_for_each_entry(entry__, &(obj__)->gpuva.list, list.entry.gem) + + /** +- * drm_gem_for_each_gpuva_safe() - iternator to safely walk over a list of +- * gpuvas +- * @entry__: &drm_gpuva structure to assign to in each iteration step +- * @next__: &next &drm_gpuva to store the next step +- * @obj__: the &drm_gem_object the &drm_gpuvas to walk are associated with ++ * drm_gem_for_each_gpuvm_bo_safe() - iterator to safely walk over a list of ++ * &drm_gpuvm_bo ++ * @entry__: &drm_gpuvm_bostructure to assign to in each iteration step ++ * @next__: &next &drm_gpuvm_bo to store the next step ++ * @obj__: the &drm_gem_object the &drm_gpuvm_bo to walk are associated with + * +- * This iterator walks over all &drm_gpuva structures associated with the ++ * This iterator walks over all &drm_gpuvm_bo structures associated with the + * &drm_gem_object. It is implemented with list_for_each_entry_safe(), hence + * it is save against removal of elements. + */ +-#define drm_gem_for_each_gpuva_safe(entry__, next__, obj__) \ +- list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, gem.entry) ++#define drm_gem_for_each_gpuvm_bo_safe(entry__, next__, obj__) \ ++ list_for_each_entry_safe(entry__, next__, &(obj__)->gpuva.list, list.entry.gem) + + #endif /* __DRM_GEM_H__ */ +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -25,6 +25,7 @@ + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + ++#include + #include + #include + #include +@@ -33,6 +34,7 @@ + #include + + struct drm_gpuvm; ++struct drm_gpuvm_bo; + struct drm_gpuvm_ops; + + /** +@@ -74,6 +76,12 @@ struct drm_gpuva { + struct drm_gpuvm *vm; + + /** ++ * @vm_bo: the &drm_gpuvm_bo abstraction for the mapped ++ * &drm_gem_object ++ */ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ /** + * @flags: the &drm_gpuva_flags for this mapping + */ + enum drm_gpuva_flags flags; +@@ -108,7 +116,7 @@ struct drm_gpuva { + struct drm_gem_object *obj; + + /** +- * @entry: the &list_head to attach this object to a &drm_gem_object ++ * @entry: the &list_head to attach this object to a &drm_gpuvm_bo + */ + struct list_head entry; + } gem; +@@ -141,7 +149,7 @@ struct drm_gpuva { + int drm_gpuva_insert(struct drm_gpuvm *gpuvm, struct drm_gpuva *va); + void drm_gpuva_remove(struct drm_gpuva *va); + +-void drm_gpuva_link(struct drm_gpuva *va); ++void drm_gpuva_link(struct drm_gpuva *va, struct drm_gpuvm_bo *vm_bo); + void drm_gpuva_unlink(struct drm_gpuva *va); + + struct drm_gpuva *drm_gpuva_find(struct drm_gpuvm *gpuvm, +@@ -189,9 +197,15 @@ static inline bool drm_gpuva_invalidated + */ + enum drm_gpuvm_flags { + /** ++ * @DRM_GPUVM_RESV_PROTECTED: GPUVM is protected externally by the ++ * GPUVM's &dma_resv lock ++ */ ++ DRM_GPUVM_RESV_PROTECTED = BIT(0), ++ ++ /** + * @DRM_GPUVM_USERBITS: user defined bits + */ +- DRM_GPUVM_USERBITS = BIT(0), ++ DRM_GPUVM_USERBITS = BIT(1), + }; + + /** +@@ -303,6 +317,19 @@ struct drm_gem_object * + drm_gpuvm_resv_object_alloc(struct drm_device *drm); + + /** ++ * drm_gpuvm_resv_protected() - indicates whether &DRM_GPUVM_RESV_PROTECTED is ++ * set ++ * @gpuvm: the &drm_gpuvm ++ * ++ * Returns: true if &DRM_GPUVM_RESV_PROTECTED is set, false otherwise. ++ */ ++static inline bool ++drm_gpuvm_resv_protected(struct drm_gpuvm *gpuvm) ++{ ++ return gpuvm->flags & DRM_GPUVM_RESV_PROTECTED; ++} ++ ++/** + * drm_gpuvm_resv() - returns the &drm_gpuvm's &dma_resv + * @gpuvm__: the &drm_gpuvm + * +@@ -326,6 +353,12 @@ drm_gpuvm_resv_object_alloc(struct drm_d + #define drm_gpuvm_resv_assert_held(gpuvm__) \ + dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) + ++#define drm_gpuvm_resv_held(gpuvm__) \ ++ dma_resv_held(drm_gpuvm_resv(gpuvm__)) ++ ++#define drm_gpuvm_resv_assert_held(gpuvm__) \ ++ dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) ++ + static inline struct drm_gpuva * + __drm_gpuva_next(struct drm_gpuva *va) + { +@@ -405,6 +438,125 @@ __drm_gpuva_next(struct drm_gpuva *va) + list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry) + + /** ++ * struct drm_gpuvm_bo - structure representing a &drm_gpuvm and ++ * &drm_gem_object combination ++ * ++ * This structure is an abstraction representing a &drm_gpuvm and ++ * &drm_gem_object combination. It serves as an indirection to accelerate ++ * iterating all &drm_gpuvas within a &drm_gpuvm backed by the same ++ * &drm_gem_object. ++ * ++ * Furthermore it is used cache evicted GEM objects for a certain GPU-VM to ++ * accelerate validation. ++ * ++ * Typically, drivers want to create an instance of a struct drm_gpuvm_bo once ++ * a GEM object is mapped first in a GPU-VM and release the instance once the ++ * last mapping of the GEM object in this GPU-VM is unmapped. ++ */ ++struct drm_gpuvm_bo { ++ /** ++ * @vm: The &drm_gpuvm the @obj is mapped in. This is a reference ++ * counted pointer. ++ */ ++ struct drm_gpuvm *vm; ++ ++ /** ++ * @obj: The &drm_gem_object being mapped in @vm. This is a reference ++ * counted pointer. 
++ */ ++ struct drm_gem_object *obj; ++ ++ /** ++ * @kref: The reference count for this &drm_gpuvm_bo. ++ */ ++ struct kref kref; ++ ++ /** ++ * @list: Structure containing all &list_heads. ++ */ ++ struct { ++ /** ++ * @gpuva: The list of linked &drm_gpuvas. ++ * ++ * It is safe to access entries from this list as long as the ++ * GEM's gpuva lock is held. See also struct drm_gem_object. ++ */ ++ struct list_head gpuva; ++ ++ /** ++ * @entry: Structure containing all &list_heads serving as ++ * entry. ++ */ ++ struct { ++ /** ++ * @gem: List entry to attach to the &drm_gem_objects ++ * gpuva list. ++ */ ++ struct list_head gem; ++ } entry; ++ } list; ++}; ++ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj); ++ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_obtain(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj); ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *vm_bo); ++ ++/** ++ * drm_gpuvm_bo_get() - acquire a struct drm_gpuvm_bo reference ++ * @vm_bo: the &drm_gpuvm_bo to acquire the reference of ++ * ++ * This function acquires an additional reference to @vm_bo. It is illegal to ++ * call this without already holding a reference. No locks required. ++ */ ++static inline struct drm_gpuvm_bo * ++drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm_bo) ++{ ++ kref_get(&vm_bo->kref); ++ return vm_bo; ++} ++ ++void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); ++ ++struct drm_gpuvm_bo * ++drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj); ++ ++/** ++ * drm_gpuvm_bo_for_each_va() - iterator to walk over a list of &drm_gpuva ++ * @va__: &drm_gpuva structure to assign to in each iteration step ++ * @vm_bo__: the &drm_gpuvm_bo the &drm_gpuva to walk are associated with ++ * ++ * This iterator walks over all &drm_gpuva structures associated with the ++ * &drm_gpuvm_bo. ++ * ++ * The caller must hold the GEM's gpuva lock. ++ */ ++#define drm_gpuvm_bo_for_each_va(va__, vm_bo__) \ ++ list_for_each_entry(va__, &(vm_bo)->list.gpuva, gem.entry) ++ ++/** ++ * drm_gpuvm_bo_for_each_va_safe() - iterator to safely walk over a list of ++ * &drm_gpuva ++ * @va__: &drm_gpuva structure to assign to in each iteration step ++ * @next__: &next &drm_gpuva to store the next step ++ * @vm_bo__: the &drm_gpuvm_bo the &drm_gpuva to walk are associated with ++ * ++ * This iterator walks over all &drm_gpuva structures associated with the ++ * &drm_gpuvm_bo. It is implemented with list_for_each_entry_safe(), hence ++ * it is save against removal of elements. ++ * ++ * The caller must hold the GEM's gpuva lock. ++ */ ++#define drm_gpuvm_bo_for_each_va_safe(va__, next__, vm_bo__) \ ++ list_for_each_entry_safe(va__, next__, &(vm_bo)->list.gpuva, gem.entry) ++ ++/** + * enum drm_gpuva_op_type - GPU VA operation type + * + * Operations to alter the GPU VA mappings tracked by the &drm_gpuvm. +@@ -673,8 +825,7 @@ drm_gpuvm_prefetch_ops_create(struct drm + u64 addr, u64 range); + + struct drm_gpuva_ops * +-drm_gpuvm_gem_unmap_ops_create(struct drm_gpuvm *gpuvm, +- struct drm_gem_object *obj); ++drm_gpuvm_bo_unmap_ops_create(struct drm_gpuvm_bo *vm_bo); + + void drm_gpuva_ops_free(struct drm_gpuvm *gpuvm, + struct drm_gpuva_ops *ops); +@@ -727,6 +878,30 @@ struct drm_gpuvm_ops { + void (*op_free)(struct drm_gpuva_op *op); + + /** ++ * @vm_bo_alloc: called when the &drm_gpuvm allocates ++ * a struct drm_gpuvm_bo ++ * ++ * Some drivers may want to embed struct drm_gpuvm_bo into driver ++ * specific structures. 
By implementing this callback drivers can ++ * allocate memory accordingly. ++ * ++ * This callback is optional. ++ */ ++ struct drm_gpuvm_bo *(*vm_bo_alloc)(void); ++ ++ /** ++ * @vm_bo_free: called when the &drm_gpuvm frees a ++ * struct drm_gpuvm_bo ++ * ++ * Some drivers may want to embed struct drm_gpuvm_bo into driver ++ * specific structures. By implementing this callback drivers can ++ * free the previously allocated memory accordingly. ++ * ++ * This callback is optional. ++ */ ++ void (*vm_bo_free)(struct drm_gpuvm_bo *vm_bo); ++ ++ /** + * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the + * mapping once all previous steps were completed + * diff --git a/patches-6.6/034-14-v6.8-drm-gpuvm-track-lock-validate-external-evicted-objects.patch b/patches-6.6/034-14-v6.8-drm-gpuvm-track-lock-validate-external-evicted-objects.patch new file mode 100644 index 0000000..71f0a7e --- /dev/null +++ b/patches-6.6/034-14-v6.8-drm-gpuvm-track-lock-validate-external-evicted-objects.patch @@ -0,0 +1,1052 @@ +From 50c1a36f594bb3dd33f3f9386c5d960cd12327d8 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 8 Nov 2023 01:12:41 +0100 +Subject: [PATCH] drm/gpuvm: track/lock/validate external/evicted objects +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently the DRM GPUVM offers common infrastructure to track GPU VA +allocations and mappings, generically connect GPU VA mappings to their +backing buffers and perform more complex mapping operations on the GPU VA +space. + +However, there are more design patterns commonly used by drivers, which +can potentially be generalized in order to make the DRM GPUVM represent +a basis for GPU-VM implementations. In this context, this patch aims +at generalizing the following elements. + +1) Provide a common dma-resv for GEM objects not being used outside of + this GPU-VM. + +2) Provide tracking of external GEM objects (GEM objects which are + shared with other GPU-VMs). + +3) Provide functions to efficiently lock all GEM objects dma-resv the + GPU-VM contains mappings of. + +4) Provide tracking of evicted GEM objects the GPU-VM contains mappings + of, such that validation of evicted GEM objects is accelerated. + +5) Provide some convinience functions for common patterns. + +Big thanks to Boris Brezillon for his help to figure out locking for +drivers updating the GPU VA space within the fence signalling path. + +Acked-by: Christian König +Reviewed-by: Boris Brezillon +Reviewed-by: Thomas Hellström +Suggested-by: Matthew Brost +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231108001259.15123-12-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 633 ++++++++++++++++++++++++++++++++++++ + include/drm/drm_gpuvm.h | 250 ++++++++++++++ + 2 files changed, 883 insertions(+) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -82,6 +82,21 @@ + * &drm_gem_object list of &drm_gpuvm_bos for an existing instance of this + * particular combination. If not existent a new instance is created and linked + * to the &drm_gem_object. ++ * ++ * &drm_gpuvm_bo structures, since unique for a given &drm_gpuvm, are also used ++ * as entry for the &drm_gpuvm's lists of external and evicted objects. Those ++ * lists are maintained in order to accelerate locking of dma-resv locks and ++ * validation of evicted objects bound in a &drm_gpuvm. 
For instance, all ++ * &drm_gem_object's &dma_resv of a given &drm_gpuvm can be locked by calling ++ * drm_gpuvm_exec_lock(). Once locked drivers can call drm_gpuvm_validate() in ++ * order to validate all evicted &drm_gem_objects. It is also possible to lock ++ * additional &drm_gem_objects by providing the corresponding parameters to ++ * drm_gpuvm_exec_lock() as well as open code the &drm_exec loop while making ++ * use of helper functions such as drm_gpuvm_prepare_range() or ++ * drm_gpuvm_prepare_objects(). ++ * ++ * Every bound &drm_gem_object is treated as external object when its &dma_resv ++ * structure is different than the &drm_gpuvm's common &dma_resv structure. + */ + + /** +@@ -429,6 +444,20 @@ + * Subsequent calls to drm_gpuvm_bo_obtain() for the same &drm_gpuvm and + * &drm_gem_object must be able to observe previous creations and destructions + * of &drm_gpuvm_bos in order to keep instances unique. ++ * ++ * The &drm_gpuvm's lists for keeping track of external and evicted objects are ++ * protected against concurrent insertion / removal and iteration internally. ++ * ++ * However, drivers still need ensure to protect concurrent calls to functions ++ * iterating those lists, namely drm_gpuvm_prepare_objects() and ++ * drm_gpuvm_validate(). ++ * ++ * Alternatively, drivers can set the &DRM_GPUVM_RESV_PROTECTED flag to indicate ++ * that the corresponding &dma_resv locks are held in order to protect the ++ * lists. If &DRM_GPUVM_RESV_PROTECTED is set, internal locking is disabled and ++ * the corresponding lockdep checks are enabled. This is an optimization for ++ * drivers which are capable of taking the corresponding &dma_resv locks and ++ * hence do not require internal locking. + */ + + /** +@@ -641,6 +670,201 @@ + * } + */ + ++/** ++ * get_next_vm_bo_from_list() - get the next vm_bo element ++ * @__gpuvm: the &drm_gpuvm ++ * @__list_name: the name of the list we're iterating on ++ * @__local_list: a pointer to the local list used to store already iterated items ++ * @__prev_vm_bo: the previous element we got from get_next_vm_bo_from_list() ++ * ++ * This helper is here to provide lockless list iteration. Lockless as in, the ++ * iterator releases the lock immediately after picking the first element from ++ * the list, so list insertion deletion can happen concurrently. ++ * ++ * Elements popped from the original list are kept in a local list, so removal ++ * and is_empty checks can still happen while we're iterating the list. 
++ */ ++#define get_next_vm_bo_from_list(__gpuvm, __list_name, __local_list, __prev_vm_bo) \ ++ ({ \ ++ struct drm_gpuvm_bo *__vm_bo = NULL; \ ++ \ ++ drm_gpuvm_bo_put(__prev_vm_bo); \ ++ \ ++ spin_lock(&(__gpuvm)->__list_name.lock); \ ++ if (!(__gpuvm)->__list_name.local_list) \ ++ (__gpuvm)->__list_name.local_list = __local_list; \ ++ else \ ++ drm_WARN_ON((__gpuvm)->drm, \ ++ (__gpuvm)->__list_name.local_list != __local_list); \ ++ \ ++ while (!list_empty(&(__gpuvm)->__list_name.list)) { \ ++ __vm_bo = list_first_entry(&(__gpuvm)->__list_name.list, \ ++ struct drm_gpuvm_bo, \ ++ list.entry.__list_name); \ ++ if (kref_get_unless_zero(&__vm_bo->kref)) { \ ++ list_move_tail(&(__vm_bo)->list.entry.__list_name, \ ++ __local_list); \ ++ break; \ ++ } else { \ ++ list_del_init(&(__vm_bo)->list.entry.__list_name); \ ++ __vm_bo = NULL; \ ++ } \ ++ } \ ++ spin_unlock(&(__gpuvm)->__list_name.lock); \ ++ \ ++ __vm_bo; \ ++ }) ++ ++/** ++ * for_each_vm_bo_in_list() - internal vm_bo list iterator ++ * @__gpuvm: the &drm_gpuvm ++ * @__list_name: the name of the list we're iterating on ++ * @__local_list: a pointer to the local list used to store already iterated items ++ * @__vm_bo: the struct drm_gpuvm_bo to assign in each iteration step ++ * ++ * This helper is here to provide lockless list iteration. Lockless as in, the ++ * iterator releases the lock immediately after picking the first element from the ++ * list, hence list insertion and deletion can happen concurrently. ++ * ++ * It is not allowed to re-assign the vm_bo pointer from inside this loop. ++ * ++ * Typical use: ++ * ++ * struct drm_gpuvm_bo *vm_bo; ++ * LIST_HEAD(my_local_list); ++ * ++ * ret = 0; ++ * for_each_vm_bo_in_list(gpuvm, , &my_local_list, vm_bo) { ++ * ret = do_something_with_vm_bo(..., vm_bo); ++ * if (ret) ++ * break; ++ * } ++ * // Drop ref in case we break out of the loop. ++ * drm_gpuvm_bo_put(vm_bo); ++ * restore_vm_bo_list(gpuvm, , &my_local_list); ++ * ++ * ++ * Only used for internal list iterations, not meant to be exposed to the outside ++ * world. ++ */ ++#define for_each_vm_bo_in_list(__gpuvm, __list_name, __local_list, __vm_bo) \ ++ for (__vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name, \ ++ __local_list, NULL); \ ++ __vm_bo; \ ++ __vm_bo = get_next_vm_bo_from_list(__gpuvm, __list_name, \ ++ __local_list, __vm_bo)) ++ ++static void ++__restore_vm_bo_list(struct drm_gpuvm *gpuvm, spinlock_t *lock, ++ struct list_head *list, struct list_head **local_list) ++{ ++ /* Merge back the two lists, moving local list elements to the ++ * head to preserve previous ordering, in case it matters. ++ */ ++ spin_lock(lock); ++ if (*local_list) { ++ list_splice(*local_list, list); ++ *local_list = NULL; ++ } ++ spin_unlock(lock); ++} ++ ++/** ++ * restore_vm_bo_list() - move vm_bo elements back to their original list ++ * @__gpuvm: the &drm_gpuvm ++ * @__list_name: the name of the list we're iterating on ++ * ++ * When we're done iterating a vm_bo list, we should call restore_vm_bo_list() ++ * to restore the original state and let new iterations take place. 
++ */ ++#define restore_vm_bo_list(__gpuvm, __list_name) \ ++ __restore_vm_bo_list((__gpuvm), &(__gpuvm)->__list_name.lock, \ ++ &(__gpuvm)->__list_name.list, \ ++ &(__gpuvm)->__list_name.local_list) ++ ++static void ++cond_spin_lock(spinlock_t *lock, bool cond) ++{ ++ if (cond) ++ spin_lock(lock); ++} ++ ++static void ++cond_spin_unlock(spinlock_t *lock, bool cond) ++{ ++ if (cond) ++ spin_unlock(lock); ++} ++ ++static void ++__drm_gpuvm_bo_list_add(struct drm_gpuvm *gpuvm, spinlock_t *lock, ++ struct list_head *entry, struct list_head *list) ++{ ++ cond_spin_lock(lock, !!lock); ++ if (list_empty(entry)) ++ list_add_tail(entry, list); ++ cond_spin_unlock(lock, !!lock); ++} ++ ++/** ++ * drm_gpuvm_bo_list_add() - insert a vm_bo into the given list ++ * @__vm_bo: the &drm_gpuvm_bo ++ * @__list_name: the name of the list to insert into ++ * @__lock: whether to lock with the internal spinlock ++ * ++ * Inserts the given @__vm_bo into the list specified by @__list_name. ++ */ ++#define drm_gpuvm_bo_list_add(__vm_bo, __list_name, __lock) \ ++ __drm_gpuvm_bo_list_add((__vm_bo)->vm, \ ++ __lock ? &(__vm_bo)->vm->__list_name.lock : \ ++ NULL, \ ++ &(__vm_bo)->list.entry.__list_name, \ ++ &(__vm_bo)->vm->__list_name.list) ++ ++static void ++__drm_gpuvm_bo_list_del(struct drm_gpuvm *gpuvm, spinlock_t *lock, ++ struct list_head *entry, bool init) ++{ ++ cond_spin_lock(lock, !!lock); ++ if (init) { ++ if (!list_empty(entry)) ++ list_del_init(entry); ++ } else { ++ list_del(entry); ++ } ++ cond_spin_unlock(lock, !!lock); ++} ++ ++/** ++ * drm_gpuvm_bo_list_del_init() - remove a vm_bo from the given list ++ * @__vm_bo: the &drm_gpuvm_bo ++ * @__list_name: the name of the list to insert into ++ * @__lock: whether to lock with the internal spinlock ++ * ++ * Removes the given @__vm_bo from the list specified by @__list_name. ++ */ ++#define drm_gpuvm_bo_list_del_init(__vm_bo, __list_name, __lock) \ ++ __drm_gpuvm_bo_list_del((__vm_bo)->vm, \ ++ __lock ? &(__vm_bo)->vm->__list_name.lock : \ ++ NULL, \ ++ &(__vm_bo)->list.entry.__list_name, \ ++ true) ++ ++/** ++ * drm_gpuvm_bo_list_del() - remove a vm_bo from the given list ++ * @__vm_bo: the &drm_gpuvm_bo ++ * @__list_name: the name of the list to insert into ++ * @__lock: whether to lock with the internal spinlock ++ * ++ * Removes the given @__vm_bo from the list specified by @__list_name. ++ */ ++#define drm_gpuvm_bo_list_del(__vm_bo, __list_name, __lock) \ ++ __drm_gpuvm_bo_list_del((__vm_bo)->vm, \ ++ __lock ? &(__vm_bo)->vm->__list_name.lock : \ ++ NULL, \ ++ &(__vm_bo)->list.entry.__list_name, \ ++ false) ++ + #define to_drm_gpuva(__node) container_of((__node), struct drm_gpuva, rb.node) + + #define GPUVA_START(node) ((node)->va.addr) +@@ -775,6 +999,12 @@ drm_gpuvm_init(struct drm_gpuvm *gpuvm, + gpuvm->rb.tree = RB_ROOT_CACHED; + INIT_LIST_HEAD(&gpuvm->rb.list); + ++ INIT_LIST_HEAD(&gpuvm->extobj.list); ++ spin_lock_init(&gpuvm->extobj.lock); ++ ++ INIT_LIST_HEAD(&gpuvm->evict.list); ++ spin_lock_init(&gpuvm->evict.lock); ++ + kref_init(&gpuvm->kref); + + gpuvm->name = name ? 
name : "unknown"; +@@ -812,6 +1042,11 @@ drm_gpuvm_fini(struct drm_gpuvm *gpuvm) + drm_WARN(gpuvm->drm, !RB_EMPTY_ROOT(&gpuvm->rb.tree.rb_root), + "GPUVA tree is not empty, potentially leaking memory.\n"); + ++ drm_WARN(gpuvm->drm, !list_empty(&gpuvm->extobj.list), ++ "Extobj list should be empty.\n"); ++ drm_WARN(gpuvm->drm, !list_empty(&gpuvm->evict.list), ++ "Evict list should be empty.\n"); ++ + drm_gem_object_put(gpuvm->r_obj); + } + +@@ -844,6 +1079,343 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) + } + EXPORT_SYMBOL_GPL(drm_gpuvm_put); + ++static int ++__drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ LIST_HEAD(extobjs); ++ int ret = 0; ++ ++ for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) { ++ ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); ++ if (ret) ++ break; ++ } ++ /* Drop ref in case we break out of the loop. */ ++ drm_gpuvm_bo_put(vm_bo); ++ restore_vm_bo_list(gpuvm, extobj); ++ ++ return ret; ++} ++ ++static int ++drm_gpuvm_prepare_objects_locked(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ int ret = 0; ++ ++ drm_gpuvm_resv_assert_held(gpuvm); ++ list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) { ++ ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); ++ if (ret) ++ break; ++ ++ if (vm_bo->evicted) ++ drm_gpuvm_bo_list_add(vm_bo, evict, false); ++ } ++ ++ return ret; ++} ++ ++/** ++ * drm_gpuvm_prepare_objects() - prepare all assoiciated BOs ++ * @gpuvm: the &drm_gpuvm ++ * @exec: the &drm_exec locking context ++ * @num_fences: the amount of &dma_fences to reserve ++ * ++ * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given ++ * &drm_gpuvm contains mappings of. ++ * ++ * Using this function directly, it is the drivers responsibility to call ++ * drm_exec_init() and drm_exec_fini() accordingly. ++ * ++ * Note: This function is safe against concurrent insertion and removal of ++ * external objects, however it is not safe against concurrent usage itself. ++ * ++ * Drivers need to make sure to protect this case with either an outer VM lock ++ * or by calling drm_gpuvm_prepare_vm() before this function within the ++ * drm_exec_until_all_locked() loop, such that the GPUVM's dma-resv lock ensures ++ * mutual exclusion. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ if (drm_gpuvm_resv_protected(gpuvm)) ++ return drm_gpuvm_prepare_objects_locked(gpuvm, exec, ++ num_fences); ++ else ++ return __drm_gpuvm_prepare_objects(gpuvm, exec, num_fences); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_objects); ++ ++/** ++ * drm_gpuvm_prepare_range() - prepare all BOs mapped within a given range ++ * @gpuvm: the &drm_gpuvm ++ * @exec: the &drm_exec locking context ++ * @addr: the start address within the VA space ++ * @range: the range to iterate within the VA space ++ * @num_fences: the amount of &dma_fences to reserve ++ * ++ * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr ++ * and @addr + @range. ++ * ++ * Returns: 0 on success, negative error code on failure. 
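To make the note on drm_gpuvm_prepare_vm() concrete, here is a hedged sketch of the open-coded drm_exec loop these helpers are designed for; driver_lock_vm() is a placeholder and error handling is reduced to the minimum.

static int driver_lock_vm(struct drm_gpuvm *gpuvm, struct drm_exec *exec,
                          unsigned int num_fences)
{
        int ret;

        drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT);

        drm_exec_until_all_locked(exec) {
                /* Lock the VM's common dma-resv first ... */
                ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences);
                drm_exec_retry_on_contention(exec);
                if (ret)
                        goto err;

                /* ... then the dma-resv of all external objects. */
                ret = drm_gpuvm_prepare_objects(gpuvm, exec, num_fences);
                drm_exec_retry_on_contention(exec);
                if (ret)
                        goto err;
        }

        /* Locks stay held; the caller releases them with drm_exec_fini(). */
        return 0;

err:
        drm_exec_fini(exec);
        return ret;
}

This is essentially what drm_gpuvm_exec_lock() below does internally.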
++ */ ++int ++drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, struct drm_exec *exec, ++ u64 addr, u64 range, unsigned int num_fences) ++{ ++ struct drm_gpuva *va; ++ u64 end = addr + range; ++ int ret; ++ ++ drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { ++ struct drm_gem_object *obj = va->gem.obj; ++ ++ ret = drm_exec_prepare_obj(exec, obj, num_fences); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_range); ++ ++/** ++ * drm_gpuvm_exec_lock() - lock all dma-resv of all assoiciated BOs ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * ++ * Acquires all dma-resv locks of all &drm_gem_objects the given ++ * &drm_gpuvm contains mappings of. ++ * ++ * Addionally, when calling this function with struct drm_gpuvm_exec::extra ++ * being set the driver receives the given @fn callback to lock additional ++ * dma-resv in the context of the &drm_gpuvm_exec instance. Typically, drivers ++ * would call drm_exec_prepare_obj() from within this callback. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec) ++{ ++ struct drm_gpuvm *gpuvm = vm_exec->vm; ++ struct drm_exec *exec = &vm_exec->exec; ++ unsigned int num_fences = vm_exec->num_fences; ++ int ret; ++ ++ drm_exec_init(exec, vm_exec->flags); ++ ++ drm_exec_until_all_locked(exec) { ++ ret = drm_gpuvm_prepare_vm(gpuvm, exec, num_fences); ++ drm_exec_retry_on_contention(exec); ++ if (ret) ++ goto err; ++ ++ ret = drm_gpuvm_prepare_objects(gpuvm, exec, num_fences); ++ drm_exec_retry_on_contention(exec); ++ if (ret) ++ goto err; ++ ++ if (vm_exec->extra.fn) { ++ ret = vm_exec->extra.fn(vm_exec); ++ drm_exec_retry_on_contention(exec); ++ if (ret) ++ goto err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ drm_exec_fini(exec); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock); ++ ++static int ++fn_lock_array(struct drm_gpuvm_exec *vm_exec) ++{ ++ struct { ++ struct drm_gem_object **objs; ++ unsigned int num_objs; ++ } *args = vm_exec->extra.priv; ++ ++ return drm_exec_prepare_array(&vm_exec->exec, args->objs, ++ args->num_objs, vm_exec->num_fences); ++} ++ ++/** ++ * drm_gpuvm_exec_lock_array() - lock all dma-resv of all assoiciated BOs ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * @objs: additional &drm_gem_objects to lock ++ * @num_objs: the number of additional &drm_gem_objects to lock ++ * ++ * Acquires all dma-resv locks of all &drm_gem_objects the given &drm_gpuvm ++ * contains mappings of, plus the ones given through @objs. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec, ++ struct drm_gem_object **objs, ++ unsigned int num_objs) ++{ ++ struct { ++ struct drm_gem_object **objs; ++ unsigned int num_objs; ++ } args; ++ ++ args.objs = objs; ++ args.num_objs = num_objs; ++ ++ vm_exec->extra.fn = fn_lock_array; ++ vm_exec->extra.priv = &args; ++ ++ return drm_gpuvm_exec_lock(vm_exec); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_array); ++ ++/** ++ * drm_gpuvm_exec_lock_range() - prepare all BOs mapped within a given range ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * @addr: the start address within the VA space ++ * @range: the range to iterate within the VA space ++ * ++ * Acquires all dma-resv locks of all &drm_gem_objects mapped between @addr and ++ * @addr + @range. ++ * ++ * Returns: 0 on success, negative error code on failure. 
++ */ ++int ++drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, ++ u64 addr, u64 range) ++{ ++ struct drm_gpuvm *gpuvm = vm_exec->vm; ++ struct drm_exec *exec = &vm_exec->exec; ++ int ret; ++ ++ drm_exec_init(exec, vm_exec->flags); ++ ++ drm_exec_until_all_locked(exec) { ++ ret = drm_gpuvm_prepare_range(gpuvm, exec, addr, range, ++ vm_exec->num_fences); ++ drm_exec_retry_on_contention(exec); ++ if (ret) ++ goto err; ++ } ++ ++ return ret; ++ ++err: ++ drm_exec_fini(exec); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_exec_lock_range); ++ ++static int ++__drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ struct drm_gpuvm_bo *vm_bo; ++ LIST_HEAD(evict); ++ int ret = 0; ++ ++ for_each_vm_bo_in_list(gpuvm, evict, &evict, vm_bo) { ++ ret = ops->vm_bo_validate(vm_bo, exec); ++ if (ret) ++ break; ++ } ++ /* Drop ref in case we break out of the loop. */ ++ drm_gpuvm_bo_put(vm_bo); ++ restore_vm_bo_list(gpuvm, evict); ++ ++ return ret; ++} ++ ++static int ++drm_gpuvm_validate_locked(struct drm_gpuvm *gpuvm, struct drm_exec *exec) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ struct drm_gpuvm_bo *vm_bo, *next; ++ int ret = 0; ++ ++ drm_gpuvm_resv_assert_held(gpuvm); ++ ++ list_for_each_entry_safe(vm_bo, next, &gpuvm->evict.list, ++ list.entry.evict) { ++ ret = ops->vm_bo_validate(vm_bo, exec); ++ if (ret) ++ break; ++ ++ dma_resv_assert_held(vm_bo->obj->resv); ++ if (!vm_bo->evicted) ++ drm_gpuvm_bo_list_del_init(vm_bo, evict, false); ++ } ++ ++ return ret; ++} ++ ++/** ++ * drm_gpuvm_validate() - validate all BOs marked as evicted ++ * @gpuvm: the &drm_gpuvm to validate evicted BOs ++ * @exec: the &drm_exec instance used for locking the GPUVM ++ * ++ * Calls the &drm_gpuvm_ops::vm_bo_validate callback for all evicted buffer ++ * objects being mapped in the given &drm_gpuvm. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec) ++{ ++ const struct drm_gpuvm_ops *ops = gpuvm->ops; ++ ++ if (unlikely(!ops || !ops->vm_bo_validate)) ++ return -EOPNOTSUPP; ++ ++ if (drm_gpuvm_resv_protected(gpuvm)) ++ return drm_gpuvm_validate_locked(gpuvm, exec); ++ else ++ return __drm_gpuvm_validate(gpuvm, exec); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_validate); ++ ++/** ++ * drm_gpuvm_resv_add_fence - add fence to private and all extobj ++ * dma-resv ++ * @gpuvm: the &drm_gpuvm to add a fence to ++ * @exec: the &drm_exec locking context ++ * @fence: fence to add ++ * @private_usage: private dma-resv usage ++ * @extobj_usage: extobj dma-resv usage ++ */ ++void ++drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage) ++{ ++ struct drm_gem_object *obj; ++ unsigned long index; ++ ++ drm_exec_for_each_locked_object(exec, index, obj) { ++ dma_resv_assert_held(obj->resv); ++ dma_resv_add_fence(obj->resv, fence, ++ drm_gpuvm_is_extobj(gpuvm, obj) ? ++ extobj_usage : private_usage); ++ } ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_resv_add_fence); ++ + /** + * drm_gpuvm_bo_create() - create a new instance of struct drm_gpuvm_bo + * @gpuvm: The &drm_gpuvm the @obj is mapped in. 
+@@ -877,6 +1449,9 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gp + INIT_LIST_HEAD(&vm_bo->list.gpuva); + INIT_LIST_HEAD(&vm_bo->list.entry.gem); + ++ INIT_LIST_HEAD(&vm_bo->list.entry.extobj); ++ INIT_LIST_HEAD(&vm_bo->list.entry.evict); ++ + return vm_bo; + } + EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create); +@@ -894,6 +1469,9 @@ drm_gpuvm_bo_destroy(struct kref *kref) + if (!lock) + drm_gpuvm_resv_assert_held(gpuvm); + ++ drm_gpuvm_bo_list_del(vm_bo, extobj, lock); ++ drm_gpuvm_bo_list_del(vm_bo, evict, lock); ++ + drm_gem_gpuva_assert_lock_held(obj); + list_del(&vm_bo->list.entry.gem); + +@@ -1037,6 +1615,61 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_ + } + EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain_prealloc); + ++/** ++ * drm_gpuvm_bo_extobj_add() - adds the &drm_gpuvm_bo to its &drm_gpuvm's ++ * extobj list ++ * @vm_bo: The &drm_gpuvm_bo to add to its &drm_gpuvm's the extobj list. ++ * ++ * Adds the given @vm_bo to its &drm_gpuvm's extobj list if not on the list ++ * already and if the corresponding &drm_gem_object is an external object, ++ * actually. ++ */ ++void ++drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo) ++{ ++ struct drm_gpuvm *gpuvm = vm_bo->vm; ++ bool lock = !drm_gpuvm_resv_protected(gpuvm); ++ ++ if (!lock) ++ drm_gpuvm_resv_assert_held(gpuvm); ++ ++ if (drm_gpuvm_is_extobj(gpuvm, vm_bo->obj)) ++ drm_gpuvm_bo_list_add(vm_bo, extobj, lock); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_extobj_add); ++ ++/** ++ * drm_gpuvm_bo_evict() - add / remove a &drm_gpuvm_bo to / from the &drm_gpuvms ++ * evicted list ++ * @vm_bo: the &drm_gpuvm_bo to add or remove ++ * @evict: indicates whether the object is evicted ++ * ++ * Adds a &drm_gpuvm_bo to or removes it from the &drm_gpuvms evicted list. ++ */ ++void ++drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict) ++{ ++ struct drm_gpuvm *gpuvm = vm_bo->vm; ++ struct drm_gem_object *obj = vm_bo->obj; ++ bool lock = !drm_gpuvm_resv_protected(gpuvm); ++ ++ dma_resv_assert_held(obj->resv); ++ vm_bo->evicted = evict; ++ ++ /* Can't add external objects to the evicted list directly if not using ++ * internal spinlocks, since in this case the evicted list is protected ++ * with the VM's common dma-resv lock. ++ */ ++ if (drm_gpuvm_is_extobj(gpuvm, obj) && !lock) ++ return; ++ ++ if (evict) ++ drm_gpuvm_bo_list_add(vm_bo, evict, lock); ++ else ++ drm_gpuvm_bo_list_del_init(vm_bo, evict, lock); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_bo_evict); ++ + static int + __drm_gpuva_insert(struct drm_gpuvm *gpuvm, + struct drm_gpuva *va) +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -32,6 +32,7 @@ + + #include + #include ++#include + + struct drm_gpuvm; + struct drm_gpuvm_bo; +@@ -283,6 +284,50 @@ struct drm_gpuvm { + * @r_obj: Resv GEM object; representing the GPUVM's common &dma_resv. 
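As a hedged continuation of the earlier map-path sketch (again not taken from the patch, and assuming the default internal-spinlock mode rather than DRM_GPUVM_RESV_PROTECTED), a driver might advertise shared BOs via drm_gpuvm_bo_extobj_add() right after obtaining the vm_bo.

static int driver_map_prepare_bo(struct drm_gpuvm *gpuvm,
                                 struct drm_gem_object *obj,
                                 struct drm_gpuvm_bo **p_vm_bo)
{
        struct drm_gpuvm_bo *vm_bo;

        dma_resv_lock(obj->resv, NULL);
        vm_bo = drm_gpuvm_bo_obtain(gpuvm, obj);
        if (!IS_ERR(vm_bo)) {
                /*
                 * No-op for BOs sharing the VM's common dma-resv; external
                 * BOs are put on the VM's extobj list (only once).
                 */
                drm_gpuvm_bo_extobj_add(vm_bo);
        }
        dma_resv_unlock(obj->resv);

        if (IS_ERR(vm_bo))
                return PTR_ERR(vm_bo);

        *p_vm_bo = vm_bo;
        return 0;
}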
+ */ + struct drm_gem_object *r_obj; ++ ++ /** ++ * @extobj: structure holding the extobj list ++ */ ++ struct { ++ /** ++ * @list: &list_head storing &drm_gpuvm_bos serving as ++ * external object ++ */ ++ struct list_head list; ++ ++ /** ++ * @local_list: pointer to the local list temporarily storing ++ * entries from the external object list ++ */ ++ struct list_head *local_list; ++ ++ /** ++ * @lock: spinlock to protect the extobj list ++ */ ++ spinlock_t lock; ++ } extobj; ++ ++ /** ++ * @evict: structure holding the evict list and evict list lock ++ */ ++ struct { ++ /** ++ * @list: &list_head storing &drm_gpuvm_bos currently being ++ * evicted ++ */ ++ struct list_head list; ++ ++ /** ++ * @local_list: pointer to the local list temporarily storing ++ * entries from the evicted object list ++ */ ++ struct list_head *local_list; ++ ++ /** ++ * @lock: spinlock to protect the evict list ++ */ ++ spinlock_t lock; ++ } evict; + }; + + void drm_gpuvm_init(struct drm_gpuvm *gpuvm, const char *name, +@@ -359,6 +404,22 @@ drm_gpuvm_resv_protected(struct drm_gpuv + #define drm_gpuvm_resv_assert_held(gpuvm__) \ + dma_resv_assert_held(drm_gpuvm_resv(gpuvm__)) + ++/** ++ * drm_gpuvm_is_extobj() - indicates whether the given &drm_gem_object is an ++ * external object ++ * @gpuvm: the &drm_gpuvm to check ++ * @obj: the &drm_gem_object to check ++ * ++ * Returns: true if the &drm_gem_object &dma_resv differs from the ++ * &drm_gpuvms &dma_resv, false otherwise ++ */ ++static inline bool ++drm_gpuvm_is_extobj(struct drm_gpuvm *gpuvm, ++ struct drm_gem_object *obj) ++{ ++ return obj && obj->resv != drm_gpuvm_resv(gpuvm); ++} ++ + static inline struct drm_gpuva * + __drm_gpuva_next(struct drm_gpuva *va) + { +@@ -438,6 +499,144 @@ __drm_gpuva_next(struct drm_gpuva *va) + list_for_each_entry_safe(va__, next__, &(gpuvm__)->rb.list, rb.entry) + + /** ++ * struct drm_gpuvm_exec - &drm_gpuvm abstraction of &drm_exec ++ * ++ * This structure should be created on the stack as &drm_exec should be. ++ * ++ * Optionally, @extra can be set in order to lock additional &drm_gem_objects. ++ */ ++struct drm_gpuvm_exec { ++ /** ++ * @exec: the &drm_exec structure ++ */ ++ struct drm_exec exec; ++ ++ /** ++ * @flags: the flags for the struct drm_exec ++ */ ++ uint32_t flags; ++ ++ /** ++ * @vm: the &drm_gpuvm to lock its DMA reservations ++ */ ++ struct drm_gpuvm *vm; ++ ++ /** ++ * @num_fences: the number of fences to reserve for the &dma_resv of the ++ * locked &drm_gem_objects ++ */ ++ unsigned int num_fences; ++ ++ /** ++ * @extra: Callback and corresponding private data for the driver to ++ * lock arbitrary additional &drm_gem_objects. ++ */ ++ struct { ++ /** ++ * @fn: The driver callback to lock additional &drm_gem_objects. ++ */ ++ int (*fn)(struct drm_gpuvm_exec *vm_exec); ++ ++ /** ++ * @priv: driver private data for the @fn callback ++ */ ++ void *priv; ++ } extra; ++}; ++ ++/** ++ * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv ++ * @gpuvm: the &drm_gpuvm ++ * @exec: the &drm_exec context ++ * @num_fences: the amount of &dma_fences to reserve ++ * ++ * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object. ++ * ++ * Using this function directly, it is the drivers responsibility to call ++ * drm_exec_init() and drm_exec_fini() accordingly. ++ * ++ * Returns: 0 on success, negative error code on failure. 
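Putting the pieces together, a hedged sketch of a submit path built on struct drm_gpuvm_exec; driver_job_submit(), the fence argument and the chosen dma_resv usages are assumptions, and drm_gpuvm_exec_validate(), drm_gpuvm_exec_resv_add_fence() and drm_gpuvm_exec_unlock() are the wrappers declared further below.

static int driver_job_submit(struct drm_gpuvm *gpuvm, struct dma_fence *fence)
{
        struct drm_gpuvm_exec vm_exec = {
                .vm = gpuvm,
                .flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
                .num_fences = 1,
        };
        int ret;

        /* Lock the VM resv and the dma-resv of all external objects. */
        ret = drm_gpuvm_exec_lock(&vm_exec);
        if (ret)
                return ret;

        /* Re-validate everything currently on the evicted list. */
        ret = drm_gpuvm_exec_validate(&vm_exec);
        if (ret)
                goto out_unlock;

        /* ... push the job to the hardware and obtain @fence ... */

        drm_gpuvm_exec_resv_add_fence(&vm_exec, fence,
                                      DMA_RESV_USAGE_BOOKKEEP,
                                      DMA_RESV_USAGE_BOOKKEEP);

out_unlock:
        drm_gpuvm_exec_unlock(&vm_exec);
        return ret;
}

Keeping drm_gpuvm_exec on the stack mirrors how drm_exec itself is meant to be used.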
++ */ ++static inline int ++drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences); ++} ++ ++int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences); ++ ++int drm_gpuvm_prepare_range(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ u64 addr, u64 range, ++ unsigned int num_fences); ++ ++int drm_gpuvm_exec_lock(struct drm_gpuvm_exec *vm_exec); ++ ++int drm_gpuvm_exec_lock_array(struct drm_gpuvm_exec *vm_exec, ++ struct drm_gem_object **objs, ++ unsigned int num_objs); ++ ++int drm_gpuvm_exec_lock_range(struct drm_gpuvm_exec *vm_exec, ++ u64 addr, u64 range); ++ ++/** ++ * drm_gpuvm_exec_unlock() - lock all dma-resv of all assoiciated BOs ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * ++ * Releases all dma-resv locks of all &drm_gem_objects previously acquired ++ * through drm_gpuvm_exec_lock() or its variants. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++static inline void ++drm_gpuvm_exec_unlock(struct drm_gpuvm_exec *vm_exec) ++{ ++ drm_exec_fini(&vm_exec->exec); ++} ++ ++int drm_gpuvm_validate(struct drm_gpuvm *gpuvm, struct drm_exec *exec); ++void drm_gpuvm_resv_add_fence(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage); ++ ++/** ++ * drm_gpuvm_exec_resv_add_fence() ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * @fence: fence to add ++ * @private_usage: private dma-resv usage ++ * @extobj_usage: extobj dma-resv usage ++ * ++ * See drm_gpuvm_resv_add_fence(). ++ */ ++static inline void ++drm_gpuvm_exec_resv_add_fence(struct drm_gpuvm_exec *vm_exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage) ++{ ++ drm_gpuvm_resv_add_fence(vm_exec->vm, &vm_exec->exec, fence, ++ private_usage, extobj_usage); ++} ++ ++/** ++ * drm_gpuvm_exec_validate() ++ * @vm_exec: the &drm_gpuvm_exec wrapper ++ * ++ * See drm_gpuvm_validate(). ++ */ ++static inline int ++drm_gpuvm_exec_validate(struct drm_gpuvm_exec *vm_exec) ++{ ++ return drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec); ++} ++ ++/** + * struct drm_gpuvm_bo - structure representing a &drm_gpuvm and + * &drm_gem_object combination + * +@@ -467,6 +666,12 @@ struct drm_gpuvm_bo { + struct drm_gem_object *obj; + + /** ++ * @evicted: Indicates whether the &drm_gem_object is evicted; field ++ * protected by the &drm_gem_object's dma-resv lock. ++ */ ++ bool evicted; ++ ++ /** + * @kref: The reference count for this &drm_gpuvm_bo. + */ + struct kref kref; +@@ -493,6 +698,18 @@ struct drm_gpuvm_bo { + * gpuva list. + */ + struct list_head gem; ++ ++ /** ++ * @evict: List entry to attach to the &drm_gpuvms ++ * extobj list. ++ */ ++ struct list_head extobj; ++ ++ /** ++ * @evict: List entry to attach to the &drm_gpuvms evict ++ * list. ++ */ ++ struct list_head evict; + } entry; + } list; + }; +@@ -527,6 +744,27 @@ struct drm_gpuvm_bo * + drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, + struct drm_gem_object *obj); + ++void drm_gpuvm_bo_evict(struct drm_gpuvm_bo *vm_bo, bool evict); ++ ++/** ++ * drm_gpuvm_bo_gem_evict() ++ * @obj: the &drm_gem_object ++ * @evict: indicates whether @obj is evicted ++ * ++ * See drm_gpuvm_bo_evict(). 
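For a picture of how the evicted list gets populated in the first place, a hedged sketch of a TTM-based driver's move path; this mirrors the nouveau_bo_move() hunk in the follow-up patch, and driver_bo_evicted() is a stand-in name.

static void driver_bo_evicted(struct ttm_buffer_object *bo, bool evicted)
{
        struct drm_gem_object *obj = &bo->base;

        /* The BO's dma-resv is held across a TTM move, as required. */
        drm_gpuvm_bo_gem_evict(obj, evicted);
}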
++ */ ++static inline void ++drm_gpuvm_bo_gem_evict(struct drm_gem_object *obj, bool evict) ++{ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ drm_gem_gpuva_assert_lock_held(obj); ++ drm_gem_for_each_gpuvm_bo(vm_bo, obj) ++ drm_gpuvm_bo_evict(vm_bo, evict); ++} ++ ++void drm_gpuvm_bo_extobj_add(struct drm_gpuvm_bo *vm_bo); ++ + /** + * drm_gpuvm_bo_for_each_va() - iterator to walk over a list of &drm_gpuva + * @va__: &drm_gpuva structure to assign to in each iteration step +@@ -902,6 +1140,18 @@ struct drm_gpuvm_ops { + void (*vm_bo_free)(struct drm_gpuvm_bo *vm_bo); + + /** ++ * @vm_bo_validate: called from drm_gpuvm_validate() ++ * ++ * Drivers receive this callback for every evicted &drm_gem_object being ++ * mapped in the corresponding &drm_gpuvm. ++ * ++ * Typically, drivers would call their driver specific variant of ++ * ttm_bo_validate() from within this callback. ++ */ ++ int (*vm_bo_validate)(struct drm_gpuvm_bo *vm_bo, ++ struct drm_exec *exec); ++ ++ /** + * @sm_step_map: called from &drm_gpuvm_sm_map to finally insert the + * mapping once all previous steps were completed + * diff --git a/patches-6.6/034-15-v6.8-drm-nouveau-use-GPUVM-common-infrastructure.patch b/patches-6.6/034-15-v6.8-drm-nouveau-use-GPUVM-common-infrastructure.patch new file mode 100644 index 0000000..f90fc12 --- /dev/null +++ b/patches-6.6/034-15-v6.8-drm-nouveau-use-GPUVM-common-infrastructure.patch @@ -0,0 +1,448 @@ +From 014f831abcb82738e57c0b00db66dfef0798ed67 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Mon, 13 Nov 2023 23:12:00 +0100 +Subject: [PATCH] drm/nouveau: use GPUVM common infrastructure + +GPUVM provides common infrastructure to track external and evicted GEM +objects as well as locking and validation helpers. + +Especially external and evicted object tracking is a huge improvement +compared to the current brute force approach of iterating all mappings +in order to lock and validate the GPUVM's GEM objects. Hence, make us of +it. 
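On the driver side this boils down to implementing the vm_bo_validate() hook introduced above; a hedged sketch with made-up driver names (struct driver_bo, to_driver_bo() and driver_bo_validate() are stand-ins, not the actual nouveau helpers).

static int driver_gpuvm_bo_validate(struct drm_gpuvm_bo *vm_bo,
                                    struct drm_exec *exec)
{
        /* The BO's dma-resv is already locked by drm_gpuvm_exec_lock(). */
        struct driver_bo *bo = to_driver_bo(vm_bo->obj);

        return driver_bo_validate(bo);
}

static const struct drm_gpuvm_ops driver_gpuvm_ops = {
        .vm_bo_validate = driver_gpuvm_bo_validate,
        /* ... plus the other callbacks the driver implements */
};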
+ +Signed-off-by: Danilo Krummrich +Reviewed-by: Dave Airlie +Link: https://patchwork.freedesktop.org/patch/msgid/20231113221202.7203-1-dakr@redhat.com +--- + drivers/gpu/drm/nouveau/nouveau_bo.c | 4 +- + drivers/gpu/drm/nouveau/nouveau_exec.c | 57 +++------- + drivers/gpu/drm/nouveau/nouveau_exec.h | 4 - + drivers/gpu/drm/nouveau/nouveau_sched.c | 9 +- + drivers/gpu/drm/nouveau/nouveau_sched.h | 7 +- + drivers/gpu/drm/nouveau/nouveau_uvmm.c | 134 +++++++++++++----------- + 6 files changed, 100 insertions(+), 115 deletions(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_bo.c ++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c +@@ -1056,17 +1056,18 @@ nouveau_bo_move(struct ttm_buffer_object + { + struct nouveau_drm *drm = nouveau_bdev(bo->bdev); + struct nouveau_bo *nvbo = nouveau_bo(bo); ++ struct drm_gem_object *obj = &bo->base; + struct ttm_resource *old_reg = bo->resource; + struct nouveau_drm_tile *new_tile = NULL; + int ret = 0; + +- + if (new_reg->mem_type == TTM_PL_TT) { + ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg); + if (ret) + return ret; + } + ++ drm_gpuvm_bo_gem_evict(obj, evict); + nouveau_bo_move_ntfy(bo, new_reg); + ret = ttm_bo_wait_ctx(bo, ctx); + if (ret) +@@ -1131,6 +1132,7 @@ out: + out_ntfy: + if (ret) { + nouveau_bo_move_ntfy(bo, bo->resource); ++ drm_gpuvm_bo_gem_evict(obj, !evict); + } + return ret; + } +--- a/drivers/gpu/drm/nouveau/nouveau_exec.c ++++ b/drivers/gpu/drm/nouveau/nouveau_exec.c +@@ -1,7 +1,5 @@ + // SPDX-License-Identifier: MIT + +-#include +- + #include "nouveau_drv.h" + #include "nouveau_gem.h" + #include "nouveau_mem.h" +@@ -86,14 +84,12 @@ + */ + + static int +-nouveau_exec_job_submit(struct nouveau_job *job) ++nouveau_exec_job_submit(struct nouveau_job *job, ++ struct drm_gpuvm_exec *vme) + { + struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); + struct nouveau_cli *cli = job->cli; + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli); +- struct drm_exec *exec = &job->exec; +- struct drm_gem_object *obj; +- unsigned long index; + int ret; + + /* Create a new fence, but do not emit yet. 
*/ +@@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_j + return ret; + + nouveau_uvmm_lock(uvmm); +- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | +- DRM_EXEC_IGNORE_DUPLICATES); +- drm_exec_until_all_locked(exec) { +- struct drm_gpuva *va; +- +- drm_gpuvm_for_each_va(va, &uvmm->base) { +- if (unlikely(va == &uvmm->base.kernel_alloc_node)) +- continue; +- +- ret = drm_exec_prepare_obj(exec, va->gem.obj, 1); +- drm_exec_retry_on_contention(exec); +- if (ret) +- goto err_uvmm_unlock; +- } ++ ret = drm_gpuvm_exec_lock(vme); ++ if (ret) { ++ nouveau_uvmm_unlock(uvmm); ++ return ret; + } + nouveau_uvmm_unlock(uvmm); + +- drm_exec_for_each_locked_object(exec, index, obj) { +- struct nouveau_bo *nvbo = nouveau_gem_object(obj); +- +- ret = nouveau_bo_validate(nvbo, true, false); +- if (ret) +- goto err_exec_fini; ++ ret = drm_gpuvm_exec_validate(vme); ++ if (ret) { ++ drm_gpuvm_exec_unlock(vme); ++ return ret; + } + + return 0; +- +-err_uvmm_unlock: +- nouveau_uvmm_unlock(uvmm); +-err_exec_fini: +- drm_exec_fini(exec); +- return ret; +- + } + + static void +-nouveau_exec_job_armed_submit(struct nouveau_job *job) ++nouveau_exec_job_armed_submit(struct nouveau_job *job, ++ struct drm_gpuvm_exec *vme) + { +- struct drm_exec *exec = &job->exec; +- struct drm_gem_object *obj; +- unsigned long index; +- +- drm_exec_for_each_locked_object(exec, index, obj) +- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); +- +- drm_exec_fini(exec); ++ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, ++ job->resv_usage, job->resv_usage); ++ drm_gpuvm_exec_unlock(vme); + } + + static struct dma_fence * +--- a/drivers/gpu/drm/nouveau/nouveau_exec.h ++++ b/drivers/gpu/drm/nouveau/nouveau_exec.h +@@ -3,16 +3,12 @@ + #ifndef __NOUVEAU_EXEC_H__ + #define __NOUVEAU_EXEC_H__ + +-#include +- + #include "nouveau_drv.h" + #include "nouveau_sched.h" + + struct nouveau_exec_job_args { + struct drm_file *file_priv; + struct nouveau_sched_entity *sched_entity; +- +- struct drm_exec exec; + struct nouveau_channel *chan; + + struct { +--- a/drivers/gpu/drm/nouveau/nouveau_sched.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.c +@@ -263,6 +263,11 @@ nouveau_job_submit(struct nouveau_job *j + { + struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity); + struct dma_fence *done_fence = NULL; ++ struct drm_gpuvm_exec vm_exec = { ++ .vm = &nouveau_cli_uvmm(job->cli)->base, ++ .flags = DRM_EXEC_IGNORE_DUPLICATES, ++ .num_fences = 1, ++ }; + int ret; + + ret = nouveau_job_add_deps(job); +@@ -282,7 +287,7 @@ nouveau_job_submit(struct nouveau_job *j + * successfully. + */ + if (job->ops->submit) { +- ret = job->ops->submit(job); ++ ret = job->ops->submit(job, &vm_exec); + if (ret) + goto err_cleanup; + } +@@ -315,7 +320,7 @@ nouveau_job_submit(struct nouveau_job *j + set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags); + + if (job->ops->armed_submit) +- job->ops->armed_submit(job); ++ job->ops->armed_submit(job, &vm_exec); + + nouveau_job_fence_attach(job); + +--- a/drivers/gpu/drm/nouveau/nouveau_sched.h ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.h +@@ -5,7 +5,7 @@ + + #include + +-#include ++#include + #include + + #include "nouveau_drv.h" +@@ -54,7 +54,6 @@ struct nouveau_job { + struct drm_file *file_priv; + struct nouveau_cli *cli; + +- struct drm_exec exec; + enum dma_resv_usage resv_usage; + struct dma_fence *done_fence; + +@@ -76,8 +75,8 @@ struct nouveau_job { + /* If .submit() returns without any error, it is guaranteed that + * armed_submit() is called. 
+ */ +- int (*submit)(struct nouveau_job *); +- void (*armed_submit)(struct nouveau_job *); ++ int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *); ++ void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *); + struct dma_fence *(*run)(struct nouveau_job *); + void (*free)(struct nouveau_job *); + enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *); +--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c ++++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c +@@ -438,8 +438,9 @@ nouveau_uvma_region_complete(struct nouv + static void + op_map_prepare_unwind(struct nouveau_uvma *uvma) + { ++ struct drm_gpuva *va = &uvma->va; + nouveau_uvma_gem_put(uvma); +- drm_gpuva_remove(&uvma->va); ++ drm_gpuva_remove(va); + nouveau_uvma_free(uvma); + } + +@@ -468,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct no + break; + case DRM_GPUVA_OP_REMAP: { + struct drm_gpuva_op_remap *r = &op->remap; ++ struct drm_gpuva *va = r->unmap->va; + + if (r->next) + op_map_prepare_unwind(new->next); +@@ -475,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct no + if (r->prev) + op_map_prepare_unwind(new->prev); + +- op_unmap_prepare_unwind(r->unmap->va); ++ op_unmap_prepare_unwind(va); + break; + } + case DRM_GPUVA_OP_UNMAP: +@@ -634,6 +636,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_u + goto unwind; + } + } ++ + break; + } + case DRM_GPUVA_OP_REMAP: { +@@ -1135,12 +1138,53 @@ bind_link_gpuvas(struct bind_job_op *bop + } + + static int +-nouveau_uvmm_bind_job_submit(struct nouveau_job *job) ++bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); ++ struct bind_job_op *op; ++ int ret; ++ ++ list_for_each_op(op, &bind_job->ops) { ++ struct drm_gpuva_op *va_op; ++ ++ if (!op->ops) ++ continue; ++ ++ drm_gpuva_for_each_op(va_op, op->ops) { ++ struct drm_gem_object *obj = op_gem_obj(va_op); ++ ++ if (unlikely(!obj)) ++ continue; ++ ++ ret = drm_exec_prepare_obj(exec, obj, num_fences); ++ if (ret) ++ return ret; ++ ++ /* Don't validate GEMs backing mappings we're about to ++ * unmap, it's not worth the effort. ++ */ ++ if (va_op->op == DRM_GPUVA_OP_UNMAP) ++ continue; ++ ++ ret = nouveau_bo_validate(nouveau_gem_object(obj), ++ true, false); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++static int ++nouveau_uvmm_bind_job_submit(struct nouveau_job *job, ++ struct drm_gpuvm_exec *vme) + { + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); + struct nouveau_sched_entity *entity = job->entity; +- struct drm_exec *exec = &job->exec; ++ struct drm_exec *exec = &vme->exec; + struct bind_job_op *op; + int ret; + +@@ -1157,6 +1201,8 @@ nouveau_uvmm_bind_job_submit(struct nouv + dma_resv_unlock(obj->resv); + if (IS_ERR(op->vm_bo)) + return PTR_ERR(op->vm_bo); ++ ++ drm_gpuvm_bo_extobj_add(op->vm_bo); + } + + ret = bind_validate_op(job, op); +@@ -1179,6 +1225,7 @@ nouveau_uvmm_bind_job_submit(struct nouv + * unwind all GPU VA space changes on failure. 
+ */ + nouveau_uvmm_lock(uvmm); ++ + list_for_each_op(op, &bind_job->ops) { + switch (op->op) { + case OP_MAP_SPARSE: +@@ -1290,55 +1337,13 @@ nouveau_uvmm_bind_job_submit(struct nouv + } + } + +- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | +- DRM_EXEC_IGNORE_DUPLICATES); ++ drm_exec_init(exec, vme->flags); + drm_exec_until_all_locked(exec) { +- list_for_each_op(op, &bind_job->ops) { +- struct drm_gpuva_op *va_op; +- +- if (IS_ERR_OR_NULL(op->ops)) +- continue; +- +- drm_gpuva_for_each_op(va_op, op->ops) { +- struct drm_gem_object *obj = op_gem_obj(va_op); +- +- if (unlikely(!obj)) +- continue; +- +- ret = drm_exec_prepare_obj(exec, obj, 1); +- drm_exec_retry_on_contention(exec); +- if (ret) { +- op = list_last_op(&bind_job->ops); +- goto unwind; +- } +- } +- } +- } +- +- list_for_each_op(op, &bind_job->ops) { +- struct drm_gpuva_op *va_op; +- +- if (IS_ERR_OR_NULL(op->ops)) +- continue; +- +- drm_gpuva_for_each_op(va_op, op->ops) { +- struct drm_gem_object *obj = op_gem_obj(va_op); +- +- if (unlikely(!obj)) +- continue; +- +- /* Don't validate GEMs backing mappings we're about to +- * unmap, it's not worth the effort. +- */ +- if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP)) +- continue; +- +- ret = nouveau_bo_validate(nouveau_gem_object(obj), +- true, false); +- if (ret) { +- op = list_last_op(&bind_job->ops); +- goto unwind; +- } ++ ret = bind_lock_validate(job, exec, vme->num_fences); ++ drm_exec_retry_on_contention(exec); ++ if (ret) { ++ op = list_last_op(&bind_job->ops); ++ goto unwind; + } + } + +@@ -1413,21 +1418,17 @@ unwind: + } + + nouveau_uvmm_unlock(uvmm); +- drm_exec_fini(exec); ++ drm_gpuvm_exec_unlock(vme); + return ret; + } + + static void +-nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job) ++nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job, ++ struct drm_gpuvm_exec *vme) + { +- struct drm_exec *exec = &job->exec; +- struct drm_gem_object *obj; +- unsigned long index; +- +- drm_exec_for_each_locked_object(exec, index, obj) +- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); +- +- drm_exec_fini(exec); ++ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, ++ job->resv_usage, job->resv_usage); ++ drm_gpuvm_exec_unlock(vme); + } + + static struct dma_fence * +@@ -1815,8 +1816,17 @@ nouveau_uvmm_free(struct drm_gpuvm *gpuv + kfree(uvmm); + } + ++static int ++nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) ++{ ++ struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); ++ ++ return nouveau_bo_validate(nvbo, true, false); ++} ++ + static const struct drm_gpuvm_ops gpuvm_ops = { + .vm_free = nouveau_uvmm_free, ++ .vm_bo_validate = nouveau_uvmm_bo_validate, + }; + + int diff --git a/patches-6.6/034-16-v6.8-drm-gpuvm-Helper-to-get-range-of-unmap-from-a-remap-op..patch b/patches-6.6/034-16-v6.8-drm-gpuvm-Helper-to-get-range-of-unmap-from-a-remap-op..patch new file mode 100644 index 0000000..10cbd78 --- /dev/null +++ b/patches-6.6/034-16-v6.8-drm-gpuvm-Helper-to-get-range-of-unmap-from-a-remap-op..patch @@ -0,0 +1,60 @@ +From a191f73d85484f804284674c14f2d9f572c18adb Mon Sep 17 00:00:00 2001 +From: Donald Robson +Date: Wed, 22 Nov 2023 16:34:23 +0000 +Subject: [PATCH] drm/gpuvm: Helper to get range of unmap from a remap op. + +Determining the start and range of the unmap stage of a remap op is a +common piece of code currently implemented by multiple drivers. Add a +helper for this. 
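+
+For illustration only (hypothetical driver code, not part of this patch),
+a typical caller ends up looking like this, where `r` is a
+&drm_gpuva_op_remap and the unmap function is a placeholder:
+
+  u64 unmap_start, unmap_range;
+
+  drm_gpuva_op_remap_to_unmap_range(r, &unmap_start, &unmap_range);
+  /* Tear down the now-empty range [unmap_start, unmap_start + unmap_range). */
+  my_mmu_unmap_pages(mmu, unmap_start, unmap_range);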
+ +Changes since v7: +- Renamed helper to drm_gpuva_op_remap_to_unmap_range() +- Improved documentation + +Changes since v6: +- Remove use of __always_inline + +Signed-off-by: Donald Robson +Signed-off-by: Sarah Walker +Reviewed-by: Danilo Krummrich +Link: https://lore.kernel.org/r/8a0a5b5eeec459d3c60fcdaa5a638ad14a18a59e.1700668843.git.donald.robson@imgtec.com +Signed-off-by: Maxime Ripard +--- + include/drm/drm_gpuvm.h | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -1213,4 +1213,32 @@ void drm_gpuva_remap(struct drm_gpuva *p + + void drm_gpuva_unmap(struct drm_gpuva_op_unmap *op); + ++/** ++ * drm_gpuva_op_remap_to_unmap_range() - Helper to get the start and range of ++ * the unmap stage of a remap op. ++ * @op: Remap op. ++ * @start_addr: Output pointer for the start of the required unmap. ++ * @range: Output pointer for the length of the required unmap. ++ * ++ * The given start address and range will be set such that they represent the ++ * range of the address space that was previously covered by the mapping being ++ * re-mapped, but is now empty. ++ */ ++static inline void ++drm_gpuva_op_remap_to_unmap_range(const struct drm_gpuva_op_remap *op, ++ u64 *start_addr, u64 *range) ++{ ++ const u64 va_start = op->prev ? ++ op->prev->va.addr + op->prev->va.range : ++ op->unmap->va->va.addr; ++ const u64 va_end = op->next ? ++ op->next->va.addr : ++ op->unmap->va->va.addr + op->unmap->va->va.range; ++ ++ if (start_addr) ++ *start_addr = va_start; ++ if (range) ++ *range = va_end - va_start; ++} ++ + #endif /* __DRM_GPUVM_H__ */ diff --git a/patches-6.6/034-17-v6.8-drm-gpuvm-Fix-deprecated-license-identifier.patch b/patches-6.6/034-17-v6.8-drm-gpuvm-Fix-deprecated-license-identifier.patch new file mode 100644 index 0000000..f7686f8 --- /dev/null +++ b/patches-6.6/034-17-v6.8-drm-gpuvm-Fix-deprecated-license-identifier.patch @@ -0,0 +1,41 @@ +From b9c02e1052650af56d4487efa5fade3fb70e3653 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= +Date: Mon, 6 Nov 2023 12:48:27 +0100 +Subject: [PATCH] drm/gpuvm: Fix deprecated license identifier +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +"GPL-2.0-only" in the license header was incorrectly changed to the +now deprecated "GPL-2.0". Fix. + +Cc: Maxime Ripard +Cc: Danilo Krummrich +Reported-by: David Edelsohn +Closes: https://lore.kernel.org/dri-devel/5lfrhdpkwhpgzipgngojs3tyqfqbesifzu5nf4l5q3nhfdhcf2@25nmiq7tfrew/T/#m5c356d68815711eea30dd94cc6f7ea8cd4344fe3 +Fixes: f7749a549b4f ("drm/gpuvm: Dual-licence the drm_gpuvm code GPL-2.0 OR MIT") +Signed-off-by: Thomas Hellström +Acked-by: Maxime Ripard +Acked-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231106114827.62492-1-thomas.hellstrom@linux.intel.com +--- + drivers/gpu/drm/drm_gpuvm.c | 2 +- + include/drm/drm_gpuvm.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1,4 +1,4 @@ +-// SPDX-License-Identifier: GPL-2.0 OR MIT ++// SPDX-License-Identifier: GPL-2.0-only OR MIT + /* + * Copyright (c) 2022 Red Hat. 
+ * +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -1,4 +1,4 @@ +-/* SPDX-License-Identifier: GPL-2.0 OR MIT */ ++/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ + + #ifndef __DRM_GPUVM_H__ + #define __DRM_GPUVM_H__ diff --git a/patches-6.6/034-18-v6.8-drm-gpuvm-fall-back-to-drm_exec_lock_obj.patch b/patches-6.6/034-18-v6.8-drm-gpuvm-fall-back-to-drm_exec_lock_obj.patch new file mode 100644 index 0000000..c38b2aa --- /dev/null +++ b/patches-6.6/034-18-v6.8-drm-gpuvm-fall-back-to-drm_exec_lock_obj.patch @@ -0,0 +1,142 @@ +From e759f2ca29d918d3db57a61cdf838025beb03465 Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Wed, 29 Nov 2023 23:08:00 +0100 +Subject: [PATCH] drm/gpuvm: fall back to drm_exec_lock_obj() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fall back to drm_exec_lock_obj() if num_fences is zero for the +drm_gpuvm_prepare_* function family. + +Otherwise dma_resv_reserve_fences() would actually allocate slots even +though num_fences is zero. + +Cc: Christian König +Acked-by: Donald Robson +Signed-off-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231129220835.297885-2-dakr@redhat.com +--- + drivers/gpu/drm/drm_gpuvm.c | 43 ++++++++++++++++++++++++++++++++----- + include/drm/drm_gpuvm.h | 23 +++----------------- + 2 files changed, 41 insertions(+), 25 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1080,6 +1080,37 @@ drm_gpuvm_put(struct drm_gpuvm *gpuvm) + EXPORT_SYMBOL_GPL(drm_gpuvm_put); + + static int ++exec_prepare_obj(struct drm_exec *exec, struct drm_gem_object *obj, ++ unsigned int num_fences) ++{ ++ return num_fences ? drm_exec_prepare_obj(exec, obj, num_fences) : ++ drm_exec_lock_obj(exec, obj); ++} ++ ++/** ++ * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv ++ * @gpuvm: the &drm_gpuvm ++ * @exec: the &drm_exec context ++ * @num_fences: the amount of &dma_fences to reserve ++ * ++ * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object; if ++ * @num_fences is zero drm_exec_lock_obj() is called instead. ++ * ++ * Using this function directly, it is the drivers responsibility to call ++ * drm_exec_init() and drm_exec_fini() accordingly. ++ * ++ * Returns: 0 on success, negative error code on failure. ++ */ ++int ++drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences) ++{ ++ return exec_prepare_obj(exec, gpuvm->r_obj, num_fences); ++} ++EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_vm); ++ ++static int + __drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, + unsigned int num_fences) +@@ -1089,7 +1120,7 @@ __drm_gpuvm_prepare_objects(struct drm_g + int ret = 0; + + for_each_vm_bo_in_list(gpuvm, extobj, &extobjs, vm_bo) { +- ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); ++ ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); + if (ret) + break; + } +@@ -1110,7 +1141,7 @@ drm_gpuvm_prepare_objects_locked(struct + + drm_gpuvm_resv_assert_held(gpuvm); + list_for_each_entry(vm_bo, &gpuvm->extobj.list, list.entry.extobj) { +- ret = drm_exec_prepare_obj(exec, vm_bo->obj, num_fences); ++ ret = exec_prepare_obj(exec, vm_bo->obj, num_fences); + if (ret) + break; + +@@ -1128,7 +1159,8 @@ drm_gpuvm_prepare_objects_locked(struct + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for all &drm_gem_objects the given +- * &drm_gpuvm contains mappings of. 
++ * &drm_gpuvm contains mappings of; if @num_fences is zero drm_exec_lock_obj() ++ * is called instead. + * + * Using this function directly, it is the drivers responsibility to call + * drm_exec_init() and drm_exec_fini() accordingly. +@@ -1165,7 +1197,8 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_prepare_obje + * @num_fences: the amount of &dma_fences to reserve + * + * Calls drm_exec_prepare_obj() for all &drm_gem_objects mapped between @addr +- * and @addr + @range. ++ * and @addr + @range; if @num_fences is zero drm_exec_lock_obj() is called ++ * instead. + * + * Returns: 0 on success, negative error code on failure. + */ +@@ -1180,7 +1213,7 @@ drm_gpuvm_prepare_range(struct drm_gpuvm + drm_gpuvm_for_each_va_range(va, gpuvm, addr, end) { + struct drm_gem_object *obj = va->gem.obj; + +- ret = drm_exec_prepare_obj(exec, obj, num_fences); ++ ret = exec_prepare_obj(exec, obj, num_fences); + if (ret) + return ret; + } +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -544,26 +544,9 @@ struct drm_gpuvm_exec { + } extra; + }; + +-/** +- * drm_gpuvm_prepare_vm() - prepare the GPUVMs common dma-resv +- * @gpuvm: the &drm_gpuvm +- * @exec: the &drm_exec context +- * @num_fences: the amount of &dma_fences to reserve +- * +- * Calls drm_exec_prepare_obj() for the GPUVMs dummy &drm_gem_object. +- * +- * Using this function directly, it is the drivers responsibility to call +- * drm_exec_init() and drm_exec_fini() accordingly. +- * +- * Returns: 0 on success, negative error code on failure. +- */ +-static inline int +-drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, +- struct drm_exec *exec, +- unsigned int num_fences) +-{ +- return drm_exec_prepare_obj(exec, gpuvm->r_obj, num_fences); +-} ++int drm_gpuvm_prepare_vm(struct drm_gpuvm *gpuvm, ++ struct drm_exec *exec, ++ unsigned int num_fences); + + int drm_gpuvm_prepare_objects(struct drm_gpuvm *gpuvm, + struct drm_exec *exec, diff --git a/patches-6.6/034-19-v6.8-drm-gpuvm-Let-drm_gpuvm_bo_put-report-when-the-vm_bo.patch b/patches-6.6/034-19-v6.8-drm-gpuvm-Let-drm_gpuvm_bo_put-report-when-the-vm_bo.patch new file mode 100644 index 0000000..b58d296 --- /dev/null +++ b/patches-6.6/034-19-v6.8-drm-gpuvm-Let-drm_gpuvm_bo_put-report-when-the-vm_bo.patch @@ -0,0 +1,59 @@ +From c50a291d621aa7abaa27b05f56d450a388b64948 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Mon, 4 Dec 2023 16:14:06 +0100 +Subject: [PATCH] drm/gpuvm: Let drm_gpuvm_bo_put() report when the vm_bo + object is destroyed + +Some users need to release resources attached to the vm_bo object when +it's destroyed. In Panthor's case, we need to release the pin ref so +BO pages can be returned to the system when all GPU mappings are gone. + +This could be done through a custom drm_gpuvm::vm_bo_free() hook, but +this has all sort of locking implications that would force us to expose +a drm_gem_shmem_unpin_locked() helper, not to mention the fact that +having a ::vm_bo_free() implementation without a ::vm_bo_alloc() one +seems odd. So let's keep things simple, and extend drm_gpuvm_bo_put() +to report when the object is destroyed. 
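+
+A minimal sketch of the intended use (hypothetical driver code; the unpin
+helper is a placeholder):
+
+  /* Drop our vm_bo reference; release the pin once the last ref is gone. */
+  if (drm_gpuvm_bo_put(vm_bo))
+          my_bo_unpin(bo);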
+ +Signed-off-by: Boris Brezillon +Reviewed-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231204151406.1977285-1-boris.brezillon@collabora.com +--- + drivers/gpu/drm/drm_gpuvm.c | 8 ++++++-- + include/drm/drm_gpuvm.h | 2 +- + 2 files changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/drm_gpuvm.c ++++ b/drivers/gpu/drm/drm_gpuvm.c +@@ -1529,14 +1529,18 @@ drm_gpuvm_bo_destroy(struct kref *kref) + * hold the dma-resv or driver specific GEM gpuva lock. + * + * This function may only be called from non-atomic context. ++ * ++ * Returns: true if vm_bo was destroyed, false otherwise. + */ +-void ++bool + drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo) + { + might_sleep(); + + if (vm_bo) +- kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); ++ return !!kref_put(&vm_bo->kref, drm_gpuvm_bo_destroy); ++ ++ return false; + } + EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put); + +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -721,7 +721,7 @@ drm_gpuvm_bo_get(struct drm_gpuvm_bo *vm + return vm_bo; + } + +-void drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); ++bool drm_gpuvm_bo_put(struct drm_gpuvm_bo *vm_bo); + + struct drm_gpuvm_bo * + drm_gpuvm_bo_find(struct drm_gpuvm *gpuvm, diff --git a/patches-6.6/034-20-v6.7-drm-sched-Convert-the-GPU-scheduler-to-variable-number-of.patch b/patches-6.6/034-20-v6.7-drm-sched-Convert-the-GPU-scheduler-to-variable-number-of.patch new file mode 100644 index 0000000..5539118 --- /dev/null +++ b/patches-6.6/034-20-v6.7-drm-sched-Convert-the-GPU-scheduler-to-variable-number-of.patch @@ -0,0 +1,405 @@ +From 56e449603f0ac580700621a356d35d5716a62ce5 Mon Sep 17 00:00:00 2001 +From: Luben Tuikov +Date: Sat, 14 Oct 2023 21:15:35 -0400 +Subject: [PATCH] drm/sched: Convert the GPU scheduler to variable number of + run-queues +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The GPU scheduler has now a variable number of run-queues, which are set up at +drm_sched_init() time. This way, each driver announces how many run-queues it +requires (supports) per each GPU scheduler it creates. Note, that run-queues +correspond to scheduler "priorities", thus if the number of run-queues is set +to 1 at drm_sched_init(), then that scheduler supports a single run-queue, +i.e. single "priority". If a driver further sets a single entity per +run-queue, then this creates a 1-to-1 correspondence between a scheduler and +a scheduled entity. 
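+
+As a hypothetical example (names and limits are placeholders), a driver
+that wants the 1-to-1 setup described above would now initialize its
+scheduler with a single run-queue:
+
+  ret = drm_sched_init(&sched, &my_sched_ops,
+                       1,               /* one run-queue, i.e. one priority */
+                       ring_size, 0, msecs_to_jiffies(500),
+                       NULL, NULL, "my-ring", dev);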
+ +Cc: Lucas Stach +Cc: Russell King +Cc: Qiang Yu +Cc: Rob Clark +Cc: Abhinav Kumar +Cc: Dmitry Baryshkov +Cc: Danilo Krummrich +Cc: Matthew Brost +Cc: Boris Brezillon +Cc: Alex Deucher +Cc: Christian König +Cc: Emma Anholt +Cc: etnaviv@lists.freedesktop.org +Cc: lima@lists.freedesktop.org +Cc: linux-arm-msm@vger.kernel.org +Cc: freedreno@lists.freedesktop.org +Cc: nouveau@lists.freedesktop.org +Cc: dri-devel@lists.freedesktop.org +Signed-off-by: Luben Tuikov +Acked-by: Christian König +Link: https://lore.kernel.org/r/20231023032251.164775-1-luben.tuikov@amd.com +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 +- + drivers/gpu/drm/etnaviv/etnaviv_sched.c | 1 + + drivers/gpu/drm/lima/lima_sched.c | 4 +- + drivers/gpu/drm/msm/msm_ringbuffer.c | 5 +- + drivers/gpu/drm/nouveau/nouveau_sched.c | 1 + + drivers/gpu/drm/panfrost/panfrost_job.c | 1 + + drivers/gpu/drm/scheduler/sched_entity.c | 18 +++++- + drivers/gpu/drm/scheduler/sched_main.c | 74 ++++++++++++++++++---- + drivers/gpu/drm/v3d/v3d_sched.c | 5 ++ + include/drm/gpu_scheduler.h | 9 ++- + 11 files changed, 98 insertions(+), 25 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2277,6 +2277,7 @@ static int amdgpu_device_init_schedulers + } + + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + ring->num_hw_submission, 0, + timeout, adev->reset_domain->wq, + ring->sched_score, ring->name, +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -328,8 +328,8 @@ void amdgpu_job_stop_all_jobs_on_sched(s + int i; + + /* Signal all jobs not yet scheduled */ +- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { +- struct drm_sched_rq *rq = &sched->sched_rq[i]; ++ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { ++ struct drm_sched_rq *rq = sched->sched_rq[i]; + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) { + while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { +--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c +@@ -135,6 +135,7 @@ int etnaviv_sched_init(struct etnaviv_gp + int ret; + + ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, + msecs_to_jiffies(500), NULL, NULL, + dev_name(gpu->dev), gpu->dev); +--- a/drivers/gpu/drm/lima/lima_sched.c ++++ b/drivers/gpu/drm/lima/lima_sched.c +@@ -495,7 +495,9 @@ int lima_sched_pipe_init(struct lima_sch + + INIT_WORK(&pipe->recover_work, lima_sched_recover_work); + +- return drm_sched_init(&pipe->base, &lima_sched_ops, 1, ++ return drm_sched_init(&pipe->base, &lima_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, ++ 1, + lima_job_hang_limit, + msecs_to_jiffies(timeout), NULL, + NULL, name, pipe->ldev->dev); +--- a/drivers/gpu/drm/msm/msm_ringbuffer.c ++++ b/drivers/gpu/drm/msm/msm_ringbuffer.c +@@ -98,8 +98,9 @@ struct msm_ringbuffer *msm_ringbuffer_ne + sched_timeout = MAX_SCHEDULE_TIMEOUT; + + ret = drm_sched_init(&ring->sched, &msm_sched_ops, +- num_hw_submissions, 0, sched_timeout, +- NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); ++ DRM_SCHED_PRIORITY_COUNT, ++ num_hw_submissions, 0, sched_timeout, ++ NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); + if (ret) { + goto fail; + } +--- a/drivers/gpu/drm/nouveau/nouveau_sched.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.c +@@ -441,6 
+441,7 @@ int nouveau_sched_init(struct nouveau_dr + return -ENOMEM; + + return drm_sched_init(sched, &nouveau_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, + NULL, NULL, "nouveau_sched", drm->dev->dev); + } +--- a/drivers/gpu/drm/panfrost/panfrost_job.c ++++ b/drivers/gpu/drm/panfrost/panfrost_job.c +@@ -832,6 +832,7 @@ int panfrost_job_init(struct panfrost_de + + ret = drm_sched_init(&js->queue[j].sched, + &panfrost_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + nentries, 0, + msecs_to_jiffies(JOB_TIMEOUT_MS), + pfdev->reset.wq, +--- a/drivers/gpu/drm/scheduler/sched_entity.c ++++ b/drivers/gpu/drm/scheduler/sched_entity.c +@@ -75,8 +75,20 @@ int drm_sched_entity_init(struct drm_sch + RCU_INIT_POINTER(entity->last_scheduled, NULL); + RB_CLEAR_NODE(&entity->rb_tree_node); + +- if(num_sched_list) +- entity->rq = &sched_list[0]->sched_rq[entity->priority]; ++ if (!sched_list[0]->sched_rq) { ++ /* Warn drivers not to do this and to fix their DRM ++ * calling order. ++ */ ++ pr_warn("%s: called with uninitialized scheduler\n", __func__); ++ } else if (num_sched_list) { ++ /* The "priority" of an entity cannot exceed the number ++ * of run-queues of a scheduler. ++ */ ++ if (entity->priority >= sched_list[0]->num_rqs) ++ entity->priority = max_t(u32, sched_list[0]->num_rqs, ++ DRM_SCHED_PRIORITY_MIN); ++ entity->rq = sched_list[0]->sched_rq[entity->priority]; ++ } + + init_completion(&entity->entity_idle); + +@@ -533,7 +545,7 @@ void drm_sched_entity_select_rq(struct d + + spin_lock(&entity->rq_lock); + sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list); +- rq = sched ? &sched->sched_rq[entity->priority] : NULL; ++ rq = sched ? sched->sched_rq[entity->priority] : NULL; + if (rq != entity->rq) { + drm_sched_rq_remove_entity(entity->rq, entity); + entity->rq = rq; +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -632,8 +632,14 @@ int drm_sched_job_init(struct drm_sched_ + struct drm_sched_entity *entity, + void *owner) + { +- if (!entity->rq) ++ if (!entity->rq) { ++ /* This will most likely be followed by missing frames ++ * or worse--a blank screen--leave a trail in the ++ * logs, so this can be debugged easier. ++ */ ++ drm_err(job->sched, "%s: entity has no rq!\n", __func__); + return -ENOENT; ++ } + + job->entity = entity; + job->s_fence = drm_sched_fence_alloc(entity, owner); +@@ -671,7 +677,7 @@ void drm_sched_job_arm(struct drm_sched_ + sched = entity->rq->sched; + + job->sched = sched; +- job->s_priority = entity->rq - sched->sched_rq; ++ job->s_priority = entity->priority; + job->id = atomic64_inc_return(&sched->job_id_count); + + drm_sched_fence_init(job->s_fence, job->entity); +@@ -888,10 +894,10 @@ drm_sched_select_entity(struct drm_gpu_s + return NULL; + + /* Kernel run queue has higher priority than normal run queue*/ +- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { ++ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? 
+- drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) : +- drm_sched_rq_select_entity_rr(&sched->sched_rq[i]); ++ drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) : ++ drm_sched_rq_select_entity_rr(sched->sched_rq[i]); + if (entity) + break; + } +@@ -1071,6 +1077,7 @@ static int drm_sched_main(void *param) + * + * @sched: scheduler instance + * @ops: backend operations for this scheduler ++ * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT + * @hw_submission: number of hw submissions that can be in flight + * @hang_limit: number of times to allow a job to hang before dropping it + * @timeout: timeout value in jiffies for the scheduler +@@ -1084,11 +1091,12 @@ static int drm_sched_main(void *param) + */ + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, +- unsigned hw_submission, unsigned hang_limit, ++ u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev) + { + int i, ret; ++ + sched->ops = ops; + sched->hw_submission_limit = hw_submission; + sched->name = name; +@@ -1097,8 +1105,36 @@ int drm_sched_init(struct drm_gpu_schedu + sched->hang_limit = hang_limit; + sched->score = score ? score : &sched->_score; + sched->dev = dev; +- for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++) +- drm_sched_rq_init(sched, &sched->sched_rq[i]); ++ ++ if (num_rqs > DRM_SCHED_PRIORITY_COUNT) { ++ /* This is a gross violation--tell drivers what the problem is. ++ */ ++ drm_err(sched, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n", ++ __func__); ++ return -EINVAL; ++ } else if (sched->sched_rq) { ++ /* Not an error, but warn anyway so drivers can ++ * fine-tune their DRM calling order, and return all ++ * is good. 
++ */ ++ drm_warn(sched, "%s: scheduler already initialized!\n", __func__); ++ return 0; ++ } ++ ++ sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq), ++ GFP_KERNEL | __GFP_ZERO); ++ if (!sched->sched_rq) { ++ drm_err(sched, "%s: out of memory for sched_rq\n", __func__); ++ return -ENOMEM; ++ } ++ sched->num_rqs = num_rqs; ++ ret = -ENOMEM; ++ for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) { ++ sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); ++ if (!sched->sched_rq[i]) ++ goto Out_unroll; ++ drm_sched_rq_init(sched, sched->sched_rq[i]); ++ } + + init_waitqueue_head(&sched->wake_up_worker); + init_waitqueue_head(&sched->job_scheduled); +@@ -1115,11 +1151,18 @@ int drm_sched_init(struct drm_gpu_schedu + ret = PTR_ERR(sched->thread); + sched->thread = NULL; + DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name); +- return ret; ++ goto Out_unroll; + } + + sched->ready = true; + return 0; ++Out_unroll: ++ for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--) ++ kfree(sched->sched_rq[i]); ++ kfree(sched->sched_rq); ++ sched->sched_rq = NULL; ++ drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); ++ return ret; + } + EXPORT_SYMBOL(drm_sched_init); + +@@ -1138,8 +1181,8 @@ void drm_sched_fini(struct drm_gpu_sched + if (sched->thread) + kthread_stop(sched->thread); + +- for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { +- struct drm_sched_rq *rq = &sched->sched_rq[i]; ++ for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { ++ struct drm_sched_rq *rq = sched->sched_rq[i]; + + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) +@@ -1150,7 +1193,7 @@ void drm_sched_fini(struct drm_gpu_sched + */ + s_entity->stopped = true; + spin_unlock(&rq->lock); +- ++ kfree(sched->sched_rq[i]); + } + + /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ +@@ -1160,6 +1203,8 @@ void drm_sched_fini(struct drm_gpu_sched + cancel_delayed_work_sync(&sched->work_tdr); + + sched->ready = false; ++ kfree(sched->sched_rq); ++ sched->sched_rq = NULL; + } + EXPORT_SYMBOL(drm_sched_fini); + +@@ -1186,9 +1231,10 @@ void drm_sched_increase_karma(struct drm + if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { + atomic_inc(&bad->karma); + +- for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; ++ for (i = DRM_SCHED_PRIORITY_MIN; ++ i < min_t(typeof(sched->num_rqs), sched->num_rqs, DRM_SCHED_PRIORITY_KERNEL); + i++) { +- struct drm_sched_rq *rq = &sched->sched_rq[i]; ++ struct drm_sched_rq *rq = sched->sched_rq[i]; + + spin_lock(&rq->lock); + list_for_each_entry_safe(entity, tmp, &rq->entities, list) { +--- a/drivers/gpu/drm/v3d/v3d_sched.c ++++ b/drivers/gpu/drm/v3d/v3d_sched.c +@@ -389,6 +389,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, + &v3d_bin_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_bin", v3d->drm.dev); +@@ -397,6 +398,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, + &v3d_render_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_render", v3d->drm.dev); +@@ -405,6 +407,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, + &v3d_tfu_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, 
"v3d_tfu", v3d->drm.dev); +@@ -414,6 +417,7 @@ v3d_sched_init(struct v3d_dev *v3d) + if (v3d_has_csd(v3d)) { + ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, + &v3d_csd_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_csd", v3d->drm.dev); +@@ -422,6 +426,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, + &v3d_cache_clean_sched_ops, ++ DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, + NULL, "v3d_cache_clean", v3d->drm.dev); +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -471,7 +471,9 @@ struct drm_sched_backend_ops { + * @hw_submission_limit: the max size of the hardware queue. + * @timeout: the time after which a job is removed from the scheduler. + * @name: name of the ring for which this scheduler is being used. +- * @sched_rq: priority wise array of run queues. ++ * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, ++ * as there's usually one run-queue per priority, but could be less. ++ * @sched_rq: An allocated array of run-queues of size @num_rqs; + * @wake_up_worker: the wait queue on which the scheduler sleeps until a job + * is ready to be scheduled. + * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler +@@ -500,7 +502,8 @@ struct drm_gpu_scheduler { + uint32_t hw_submission_limit; + long timeout; + const char *name; +- struct drm_sched_rq sched_rq[DRM_SCHED_PRIORITY_COUNT]; ++ u32 num_rqs; ++ struct drm_sched_rq **sched_rq; + wait_queue_head_t wake_up_worker; + wait_queue_head_t job_scheduled; + atomic_t hw_rq_count; +@@ -520,7 +523,7 @@ struct drm_gpu_scheduler { + + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, +- uint32_t hw_submission, unsigned hang_limit, ++ u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev); + diff --git a/patches-6.6/034-21-v6.8-drm-sched-Add-drm_sched_wqueue_-helpers.patch b/patches-6.6/034-21-v6.8-drm-sched-Add-drm_sched_wqueue_-helpers.patch new file mode 100644 index 0000000..2385d67 --- /dev/null +++ b/patches-6.6/034-21-v6.8-drm-sched-Add-drm_sched_wqueue_-helpers.patch @@ -0,0 +1,241 @@ +From 35963cf2cd25eeea8bdb4d02853dac1e66fb13a0 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Mon, 30 Oct 2023 20:24:35 -0700 +Subject: [PATCH] drm/sched: Add drm_sched_wqueue_* helpers + +Add scheduler wqueue ready, stop, and start helpers to hide the +implementation details of the scheduler from the drivers. 
+ +v2: + - s/sched_wqueue/sched_wqueue (Luben) + - Remove the extra white line after the return-statement (Luben) + - update drm_sched_wqueue_ready comment (Luben) + +Cc: Luben Tuikov +Signed-off-by: Matthew Brost +Reviewed-by: Luben Tuikov +Link: https://lore.kernel.org/r/20231031032439.1558703-2-matthew.brost@intel.com +Signed-off-by: Luben Tuikov +--- + .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 15 +++---- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 +++--- + drivers/gpu/drm/msm/adreno/adreno_device.c | 6 ++- + drivers/gpu/drm/scheduler/sched_main.c | 39 ++++++++++++++++++- + include/drm/gpu_scheduler.h | 3 ++ + 6 files changed, 59 insertions(+), 18 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +@@ -290,7 +290,7 @@ static int suspend_resume_compute_schedu + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + +- if (!(ring && ring->sched.thread)) ++ if (!(ring && drm_sched_wqueue_ready(&ring->sched))) + continue; + + /* stop secheduler and drain ring. */ +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +@@ -1671,9 +1671,9 @@ static int amdgpu_debugfs_test_ib_show(s + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; +- kthread_park(ring->sched.thread); ++ drm_sched_wqueue_stop(&ring->sched); + } + + seq_puts(m, "run ib test:\n"); +@@ -1687,9 +1687,9 @@ static int amdgpu_debugfs_test_ib_show(s + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; +- kthread_unpark(ring->sched.thread); ++ drm_sched_wqueue_start(&ring->sched); + } + + up_write(&adev->reset_domain->sem); +@@ -1909,7 +1909,8 @@ static int amdgpu_debugfs_ib_preempt(voi + + ring = adev->rings[val]; + +- if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread) ++ if (!ring || !ring->funcs->preempt_ib || ++ !drm_sched_wqueue_ready(&ring->sched)) + return -EINVAL; + + /* the last preemption failed */ +@@ -1927,7 +1928,7 @@ static int amdgpu_debugfs_ib_preempt(voi + goto pro_end; + + /* stop the scheduler */ +- kthread_park(ring->sched.thread); ++ drm_sched_wqueue_stop(&ring->sched); + + /* preempt the IB */ + r = amdgpu_ring_preempt_ib(ring); +@@ -1961,7 +1962,7 @@ static int amdgpu_debugfs_ib_preempt(voi + + failure: + /* restart the scheduler */ +- kthread_unpark(ring->sched.thread); ++ drm_sched_wqueue_start(&ring->sched); + + up_read(&adev->reset_domain->sem); + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4613,7 +4613,7 @@ bool amdgpu_device_has_job_running(struc + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + spin_lock(&ring->sched.job_list_lock); +@@ -4755,7 +4755,7 @@ int amdgpu_device_pre_asic_reset(struct + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + /* Clear job fence from fence drv to avoid force_completion +@@ -5295,7 +5295,7 
@@ int amdgpu_device_gpu_recover(struct amd + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = tmp_adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + drm_sched_stop(&ring->sched, job ? &job->base : NULL); +@@ -5370,7 +5370,7 @@ skip_hw_reset: + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = tmp_adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + drm_sched_start(&ring->sched, true); +@@ -5696,7 +5696,7 @@ pci_ers_result_t amdgpu_pci_error_detect + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + drm_sched_stop(&ring->sched, NULL); +@@ -5824,7 +5824,7 @@ void amdgpu_pci_resume(struct pci_dev *p + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + +- if (!ring || !ring->sched.thread) ++ if (!ring || !drm_sched_wqueue_ready(&ring->sched)) + continue; + + drm_sched_start(&ring->sched, true); +--- a/drivers/gpu/drm/msm/adreno/adreno_device.c ++++ b/drivers/gpu/drm/msm/adreno/adreno_device.c +@@ -810,7 +810,8 @@ static void suspend_scheduler(struct msm + */ + for (i = 0; i < gpu->nr_rings; i++) { + struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; +- kthread_park(sched->thread); ++ ++ drm_sched_wqueue_stop(sched); + } + } + +@@ -820,7 +821,8 @@ static void resume_scheduler(struct msm_ + + for (i = 0; i < gpu->nr_rings; i++) { + struct drm_gpu_scheduler *sched = &gpu->rb[i]->sched; +- kthread_unpark(sched->thread); ++ ++ drm_sched_wqueue_start(sched); + } + } + +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -439,7 +439,7 @@ void drm_sched_stop(struct drm_gpu_sched + { + struct drm_sched_job *s_job, *tmp; + +- kthread_park(sched->thread); ++ drm_sched_wqueue_stop(sched); + + /* + * Reinsert back the bad job here - now it's safe as +@@ -552,7 +552,7 @@ void drm_sched_start(struct drm_gpu_sche + spin_unlock(&sched->job_list_lock); + } + +- kthread_unpark(sched->thread); ++ drm_sched_wqueue_start(sched); + } + EXPORT_SYMBOL(drm_sched_start); + +@@ -1252,3 +1252,38 @@ void drm_sched_increase_karma(struct drm + } + } + EXPORT_SYMBOL(drm_sched_increase_karma); ++ ++/** ++ * drm_sched_wqueue_ready - Is the scheduler ready for submission ++ * ++ * @sched: scheduler instance ++ * ++ * Returns true if submission is ready ++ */ ++bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched) ++{ ++ return !!sched->thread; ++} ++EXPORT_SYMBOL(drm_sched_wqueue_ready); ++ ++/** ++ * drm_sched_wqueue_stop - stop scheduler submission ++ * ++ * @sched: scheduler instance ++ */ ++void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) ++{ ++ kthread_park(sched->thread); ++} ++EXPORT_SYMBOL(drm_sched_wqueue_stop); ++ ++/** ++ * drm_sched_wqueue_start - start scheduler submission ++ * ++ * @sched: scheduler instance ++ */ ++void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) ++{ ++ kthread_unpark(sched->thread); ++} ++EXPORT_SYMBOL(drm_sched_wqueue_start); +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -552,6 +552,9 @@ void drm_sched_entity_modify_sched(struc + + void drm_sched_job_cleanup(struct drm_sched_job *job); + void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); ++bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); ++void 
drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched);
++void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched);
+ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
+ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
+ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
diff --git a/patches-6.6/034-22-v6.8-drm-sched-Convert-drm-scheduler-to-use-a-work-queue-rathe.patch b/patches-6.6/034-22-v6.8-drm-sched-Convert-drm-scheduler-to-use-a-work-queue-rathe.patch
new file mode 100644
index 0000000..4d231f3
--- /dev/null
+++ b/patches-6.6/034-22-v6.8-drm-sched-Convert-drm-scheduler-to-use-a-work-queue-rathe.patch
@@ -0,0 +1,507 @@
+From a6149f0393699308fb00149be913044977bceb56 Mon Sep 17 00:00:00 2001
+From: Matthew Brost
+Date: Mon, 30 Oct 2023 20:24:36 -0700
+Subject: [PATCH] drm/sched: Convert drm scheduler to use a work queue rather
+ than kthread
+
+In Xe, the new Intel GPU driver, a choice has been made to have a 1 to 1
+mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
+seems a bit odd but let us explain the reasoning below.
+
+1. In Xe the submission order from multiple drm_sched_entity is not
+guaranteed to match the completion order even if targeting the same hardware
+engine. This is because in Xe we have a firmware scheduler, the GuC,
+which is allowed to reorder, timeslice, and preempt submissions. If using a
+shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
+apart as the TDR expects submission order == completion order. Using a
+dedicated drm_gpu_scheduler per drm_sched_entity solves this problem.
+
+2. In Xe submissions are done via programming a ring buffer (circular
+buffer), and a drm_gpu_scheduler provides a limit on the number of jobs; if the
+limit is set to RING_SIZE / MAX_SIZE_PER_JOB, we get flow
+control on the ring for free.
+
+A problem with this design is that currently a drm_gpu_scheduler uses a
+kthread for submission / job cleanup. This doesn't scale if a large
+number of drm_gpu_scheduler are used. To work around the scaling issue,
+use a worker rather than a kthread for submission / job cleanup.
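+
+A hypothetical example of the new call signature (all names are
+placeholders); passing NULL for the submission workqueue lets the
+scheduler allocate its own ordered workqueue, while a driver with many
+1:1 schedulers can hand in one shared workqueue instead:
+
+  ret = drm_sched_init(&q->sched, &my_sched_ops, shared_submit_wq,
+                       1, ring_size, 0, msecs_to_jiffies(500),
+                       NULL, NULL, q->name, dev);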
+ +v2: + - (Rob Clark) Fix msm build + - Pass in run work queue +v3: + - (Boris) don't have loop in worker +v4: + - (Tvrtko) break out submit ready, stop, start helpers into own patch +v5: + - (Boris) default to ordered work queue +v6: + - (Luben / checkpatch) fix alignment in msm_ringbuffer.c + - (Luben) s/drm_sched_submit_queue/drm_sched_wqueue_enqueue + - (Luben) Update comment for drm_sched_wqueue_enqueue + - (Luben) Positive check for submit_wq in drm_sched_init + - (Luben) s/alloc_submit_wq/own_submit_wq +v7: + - (Luben) s/drm_sched_wqueue_enqueue/drm_sched_run_job_queue +v8: + - (Luben) Adjust var names / comments + +Signed-off-by: Matthew Brost +Reviewed-by: Luben Tuikov +Link: https://lore.kernel.org/r/20231031032439.1558703-3-matthew.brost@intel.com +Signed-off-by: Luben Tuikov +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- + drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +- + drivers/gpu/drm/lima/lima_sched.c | 2 +- + drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- + drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- + drivers/gpu/drm/scheduler/sched_main.c | 131 +++++++++++---------- + drivers/gpu/drm/v3d/v3d_sched.c | 10 +- + include/drm/gpu_scheduler.h | 14 ++- + 9 files changed, 86 insertions(+), 81 deletions(-) + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2276,7 +2276,7 @@ static int amdgpu_device_init_schedulers + break; + } + +- r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, ++ r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + ring->num_hw_submission, 0, + timeout, adev->reset_domain->wq, +--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c +@@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gp + { + int ret; + +- ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, ++ ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, + msecs_to_jiffies(500), NULL, NULL, +--- a/drivers/gpu/drm/lima/lima_sched.c ++++ b/drivers/gpu/drm/lima/lima_sched.c +@@ -495,7 +495,7 @@ int lima_sched_pipe_init(struct lima_sch + + INIT_WORK(&pipe->recover_work, lima_sched_recover_work); + +- return drm_sched_init(&pipe->base, &lima_sched_ops, ++ return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + 1, + lima_job_hang_limit, +--- a/drivers/gpu/drm/msm/msm_ringbuffer.c ++++ b/drivers/gpu/drm/msm/msm_ringbuffer.c +@@ -97,7 +97,7 @@ struct msm_ringbuffer *msm_ringbuffer_ne + /* currently managing hangcheck ourselves: */ + sched_timeout = MAX_SCHEDULE_TIMEOUT; + +- ret = drm_sched_init(&ring->sched, &msm_sched_ops, ++ ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + num_hw_submissions, 0, sched_timeout, + NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); +--- a/drivers/gpu/drm/nouveau/nouveau_sched.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.c +@@ -440,7 +440,7 @@ int nouveau_sched_init(struct nouveau_dr + if (!drm->sched_wq) + return -ENOMEM; + +- return drm_sched_init(sched, &nouveau_sched_ops, ++ return drm_sched_init(sched, &nouveau_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, + NULL, NULL, "nouveau_sched", drm->dev->dev); +--- a/drivers/gpu/drm/panfrost/panfrost_job.c ++++ b/drivers/gpu/drm/panfrost/panfrost_job.c +@@ -831,7 +831,7 @@ int panfrost_job_init(struct panfrost_de + 
js->queue[j].fence_context = dma_fence_context_alloc(1); + + ret = drm_sched_init(&js->queue[j].sched, +- &panfrost_sched_ops, ++ &panfrost_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + nentries, 0, + msecs_to_jiffies(JOB_TIMEOUT_MS), +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -48,7 +48,6 @@ + * through the jobs entity pointer. + */ + +-#include + #include + #include + #include +@@ -257,6 +256,16 @@ drm_sched_rq_select_entity_fifo(struct d + } + + /** ++ * drm_sched_run_job_queue - enqueue run-job work ++ * @sched: scheduler instance ++ */ ++static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) ++{ ++ if (!READ_ONCE(sched->pause_submit)) ++ queue_work(sched->submit_wq, &sched->work_run_job); ++} ++ ++/** + * drm_sched_job_done - complete a job + * @s_job: pointer to the job which is done + * +@@ -275,7 +284,7 @@ static void drm_sched_job_done(struct dr + dma_fence_get(&s_fence->finished); + drm_sched_fence_finished(s_fence, result); + dma_fence_put(&s_fence->finished); +- wake_up_interruptible(&sched->wake_up_worker); ++ drm_sched_run_job_queue(sched); + } + + /** +@@ -874,7 +883,7 @@ static bool drm_sched_can_queue(struct d + void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) + { + if (drm_sched_can_queue(sched)) +- wake_up_interruptible(&sched->wake_up_worker); ++ drm_sched_run_job_queue(sched); + } + + /** +@@ -985,60 +994,41 @@ drm_sched_pick_best(struct drm_gpu_sched + EXPORT_SYMBOL(drm_sched_pick_best); + + /** +- * drm_sched_blocked - check if the scheduler is blocked ++ * drm_sched_run_job_work - main scheduler thread + * +- * @sched: scheduler instance +- * +- * Returns true if blocked, otherwise false. ++ * @w: run job work + */ +-static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) ++static void drm_sched_run_job_work(struct work_struct *w) + { +- if (kthread_should_park()) { +- kthread_parkme(); +- return true; +- } +- +- return false; +-} +- +-/** +- * drm_sched_main - main scheduler thread +- * +- * @param: scheduler instance +- * +- * Returns 0. 
+- */ +-static int drm_sched_main(void *param) +-{ +- struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param; ++ struct drm_gpu_scheduler *sched = ++ container_of(w, struct drm_gpu_scheduler, work_run_job); ++ struct drm_sched_entity *entity; ++ struct drm_sched_job *cleanup_job; + int r; + +- sched_set_fifo_low(current); ++ if (READ_ONCE(sched->pause_submit)) ++ return; + +- while (!kthread_should_stop()) { +- struct drm_sched_entity *entity = NULL; +- struct drm_sched_fence *s_fence; +- struct drm_sched_job *sched_job; +- struct dma_fence *fence; +- struct drm_sched_job *cleanup_job = NULL; ++ cleanup_job = drm_sched_get_cleanup_job(sched); ++ entity = drm_sched_select_entity(sched); + +- wait_event_interruptible(sched->wake_up_worker, +- (cleanup_job = drm_sched_get_cleanup_job(sched)) || +- (!drm_sched_blocked(sched) && +- (entity = drm_sched_select_entity(sched))) || +- kthread_should_stop()); ++ if (!entity && !cleanup_job) ++ return; /* No more work */ + +- if (cleanup_job) +- sched->ops->free_job(cleanup_job); ++ if (cleanup_job) ++ sched->ops->free_job(cleanup_job); + +- if (!entity) +- continue; ++ if (entity) { ++ struct dma_fence *fence; ++ struct drm_sched_fence *s_fence; ++ struct drm_sched_job *sched_job; + + sched_job = drm_sched_entity_pop_job(entity); +- + if (!sched_job) { + complete_all(&entity->entity_idle); +- continue; ++ if (!cleanup_job) ++ return; /* No more work */ ++ goto again; + } + + s_fence = sched_job->s_fence; +@@ -1069,7 +1059,9 @@ static int drm_sched_main(void *param) + + wake_up(&sched->job_scheduled); + } +- return 0; ++ ++again: ++ drm_sched_run_job_queue(sched); + } + + /** +@@ -1077,6 +1069,8 @@ static int drm_sched_main(void *param) + * + * @sched: scheduler instance + * @ops: backend operations for this scheduler ++ * @submit_wq: workqueue to use for submission. 
If NULL, an ordered wq is ++ * allocated and used + * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT + * @hw_submission: number of hw submissions that can be in flight + * @hang_limit: number of times to allow a job to hang before dropping it +@@ -1091,6 +1085,7 @@ static int drm_sched_main(void *param) + */ + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, ++ struct workqueue_struct *submit_wq, + u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev) +@@ -1121,14 +1116,22 @@ int drm_sched_init(struct drm_gpu_schedu + return 0; + } + ++ if (submit_wq) { ++ sched->submit_wq = submit_wq; ++ sched->own_submit_wq = false; ++ } else { ++ sched->submit_wq = alloc_ordered_workqueue(name, 0); ++ if (!sched->submit_wq) ++ return -ENOMEM; ++ ++ sched->own_submit_wq = true; ++ } ++ ret = -ENOMEM; + sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq), + GFP_KERNEL | __GFP_ZERO); +- if (!sched->sched_rq) { +- drm_err(sched, "%s: out of memory for sched_rq\n", __func__); +- return -ENOMEM; +- } ++ if (!sched->sched_rq) ++ goto Out_free; + sched->num_rqs = num_rqs; +- ret = -ENOMEM; + for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) { + sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); + if (!sched->sched_rq[i]) +@@ -1136,31 +1139,26 @@ int drm_sched_init(struct drm_gpu_schedu + drm_sched_rq_init(sched, sched->sched_rq[i]); + } + +- init_waitqueue_head(&sched->wake_up_worker); + init_waitqueue_head(&sched->job_scheduled); + INIT_LIST_HEAD(&sched->pending_list); + spin_lock_init(&sched->job_list_lock); + atomic_set(&sched->hw_rq_count, 0); + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); ++ INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); + atomic_set(&sched->_score, 0); + atomic64_set(&sched->job_id_count, 0); +- +- /* Each scheduler will run on a seperate kernel thread */ +- sched->thread = kthread_run(drm_sched_main, sched, sched->name); +- if (IS_ERR(sched->thread)) { +- ret = PTR_ERR(sched->thread); +- sched->thread = NULL; +- DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name); +- goto Out_unroll; +- } ++ sched->pause_submit = false; + + sched->ready = true; + return 0; + Out_unroll: + for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--) + kfree(sched->sched_rq[i]); ++Out_free: + kfree(sched->sched_rq); + sched->sched_rq = NULL; ++ if (sched->own_submit_wq) ++ destroy_workqueue(sched->submit_wq); + drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); + return ret; + } +@@ -1178,8 +1176,7 @@ void drm_sched_fini(struct drm_gpu_sched + struct drm_sched_entity *s_entity; + int i; + +- if (sched->thread) +- kthread_stop(sched->thread); ++ drm_sched_wqueue_stop(sched); + + for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + struct drm_sched_rq *rq = sched->sched_rq[i]; +@@ -1202,6 +1199,8 @@ void drm_sched_fini(struct drm_gpu_sched + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&sched->work_tdr); + ++ if (sched->own_submit_wq) ++ destroy_workqueue(sched->submit_wq); + sched->ready = false; + kfree(sched->sched_rq); + sched->sched_rq = NULL; +@@ -1262,7 +1261,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma); + */ + bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched) + { +- return !!sched->thread; ++ return sched->ready; + } + 
EXPORT_SYMBOL(drm_sched_wqueue_ready); + +@@ -1273,7 +1272,8 @@ EXPORT_SYMBOL(drm_sched_wqueue_ready); + */ + void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) + { +- kthread_park(sched->thread); ++ WRITE_ONCE(sched->pause_submit, true); ++ cancel_work_sync(&sched->work_run_job); + } + EXPORT_SYMBOL(drm_sched_wqueue_stop); + +@@ -1284,6 +1284,7 @@ EXPORT_SYMBOL(drm_sched_wqueue_stop); + */ + void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) + { +- kthread_unpark(sched->thread); ++ WRITE_ONCE(sched->pause_submit, false); ++ queue_work(sched->submit_wq, &sched->work_run_job); + } + EXPORT_SYMBOL(drm_sched_wqueue_start); +--- a/drivers/gpu/drm/v3d/v3d_sched.c ++++ b/drivers/gpu/drm/v3d/v3d_sched.c +@@ -388,7 +388,7 @@ v3d_sched_init(struct v3d_dev *v3d) + int ret; + + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, +- &v3d_bin_sched_ops, ++ &v3d_bin_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +@@ -397,7 +397,7 @@ v3d_sched_init(struct v3d_dev *v3d) + return ret; + + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, +- &v3d_render_sched_ops, ++ &v3d_render_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +@@ -406,7 +406,7 @@ v3d_sched_init(struct v3d_dev *v3d) + goto fail; + + ret = drm_sched_init(&v3d->queue[V3D_TFU].sched, +- &v3d_tfu_sched_ops, ++ &v3d_tfu_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +@@ -416,7 +416,7 @@ v3d_sched_init(struct v3d_dev *v3d) + + if (v3d_has_csd(v3d)) { + ret = drm_sched_init(&v3d->queue[V3D_CSD].sched, +- &v3d_csd_sched_ops, ++ &v3d_csd_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +@@ -425,7 +425,7 @@ v3d_sched_init(struct v3d_dev *v3d) + goto fail; + + ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched, +- &v3d_cache_clean_sched_ops, ++ &v3d_cache_clean_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), NULL, +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -474,17 +474,16 @@ struct drm_sched_backend_ops { + * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, + * as there's usually one run-queue per priority, but could be less. + * @sched_rq: An allocated array of run-queues of size @num_rqs; +- * @wake_up_worker: the wait queue on which the scheduler sleeps until a job +- * is ready to be scheduled. + * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler + * waits on this wait queue until all the scheduled jobs are + * finished. + * @hw_rq_count: the number of jobs currently in the hardware queue. + * @job_id_count: used to assign unique id to the each job. ++ * @submit_wq: workqueue used to queue @work_run_job + * @timeout_wq: workqueue used to queue @work_tdr ++ * @work_run_job: work which calls run_job op of each scheduler. + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the + * timeout interval is over. +- * @thread: the kthread on which the scheduler which run. + * @pending_list: the list of jobs which are currently in the job queue. + * @job_list_lock: lock to protect the pending_list. 
+ * @hang_limit: once the hangs by a job crosses this limit then it is marked +@@ -493,6 +492,8 @@ struct drm_sched_backend_ops { + * @_score: score used when the driver doesn't provide one + * @ready: marks if the underlying HW is ready to work + * @free_guilty: A hit to time out handler to free the guilty job. ++ * @pause_submit: pause queuing of @work_run_job on @submit_wq ++ * @own_submit_wq: scheduler owns allocation of @submit_wq + * @dev: system &struct device + * + * One scheduler is implemented for each hardware ring. +@@ -504,13 +505,13 @@ struct drm_gpu_scheduler { + const char *name; + u32 num_rqs; + struct drm_sched_rq **sched_rq; +- wait_queue_head_t wake_up_worker; + wait_queue_head_t job_scheduled; + atomic_t hw_rq_count; + atomic64_t job_id_count; ++ struct workqueue_struct *submit_wq; + struct workqueue_struct *timeout_wq; ++ struct work_struct work_run_job; + struct delayed_work work_tdr; +- struct task_struct *thread; + struct list_head pending_list; + spinlock_t job_list_lock; + int hang_limit; +@@ -518,11 +519,14 @@ struct drm_gpu_scheduler { + atomic_t _score; + bool ready; + bool free_guilty; ++ bool pause_submit; ++ bool own_submit_wq; + struct device *dev; + }; + + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, ++ struct workqueue_struct *submit_wq, + u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev); diff --git a/patches-6.6/034-23-v6.8-drm-sched-Split-free_job-into-own-work-item.patch b/patches-6.6/034-23-v6.8-drm-sched-Split-free_job-into-own-work-item.patch new file mode 100644 index 0000000..a2efa0f --- /dev/null +++ b/patches-6.6/034-23-v6.8-drm-sched-Split-free_job-into-own-work-item.patch @@ -0,0 +1,275 @@ +From f7fe64ad0f22ff034f8ebcfbd7299ee9cc9b57d7 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Mon, 30 Oct 2023 20:24:37 -0700 +Subject: [PATCH] drm/sched: Split free_job into own work item + +Rather than call free_job and run_job in same work item have a dedicated +work item for each. This aligns with the design and intended use of work +queues. 
+ +v2: + - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting + timestamp in free_job() work item (Danilo) +v3: + - Drop forward dec of drm_sched_select_entity (Boris) + - Return in drm_sched_run_job_work if entity NULL (Boris) +v4: + - Replace dequeue with peek and invert logic (Luben) + - Wrap to 100 lines (Luben) + - Update comments for *_queue / *_queue_if_ready functions (Luben) +v5: + - Drop peek argument, blindly reinit idle (Luben) + - s/drm_sched_free_job_queue_if_ready/drm_sched_free_job_queue_if_done (Luben) + - Update work_run_job & work_free_job kernel doc (Luben) +v6: + - Do not move drm_sched_select_entity in file (Luben) + +Signed-off-by: Matthew Brost +Link: https://lore.kernel.org/r/20231031032439.1558703-4-matthew.brost@intel.com +Reviewed-by: Luben Tuikov +Signed-off-by: Luben Tuikov +--- + drivers/gpu/drm/scheduler/sched_main.c | 146 +++++++++++++++++-------- + include/drm/gpu_scheduler.h | 4 +- + 2 files changed, 101 insertions(+), 49 deletions(-) + +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -266,6 +266,32 @@ static void drm_sched_run_job_queue(stru + } + + /** ++ * drm_sched_free_job_queue - enqueue free-job work ++ * @sched: scheduler instance ++ */ ++static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched) ++{ ++ if (!READ_ONCE(sched->pause_submit)) ++ queue_work(sched->submit_wq, &sched->work_free_job); ++} ++ ++/** ++ * drm_sched_free_job_queue_if_done - enqueue free-job work if ready ++ * @sched: scheduler instance ++ */ ++static void drm_sched_free_job_queue_if_done(struct drm_gpu_scheduler *sched) ++{ ++ struct drm_sched_job *job; ++ ++ spin_lock(&sched->job_list_lock); ++ job = list_first_entry_or_null(&sched->pending_list, ++ struct drm_sched_job, list); ++ if (job && dma_fence_is_signaled(&job->s_fence->finished)) ++ drm_sched_free_job_queue(sched); ++ spin_unlock(&sched->job_list_lock); ++} ++ ++/** + * drm_sched_job_done - complete a job + * @s_job: pointer to the job which is done + * +@@ -284,7 +310,7 @@ static void drm_sched_job_done(struct dr + dma_fence_get(&s_fence->finished); + drm_sched_fence_finished(s_fence, result); + dma_fence_put(&s_fence->finished); +- drm_sched_run_job_queue(sched); ++ drm_sched_free_job_queue(sched); + } + + /** +@@ -943,8 +969,10 @@ drm_sched_get_cleanup_job(struct drm_gpu + typeof(*next), list); + + if (next) { +- next->s_fence->scheduled.timestamp = +- dma_fence_timestamp(&job->s_fence->finished); ++ if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, ++ &next->s_fence->scheduled.flags)) ++ next->s_fence->scheduled.timestamp = ++ dma_fence_timestamp(&job->s_fence->finished); + /* start TO timer for next job */ + drm_sched_start_timeout(sched); + } +@@ -994,7 +1022,40 @@ drm_sched_pick_best(struct drm_gpu_sched + EXPORT_SYMBOL(drm_sched_pick_best); + + /** +- * drm_sched_run_job_work - main scheduler thread ++ * drm_sched_run_job_queue_if_ready - enqueue run-job work if ready ++ * @sched: scheduler instance ++ */ ++static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched) ++{ ++ if (drm_sched_select_entity(sched)) ++ drm_sched_run_job_queue(sched); ++} ++ ++/** ++ * drm_sched_free_job_work - worker to call free_job ++ * ++ * @w: free job work ++ */ ++static void drm_sched_free_job_work(struct work_struct *w) ++{ ++ struct drm_gpu_scheduler *sched = ++ container_of(w, struct drm_gpu_scheduler, work_free_job); ++ struct drm_sched_job *cleanup_job; ++ ++ if (READ_ONCE(sched->pause_submit)) ++ return; ++ ++ cleanup_job = 
drm_sched_get_cleanup_job(sched); ++ if (cleanup_job) { ++ sched->ops->free_job(cleanup_job); ++ ++ drm_sched_free_job_queue_if_done(sched); ++ drm_sched_run_job_queue_if_ready(sched); ++ } ++} ++ ++/** ++ * drm_sched_run_job_work - worker to call run_job + * + * @w: run job work + */ +@@ -1003,65 +1064,51 @@ static void drm_sched_run_job_work(struc + struct drm_gpu_scheduler *sched = + container_of(w, struct drm_gpu_scheduler, work_run_job); + struct drm_sched_entity *entity; +- struct drm_sched_job *cleanup_job; ++ struct dma_fence *fence; ++ struct drm_sched_fence *s_fence; ++ struct drm_sched_job *sched_job; + int r; + + if (READ_ONCE(sched->pause_submit)) + return; + +- cleanup_job = drm_sched_get_cleanup_job(sched); + entity = drm_sched_select_entity(sched); ++ if (!entity) ++ return; + +- if (!entity && !cleanup_job) ++ sched_job = drm_sched_entity_pop_job(entity); ++ if (!sched_job) { ++ complete_all(&entity->entity_idle); + return; /* No more work */ ++ } + +- if (cleanup_job) +- sched->ops->free_job(cleanup_job); +- +- if (entity) { +- struct dma_fence *fence; +- struct drm_sched_fence *s_fence; +- struct drm_sched_job *sched_job; +- +- sched_job = drm_sched_entity_pop_job(entity); +- if (!sched_job) { +- complete_all(&entity->entity_idle); +- if (!cleanup_job) +- return; /* No more work */ +- goto again; +- } +- +- s_fence = sched_job->s_fence; +- +- atomic_inc(&sched->hw_rq_count); +- drm_sched_job_begin(sched_job); ++ s_fence = sched_job->s_fence; + +- trace_drm_run_job(sched_job, entity); +- fence = sched->ops->run_job(sched_job); +- complete_all(&entity->entity_idle); +- drm_sched_fence_scheduled(s_fence, fence); ++ atomic_inc(&sched->hw_rq_count); ++ drm_sched_job_begin(sched_job); + +- if (!IS_ERR_OR_NULL(fence)) { +- /* Drop for original kref_init of the fence */ +- dma_fence_put(fence); ++ trace_drm_run_job(sched_job, entity); ++ fence = sched->ops->run_job(sched_job); ++ complete_all(&entity->entity_idle); ++ drm_sched_fence_scheduled(s_fence, fence); + +- r = dma_fence_add_callback(fence, &sched_job->cb, +- drm_sched_job_done_cb); +- if (r == -ENOENT) +- drm_sched_job_done(sched_job, fence->error); +- else if (r) +- DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", +- r); +- } else { +- drm_sched_job_done(sched_job, IS_ERR(fence) ? +- PTR_ERR(fence) : 0); +- } ++ if (!IS_ERR_OR_NULL(fence)) { ++ /* Drop for original kref_init of the fence */ ++ dma_fence_put(fence); + +- wake_up(&sched->job_scheduled); ++ r = dma_fence_add_callback(fence, &sched_job->cb, ++ drm_sched_job_done_cb); ++ if (r == -ENOENT) ++ drm_sched_job_done(sched_job, fence->error); ++ else if (r) ++ DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); ++ } else { ++ drm_sched_job_done(sched_job, IS_ERR(fence) ? 
++ PTR_ERR(fence) : 0); + } + +-again: +- drm_sched_run_job_queue(sched); ++ wake_up(&sched->job_scheduled); ++ drm_sched_run_job_queue_if_ready(sched); + } + + /** +@@ -1145,6 +1192,7 @@ int drm_sched_init(struct drm_gpu_schedu + atomic_set(&sched->hw_rq_count, 0); + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); + INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); ++ INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); + atomic_set(&sched->_score, 0); + atomic64_set(&sched->job_id_count, 0); + sched->pause_submit = false; +@@ -1274,6 +1322,7 @@ void drm_sched_wqueue_stop(struct drm_gp + { + WRITE_ONCE(sched->pause_submit, true); + cancel_work_sync(&sched->work_run_job); ++ cancel_work_sync(&sched->work_free_job); + } + EXPORT_SYMBOL(drm_sched_wqueue_stop); + +@@ -1286,5 +1335,6 @@ void drm_sched_wqueue_start(struct drm_g + { + WRITE_ONCE(sched->pause_submit, false); + queue_work(sched->submit_wq, &sched->work_run_job); ++ queue_work(sched->submit_wq, &sched->work_free_job); + } + EXPORT_SYMBOL(drm_sched_wqueue_start); +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -479,9 +479,10 @@ struct drm_sched_backend_ops { + * finished. + * @hw_rq_count: the number of jobs currently in the hardware queue. + * @job_id_count: used to assign unique id to the each job. +- * @submit_wq: workqueue used to queue @work_run_job ++ * @submit_wq: workqueue used to queue @work_run_job and @work_free_job + * @timeout_wq: workqueue used to queue @work_tdr + * @work_run_job: work which calls run_job op of each scheduler. ++ * @work_free_job: work which calls free_job op of each scheduler. + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the + * timeout interval is over. + * @pending_list: the list of jobs which are currently in the job queue. +@@ -511,6 +512,7 @@ struct drm_gpu_scheduler { + struct workqueue_struct *submit_wq; + struct workqueue_struct *timeout_wq; + struct work_struct work_run_job; ++ struct work_struct work_free_job; + struct delayed_work work_tdr; + struct list_head pending_list; + spinlock_t job_list_lock; diff --git a/patches-6.6/034-24-v6.8-drm-sched-Add-a-helper-to-queue-TDR-immediately.patch b/patches-6.6/034-24-v6.8-drm-sched-Add-a-helper-to-queue-TDR-immediately.patch new file mode 100644 index 0000000..8ed2d99 --- /dev/null +++ b/patches-6.6/034-24-v6.8-drm-sched-Add-a-helper-to-queue-TDR-immediately.patch @@ -0,0 +1,70 @@ +From 3c6c7ca4508b6cb1a033ac954c50a1b2c97af883 Mon Sep 17 00:00:00 2001 +From: Matthew Brost +Date: Mon, 30 Oct 2023 20:24:39 -0700 +Subject: [PATCH] drm/sched: Add a helper to queue TDR immediately + +Add a helper whereby a driver can invoke TDR immediately. 
+ +v2: + - Drop timeout args, rename function, use mod delayed work (Luben) +v3: + - s/XE/Xe (Luben) + - present tense in commit message (Luben) + - Adjust comment for drm_sched_tdr_queue_imm (Luben) +v4: + - Adjust commit message (Luben) + +Cc: Luben Tuikov +Signed-off-by: Matthew Brost +Reviewed-by: Luben Tuikov +Link: https://lore.kernel.org/r/20231031032439.1558703-6-matthew.brost@intel.com +Signed-off-by: Luben Tuikov +--- + drivers/gpu/drm/scheduler/sched_main.c | 18 +++++++++++++++++- + include/drm/gpu_scheduler.h | 1 + + 2 files changed, 18 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -336,7 +336,7 @@ static void drm_sched_start_timeout(stru + { + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && + !list_empty(&sched->pending_list)) +- queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); ++ mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); + } + + /** +@@ -354,6 +354,22 @@ void drm_sched_fault(struct drm_gpu_sche + EXPORT_SYMBOL(drm_sched_fault); + + /** ++ * drm_sched_tdr_queue_imm: - immediately start job timeout handler ++ * ++ * @sched: scheduler for which the timeout handling should be started. ++ * ++ * Start timeout handling immediately for the named scheduler. ++ */ ++void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched) ++{ ++ spin_lock(&sched->job_list_lock); ++ sched->timeout = 0; ++ drm_sched_start_timeout(sched); ++ spin_unlock(&sched->job_list_lock); ++} ++EXPORT_SYMBOL(drm_sched_tdr_queue_imm); ++ ++/** + * drm_sched_suspend_timeout - Suspend scheduler job timeout + * + * @sched: scheduler instance for which to suspend the timeout +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -556,6 +556,7 @@ void drm_sched_entity_modify_sched(struc + struct drm_gpu_scheduler **sched_list, + unsigned int num_sched_list); + ++void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); + void drm_sched_job_cleanup(struct drm_sched_job *job); + void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); + bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); diff --git a/patches-6.6/034-25-v6.8-drm-sched-Drop-suffix-from-drm_sched_wakeup_if_can_queue.patch b/patches-6.6/034-25-v6.8-drm-sched-Drop-suffix-from-drm_sched_wakeup_if_can_queue.patch new file mode 100644 index 0000000..2472275 --- /dev/null +++ b/patches-6.6/034-25-v6.8-drm-sched-Drop-suffix-from-drm_sched_wakeup_if_can_queue.patch @@ -0,0 +1,70 @@ +From f12af4c461fb6cd5ed7b48f8b4d09b22eb19fcc5 Mon Sep 17 00:00:00 2001 +From: Tvrtko Ursulin +Date: Thu, 2 Nov 2023 10:55:38 +0000 +Subject: [PATCH] drm/sched: Drop suffix from drm_sched_wakeup_if_can_queue + +Because a) helper is exported to other parts of the scheduler and +b) there isn't a plain drm_sched_wakeup to begin with, I think we can +drop the suffix and by doing so separate the intimiate knowledge +between the scheduler components a bit better. 
+ +Signed-off-by: Tvrtko Ursulin +Cc: Luben Tuikov +Cc: Matthew Brost +Link: https://patchwork.freedesktop.org/patch/msgid/20231102105538.391648-6-tvrtko.ursulin@linux.intel.com +Reviewed-by: Luben Tuikov +Signed-off-by: Luben Tuikov +--- + drivers/gpu/drm/scheduler/sched_entity.c | 4 ++-- + drivers/gpu/drm/scheduler/sched_main.c | 4 ++-- + include/drm/gpu_scheduler.h | 2 +- + 3 files changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/scheduler/sched_entity.c ++++ b/drivers/gpu/drm/scheduler/sched_entity.c +@@ -370,7 +370,7 @@ static void drm_sched_entity_wakeup(stru + container_of(cb, struct drm_sched_entity, cb); + + drm_sched_entity_clear_dep(f, cb); +- drm_sched_wakeup_if_can_queue(entity->rq->sched); ++ drm_sched_wakeup(entity->rq->sched); + } + + /** +@@ -602,7 +602,7 @@ void drm_sched_entity_push_job(struct dr + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + drm_sched_rq_update_fifo(entity, submit_ts); + +- drm_sched_wakeup_if_can_queue(entity->rq->sched); ++ drm_sched_wakeup(entity->rq->sched); + } + } + EXPORT_SYMBOL(drm_sched_entity_push_job); +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -917,12 +917,12 @@ static bool drm_sched_can_queue(struct d + } + + /** +- * drm_sched_wakeup_if_can_queue - Wake up the scheduler ++ * drm_sched_wakeup - Wake up the scheduler if it is ready to queue + * @sched: scheduler instance + * + * Wake up the scheduler if we can queue jobs. + */ +-void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) ++void drm_sched_wakeup(struct drm_gpu_scheduler *sched) + { + if (drm_sched_can_queue(sched)) + drm_sched_run_job_queue(sched); +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -558,7 +558,7 @@ void drm_sched_entity_modify_sched(struc + + void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); + void drm_sched_job_cleanup(struct drm_sched_job *job); +-void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched); ++void drm_sched_wakeup(struct drm_gpu_scheduler *sched); + bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); + void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); + void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); diff --git a/patches-6.6/034-26-v6.8-drm-sched-Qualify-drm_sched_wakeup-by.patch b/patches-6.6/034-26-v6.8-drm-sched-Qualify-drm_sched_wakeup-by.patch new file mode 100644 index 0000000..820c82f --- /dev/null +++ b/patches-6.6/034-26-v6.8-drm-sched-Qualify-drm_sched_wakeup-by.patch @@ -0,0 +1,69 @@ +From f3123c2590005c5ff631653d31428e40cd10c618 Mon Sep 17 00:00:00 2001 +From: Luben Tuikov +Date: Thu, 9 Nov 2023 18:53:26 -0500 +Subject: [PATCH] drm/sched: Qualify drm_sched_wakeup() by + drm_sched_entity_is_ready() + +Don't "wake up" the GPU scheduler unless the entity is ready, as well as we +can queue to the scheduler, i.e. there is no point in waking up the scheduler +for the entity unless the entity is ready. 
+ +Signed-off-by: Luben Tuikov +Fixes: bc8d6a9df99038 ("drm/sched: Don't disturb the entity when in RR-mode scheduling") +Reviewed-by: Danilo Krummrich +Link: https://patchwork.freedesktop.org/patch/msgid/20231110000123.72565-2-ltuikov89@gmail.com +--- + drivers/gpu/drm/scheduler/sched_entity.c | 4 ++-- + drivers/gpu/drm/scheduler/sched_main.c | 8 +++++--- + include/drm/gpu_scheduler.h | 2 +- + 3 files changed, 8 insertions(+), 6 deletions(-) + +--- a/drivers/gpu/drm/scheduler/sched_entity.c ++++ b/drivers/gpu/drm/scheduler/sched_entity.c +@@ -370,7 +370,7 @@ static void drm_sched_entity_wakeup(stru + container_of(cb, struct drm_sched_entity, cb); + + drm_sched_entity_clear_dep(f, cb); +- drm_sched_wakeup(entity->rq->sched); ++ drm_sched_wakeup(entity->rq->sched, entity); + } + + /** +@@ -602,7 +602,7 @@ void drm_sched_entity_push_job(struct dr + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) + drm_sched_rq_update_fifo(entity, submit_ts); + +- drm_sched_wakeup(entity->rq->sched); ++ drm_sched_wakeup(entity->rq->sched, entity); + } + } + EXPORT_SYMBOL(drm_sched_entity_push_job); +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -922,10 +922,12 @@ static bool drm_sched_can_queue(struct d + * + * Wake up the scheduler if we can queue jobs. + */ +-void drm_sched_wakeup(struct drm_gpu_scheduler *sched) ++void drm_sched_wakeup(struct drm_gpu_scheduler *sched, ++ struct drm_sched_entity *entity) + { +- if (drm_sched_can_queue(sched)) +- drm_sched_run_job_queue(sched); ++ if (drm_sched_entity_is_ready(entity)) ++ if (drm_sched_can_queue(sched)) ++ drm_sched_run_job_queue(sched); + } + + /** +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -558,7 +558,7 @@ void drm_sched_entity_modify_sched(struc + + void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched); + void drm_sched_job_cleanup(struct drm_sched_job *job); +-void drm_sched_wakeup(struct drm_gpu_scheduler *sched); ++void drm_sched_wakeup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); + bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); + void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); + void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); diff --git a/patches-6.6/034-27-v6.8-drm-sched-implement-dynamic-job-flow-control.patch b/patches-6.6/034-27-v6.8-drm-sched-implement-dynamic-job-flow-control.patch new file mode 100644 index 0000000..926cc5f --- /dev/null +++ b/patches-6.6/034-27-v6.8-drm-sched-implement-dynamic-job-flow-control.patch @@ -0,0 +1,612 @@ +From a78422e9dff366b3a46ae44caf6ec8ded9c9fc2f Mon Sep 17 00:00:00 2001 +From: Danilo Krummrich +Date: Fri, 10 Nov 2023 01:16:33 +0100 +Subject: [PATCH] drm/sched: implement dynamic job-flow control + +Currently, job flow control is implemented simply by limiting the number +of jobs in flight. Therefore, a scheduler is initialized with a credit +limit that corresponds to the number of jobs which can be sent to the +hardware. + +This implies that for each job, drivers need to account for the maximum +job size possible in order to not overflow the ring buffer. + +However, there are drivers, such as Nouveau, where the job size has a +rather large range. For such drivers it can easily happen that job +submissions not even filling the ring by 1% can block subsequent +submissions, which, in the worst case, can lead to the ring run dry. + +In order to overcome this issue, allow for tracking the actual job size +instead of the number of jobs. 
Therefore, add a field to track a job's +credit count, which represents the number of credits a job contributes +to the scheduler's credit limit. + +Signed-off-by: Danilo Krummrich +Reviewed-by: Luben Tuikov +Link: https://patchwork.freedesktop.org/patch/msgid/20231110001638.71750-1-dakr@redhat.com +--- + Documentation/gpu/drm-mm.rst | 6 + + drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- + drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +- + drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 2 +- + drivers/gpu/drm/lima/lima_device.c | 2 +- + drivers/gpu/drm/lima/lima_sched.c | 2 +- + drivers/gpu/drm/msm/msm_gem_submit.c | 2 +- + drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- + drivers/gpu/drm/panfrost/panfrost_drv.c | 2 +- + drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- + .../gpu/drm/scheduler/gpu_scheduler_trace.h | 2 +- + drivers/gpu/drm/scheduler/sched_main.c | 170 ++++++++++++++---- + drivers/gpu/drm/v3d/v3d_gem.c | 2 +- + include/drm/gpu_scheduler.h | 28 ++- + 14 files changed, 175 insertions(+), 51 deletions(-) + +--- a/Documentation/gpu/drm-mm.rst ++++ b/Documentation/gpu/drm-mm.rst +@@ -552,6 +552,12 @@ Overview + .. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c + :doc: Overview + ++Flow Control ++------------ ++ ++.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c ++ :doc: Flow Control ++ + Scheduler Function References + ----------------------------- + +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +@@ -115,7 +115,7 @@ int amdgpu_job_alloc(struct amdgpu_devic + if (!entity) + return 0; + +- return drm_sched_job_init(&(*job)->base, entity, owner); ++ return drm_sched_job_init(&(*job)->base, entity, 1, owner); + } + + int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, +--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +@@ -535,7 +535,7 @@ int etnaviv_ioctl_gem_submit(struct drm_ + + ret = drm_sched_job_init(&submit->sched_job, + &ctx->sched_entity[args->pipe], +- submit->ctx); ++ 1, submit->ctx); + if (ret) + goto err_submit_put; + +--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c ++++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +@@ -1917,7 +1917,7 @@ static int etnaviv_gpu_rpm_suspend(struc + u32 idle, mask; + + /* If there are any jobs in the HW queue, we're not idle */ +- if (atomic_read(&gpu->sched.hw_rq_count)) ++ if (atomic_read(&gpu->sched.credit_count)) + return -EBUSY; + + /* Check whether the hardware (except FE and MC) is idle */ +--- a/drivers/gpu/drm/lima/lima_device.c ++++ b/drivers/gpu/drm/lima/lima_device.c +@@ -514,7 +514,7 @@ int lima_device_suspend(struct device *d + + /* check any task running */ + for (i = 0; i < lima_pipe_num; i++) { +- if (atomic_read(&ldev->pipe[i].base.hw_rq_count)) ++ if (atomic_read(&ldev->pipe[i].base.credit_count)) + return -EBUSY; + } + +--- a/drivers/gpu/drm/lima/lima_sched.c ++++ b/drivers/gpu/drm/lima/lima_sched.c +@@ -123,7 +123,7 @@ int lima_sched_task_init(struct lima_sch + for (i = 0; i < num_bos; i++) + drm_gem_object_get(&bos[i]->base.base); + +- err = drm_sched_job_init(&task->base, &context->base, vm); ++ err = drm_sched_job_init(&task->base, &context->base, 1, vm); + if (err) { + kfree(task->bos); + return err; +--- a/drivers/gpu/drm/msm/msm_gem_submit.c ++++ b/drivers/gpu/drm/msm/msm_gem_submit.c +@@ -48,7 +48,7 @@ static struct msm_gem_submit *submit_cre + return ERR_PTR(ret); + } + +- ret = drm_sched_job_init(&submit->base, queue->entity, queue); ++ ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue); + if (ret) 
{ + kfree(submit->hw_fence); + kfree(submit); +--- a/drivers/gpu/drm/nouveau/nouveau_sched.c ++++ b/drivers/gpu/drm/nouveau/nouveau_sched.c +@@ -89,7 +89,7 @@ nouveau_job_init(struct nouveau_job *job + + } + +- ret = drm_sched_job_init(&job->base, &entity->base, NULL); ++ ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL); + if (ret) + goto err_free_chains; + +--- a/drivers/gpu/drm/panfrost/panfrost_drv.c ++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c +@@ -272,7 +272,7 @@ static int panfrost_ioctl_submit(struct + + ret = drm_sched_job_init(&job->base, + &file_priv->sched_entity[slot], +- NULL); ++ 1, NULL); + if (ret) + goto out_put_job; + +--- a/drivers/gpu/drm/panfrost/panfrost_job.c ++++ b/drivers/gpu/drm/panfrost/panfrost_job.c +@@ -939,7 +939,7 @@ int panfrost_job_is_idle(struct panfrost + + for (i = 0; i < NUM_JOB_SLOTS; i++) { + /* If there are any jobs in the HW queue, we're not idle */ +- if (atomic_read(&js->queue[i].sched.hw_rq_count)) ++ if (atomic_read(&js->queue[i].sched.credit_count)) + return false; + } + +--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h ++++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +@@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(drm_sched_job, + __assign_str(name, sched_job->sched->name); + __entry->job_count = spsc_queue_count(&entity->job_queue); + __entry->hw_job_count = atomic_read( +- &sched_job->sched->hw_rq_count); ++ &sched_job->sched->credit_count); + ), + TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", + __entry->entity, __entry->id, +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -48,6 +48,30 @@ + * through the jobs entity pointer. + */ + ++/** ++ * DOC: Flow Control ++ * ++ * The DRM GPU scheduler provides a flow control mechanism to regulate the rate ++ * in which the jobs fetched from scheduler entities are executed. ++ * ++ * In this context the &drm_gpu_scheduler keeps track of a driver specified ++ * credit limit representing the capacity of this scheduler and a credit count; ++ * every &drm_sched_job carries a driver specified number of credits. ++ * ++ * Once a job is executed (but not yet finished), the job's credits contribute ++ * to the scheduler's credit count until the job is finished. If by executing ++ * one more job the scheduler's credit count would exceed the scheduler's ++ * credit limit, the job won't be executed. Instead, the scheduler will wait ++ * until the credit count has decreased enough to not overflow its credit limit. ++ * This implies waiting for previously executed jobs. ++ * ++ * Optionally, drivers may register a callback (update_job_credits) provided by ++ * struct drm_sched_backend_ops to update the job's credits dynamically. The ++ * scheduler executes this callback every time the scheduler considers a job for ++ * execution and subsequently checks whether the job fits the scheduler's credit ++ * limit. 
++ */ ++ + #include + #include + #include +@@ -75,6 +99,51 @@ int drm_sched_policy = DRM_SCHED_POLICY_ + MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); + module_param_named(sched_policy, drm_sched_policy, int, 0444); + ++static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) ++{ ++ u32 credits; ++ ++ drm_WARN_ON(sched, check_sub_overflow(sched->credit_limit, ++ atomic_read(&sched->credit_count), ++ &credits)); ++ ++ return credits; ++} ++ ++/** ++ * drm_sched_can_queue -- Can we queue more to the hardware? ++ * @sched: scheduler instance ++ * @entity: the scheduler entity ++ * ++ * Return true if we can push at least one more job from @entity, false ++ * otherwise. ++ */ ++static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, ++ struct drm_sched_entity *entity) ++{ ++ struct drm_sched_job *s_job; ++ ++ s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); ++ if (!s_job) ++ return false; ++ ++ if (sched->ops->update_job_credits) { ++ s_job->credits = sched->ops->update_job_credits(s_job); ++ ++ drm_WARN(sched, !s_job->credits, ++ "Jobs with zero credits bypass job-flow control.\n"); ++ } ++ ++ /* If a job exceeds the credit limit, truncate it to the credit limit ++ * itself to guarantee forward progress. ++ */ ++ if (drm_WARN(sched, s_job->credits > sched->credit_limit, ++ "Jobs may not exceed the credit limit, truncate.\n")) ++ s_job->credits = sched->credit_limit; ++ ++ return drm_sched_available_credits(sched) >= s_job->credits; ++} ++ + static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, + const struct rb_node *b) + { +@@ -186,12 +255,18 @@ void drm_sched_rq_remove_entity(struct d + /** + * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run + * ++ * @sched: the gpu scheduler + * @rq: scheduler run queue to check. + * +- * Try to find a ready entity, returns NULL if none found. ++ * Try to find the next ready entity. ++ * ++ * Return an entity if one is found; return an error-pointer (!NULL) if an ++ * entity was ready, but the scheduler had insufficient credits to accommodate ++ * its job; return NULL, if no ready entity was found. + */ + static struct drm_sched_entity * +-drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) ++drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, ++ struct drm_sched_rq *rq) + { + struct drm_sched_entity *entity; + +@@ -201,6 +276,14 @@ drm_sched_rq_select_entity_rr(struct drm + if (entity) { + list_for_each_entry_continue(entity, &rq->entities, list) { + if (drm_sched_entity_is_ready(entity)) { ++ /* If we can't queue yet, preserve the current ++ * entity in terms of fairness. ++ */ ++ if (!drm_sched_can_queue(sched, entity)) { ++ spin_unlock(&rq->lock); ++ return ERR_PTR(-ENOSPC); ++ } ++ + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + spin_unlock(&rq->lock); +@@ -210,8 +293,15 @@ drm_sched_rq_select_entity_rr(struct drm + } + + list_for_each_entry(entity, &rq->entities, list) { +- + if (drm_sched_entity_is_ready(entity)) { ++ /* If we can't queue yet, preserve the current entity in ++ * terms of fairness. 
++ */ ++ if (!drm_sched_can_queue(sched, entity)) { ++ spin_unlock(&rq->lock); ++ return ERR_PTR(-ENOSPC); ++ } ++ + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + spin_unlock(&rq->lock); +@@ -230,12 +320,18 @@ drm_sched_rq_select_entity_rr(struct drm + /** + * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run + * ++ * @sched: the gpu scheduler + * @rq: scheduler run queue to check. + * +- * Find oldest waiting ready entity, returns NULL if none found. ++ * Find oldest waiting ready entity. ++ * ++ * Return an entity if one is found; return an error-pointer (!NULL) if an ++ * entity was ready, but the scheduler had insufficient credits to accommodate ++ * its job; return NULL, if no ready entity was found. + */ + static struct drm_sched_entity * +-drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) ++drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, ++ struct drm_sched_rq *rq) + { + struct rb_node *rb; + +@@ -245,6 +341,14 @@ drm_sched_rq_select_entity_fifo(struct d + + entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); + if (drm_sched_entity_is_ready(entity)) { ++ /* If we can't queue yet, preserve the current entity in ++ * terms of fairness. ++ */ ++ if (!drm_sched_can_queue(sched, entity)) { ++ spin_unlock(&rq->lock); ++ return ERR_PTR(-ENOSPC); ++ } ++ + rq->current_entity = entity; + reinit_completion(&entity->entity_idle); + break; +@@ -302,7 +406,7 @@ static void drm_sched_job_done(struct dr + struct drm_sched_fence *s_fence = s_job->s_fence; + struct drm_gpu_scheduler *sched = s_fence->sched; + +- atomic_dec(&sched->hw_rq_count); ++ atomic_sub(s_job->credits, &sched->credit_count); + atomic_dec(sched->score); + + trace_drm_sched_process_job(s_fence); +@@ -519,7 +623,7 @@ void drm_sched_stop(struct drm_gpu_sched + &s_job->cb)) { + dma_fence_put(s_job->s_fence->parent); + s_job->s_fence->parent = NULL; +- atomic_dec(&sched->hw_rq_count); ++ atomic_sub(s_job->credits, &sched->credit_count); + } else { + /* + * remove job from pending_list. +@@ -580,7 +684,7 @@ void drm_sched_start(struct drm_gpu_sche + list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { + struct dma_fence *fence = s_job->s_fence->parent; + +- atomic_inc(&sched->hw_rq_count); ++ atomic_add(s_job->credits, &sched->credit_count); + + if (!full_recovery) + continue; +@@ -664,6 +768,8 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); + * drm_sched_job_init - init a scheduler job + * @job: scheduler job to init + * @entity: scheduler entity to use ++ * @credits: the number of credits this job contributes to the schedulers ++ * credit limit + * @owner: job owner for debugging + * + * Refer to drm_sched_entity_push_job() documentation +@@ -681,7 +787,7 @@ EXPORT_SYMBOL(drm_sched_resubmit_jobs); + */ + int drm_sched_job_init(struct drm_sched_job *job, + struct drm_sched_entity *entity, +- void *owner) ++ u32 credits, void *owner) + { + if (!entity->rq) { + /* This will most likely be followed by missing frames +@@ -692,7 +798,13 @@ int drm_sched_job_init(struct drm_sched_ + return -ENOENT; + } + ++ if (unlikely(!credits)) { ++ pr_err("*ERROR* %s: credits cannot be 0!\n", __func__); ++ return -EINVAL; ++ } ++ + job->entity = entity; ++ job->credits = credits; + job->s_fence = drm_sched_fence_alloc(entity, owner); + if (!job->s_fence) + return -ENOMEM; +@@ -905,20 +1017,9 @@ void drm_sched_job_cleanup(struct drm_sc + EXPORT_SYMBOL(drm_sched_job_cleanup); + + /** +- * drm_sched_can_queue -- Can we queue more to the hardware? 
+- * @sched: scheduler instance +- * +- * Return true if we can push more jobs to the hw, otherwise false. +- */ +-static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) +-{ +- return atomic_read(&sched->hw_rq_count) < +- sched->hw_submission_limit; +-} +- +-/** + * drm_sched_wakeup - Wake up the scheduler if it is ready to queue + * @sched: scheduler instance ++ * @entity: the scheduler entity + * + * Wake up the scheduler if we can queue jobs. + */ +@@ -926,7 +1027,7 @@ void drm_sched_wakeup(struct drm_gpu_sch + struct drm_sched_entity *entity) + { + if (drm_sched_entity_is_ready(entity)) +- if (drm_sched_can_queue(sched)) ++ if (drm_sched_can_queue(sched, entity)) + drm_sched_run_job_queue(sched); + } + +@@ -935,7 +1036,11 @@ void drm_sched_wakeup(struct drm_gpu_sch + * + * @sched: scheduler instance + * +- * Returns the entity to process or NULL if none are found. ++ * Return an entity to process or NULL if none are found. ++ * ++ * Note, that we break out of the for-loop when "entity" is non-null, which can ++ * also be an error-pointer--this assures we don't process lower priority ++ * run-queues. See comments in the respectively called functions. + */ + static struct drm_sched_entity * + drm_sched_select_entity(struct drm_gpu_scheduler *sched) +@@ -943,19 +1048,16 @@ drm_sched_select_entity(struct drm_gpu_s + struct drm_sched_entity *entity; + int i; + +- if (!drm_sched_can_queue(sched)) +- return NULL; +- + /* Kernel run queue has higher priority than normal run queue*/ + for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? +- drm_sched_rq_select_entity_fifo(sched->sched_rq[i]) : +- drm_sched_rq_select_entity_rr(sched->sched_rq[i]); ++ drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : ++ drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); + if (entity) + break; + } + +- return entity; ++ return IS_ERR(entity) ? NULL : entity; + } + + /** +@@ -1102,7 +1204,7 @@ static void drm_sched_run_job_work(struc + + s_fence = sched_job->s_fence; + +- atomic_inc(&sched->hw_rq_count); ++ atomic_add(sched_job->credits, &sched->credit_count); + drm_sched_job_begin(sched_job); + + trace_drm_run_job(sched_job, entity); +@@ -1137,7 +1239,7 @@ static void drm_sched_run_job_work(struc + * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is + * allocated and used + * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT +- * @hw_submission: number of hw submissions that can be in flight ++ * @credit_limit: the number of credits this scheduler can hold from all jobs + * @hang_limit: number of times to allow a job to hang before dropping it + * @timeout: timeout value in jiffies for the scheduler + * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is +@@ -1151,14 +1253,14 @@ static void drm_sched_run_job_work(struc + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + struct workqueue_struct *submit_wq, +- u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, ++ u32 num_rqs, u32 credit_limit, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev) + { + int i, ret; + + sched->ops = ops; +- sched->hw_submission_limit = hw_submission; ++ sched->credit_limit = credit_limit; + sched->name = name; + sched->timeout = timeout; + sched->timeout_wq = timeout_wq ? 
: system_wq; +@@ -1207,7 +1309,7 @@ int drm_sched_init(struct drm_gpu_schedu + init_waitqueue_head(&sched->job_scheduled); + INIT_LIST_HEAD(&sched->pending_list); + spin_lock_init(&sched->job_list_lock); +- atomic_set(&sched->hw_rq_count, 0); ++ atomic_set(&sched->credit_count, 0); + INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); + INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); + INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); +--- a/drivers/gpu/drm/v3d/v3d_gem.c ++++ b/drivers/gpu/drm/v3d/v3d_gem.c +@@ -417,7 +417,7 @@ v3d_job_init(struct v3d_dev *v3d, struct + job->free = free; + + ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], +- v3d_priv); ++ 1, v3d_priv); + if (ret) + goto fail; + +--- a/include/drm/gpu_scheduler.h ++++ b/include/drm/gpu_scheduler.h +@@ -320,6 +320,7 @@ struct drm_sched_fence *to_drm_sched_fen + * @sched: the scheduler instance on which this job is scheduled. + * @s_fence: contains the fences for the scheduling of job. + * @finish_cb: the callback for the finished fence. ++ * @credits: the number of credits this job contributes to the scheduler + * @work: Helper to reschdeule job kill to different context. + * @id: a unique id assigned to each job scheduled on the scheduler. + * @karma: increment on every hang caused by this job. If this exceeds the hang +@@ -339,6 +340,8 @@ struct drm_sched_job { + struct drm_gpu_scheduler *sched; + struct drm_sched_fence *s_fence; + ++ u32 credits; ++ + /* + * work is used only after finish_cb has been used and will not be + * accessed anymore. +@@ -462,13 +465,27 @@ struct drm_sched_backend_ops { + * and it's time to clean it up. + */ + void (*free_job)(struct drm_sched_job *sched_job); ++ ++ /** ++ * @update_job_credits: Called when the scheduler is considering this ++ * job for execution. ++ * ++ * This callback returns the number of credits the job would take if ++ * pushed to the hardware. Drivers may use this to dynamically update ++ * the job's credit count. For instance, deduct the number of credits ++ * for already signalled native fences. ++ * ++ * This callback is optional. ++ */ ++ u32 (*update_job_credits)(struct drm_sched_job *sched_job); + }; + + /** + * struct drm_gpu_scheduler - scheduler instance-specific data + * + * @ops: backend operations provided by the driver. +- * @hw_submission_limit: the max size of the hardware queue. ++ * @credit_limit: the credit limit of this scheduler ++ * @credit_count: the current credit count of this scheduler + * @timeout: the time after which a job is removed from the scheduler. + * @name: name of the ring for which this scheduler is being used. + * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT, +@@ -477,7 +494,6 @@ struct drm_sched_backend_ops { + * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler + * waits on this wait queue until all the scheduled jobs are + * finished. +- * @hw_rq_count: the number of jobs currently in the hardware queue. + * @job_id_count: used to assign unique id to the each job. 
+ * @submit_wq: workqueue used to queue @work_run_job and @work_free_job + * @timeout_wq: workqueue used to queue @work_tdr +@@ -501,13 +517,13 @@ struct drm_sched_backend_ops { + */ + struct drm_gpu_scheduler { + const struct drm_sched_backend_ops *ops; +- uint32_t hw_submission_limit; ++ u32 credit_limit; ++ atomic_t credit_count; + long timeout; + const char *name; + u32 num_rqs; + struct drm_sched_rq **sched_rq; + wait_queue_head_t job_scheduled; +- atomic_t hw_rq_count; + atomic64_t job_id_count; + struct workqueue_struct *submit_wq; + struct workqueue_struct *timeout_wq; +@@ -529,14 +545,14 @@ struct drm_gpu_scheduler { + int drm_sched_init(struct drm_gpu_scheduler *sched, + const struct drm_sched_backend_ops *ops, + struct workqueue_struct *submit_wq, +- u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit, ++ u32 num_rqs, u32 credit_limit, unsigned int hang_limit, + long timeout, struct workqueue_struct *timeout_wq, + atomic_t *score, const char *name, struct device *dev); + + void drm_sched_fini(struct drm_gpu_scheduler *sched); + int drm_sched_job_init(struct drm_sched_job *job, + struct drm_sched_entity *entity, +- void *owner); ++ u32 credits, void *owner); + void drm_sched_job_arm(struct drm_sched_job *job); + int drm_sched_job_add_dependency(struct drm_sched_job *job, + struct dma_fence *fence); diff --git a/patches-6.6/034-28-v6.8-iommu-Allow-passing-custom-allocators-to-pgtable-drivers.patch b/patches-6.6/034-28-v6.8-iommu-Allow-passing-custom-allocators-to-pgtable-drivers.patch new file mode 100644 index 0000000..f875da8 --- /dev/null +++ b/patches-6.6/034-28-v6.8-iommu-Allow-passing-custom-allocators-to-pgtable-drivers.patch @@ -0,0 +1,129 @@ +From 17b226dcf80ce79d02f4f0b08813d8848885b986 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Fri, 24 Nov 2023 15:24:33 +0100 +Subject: [PATCH] iommu: Allow passing custom allocators to pgtable drivers + +This will be useful for GPU drivers who want to keep page tables in a +pool so they can: + +- keep freed page tables in a free pool and speed-up upcoming page + table allocations +- batch page table allocation instead of allocating one page at a time +- pre-reserve pages for page tables needed for map/unmap operations, + to ensure map/unmap operations don't try to allocate memory in paths + they're allowed to block or fail + +It might also be valuable for other aspects of GPU and similar +use-cases, like fine-grained memory accounting and resource limiting. + +We will extend the Arm LPAE format to support custom allocators in a +separate commit. + +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: Robin Murphy +Link: https://lore.kernel.org/r/20231124142434.1577550-2-boris.brezillon@collabora.com +Signed-off-by: Joerg Roedel +--- + drivers/iommu/io-pgtable.c | 23 +++++++++++++++++++++++ + include/linux/io-pgtable.h | 34 ++++++++++++++++++++++++++++++++++ + 2 files changed, 57 insertions(+) + +--- a/drivers/iommu/io-pgtable.c ++++ b/drivers/iommu/io-pgtable.c +@@ -34,6 +34,26 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMT + #endif + }; + ++static int check_custom_allocator(enum io_pgtable_fmt fmt, ++ struct io_pgtable_cfg *cfg) ++{ ++ /* No custom allocator, no need to check the format. */ ++ if (!cfg->alloc && !cfg->free) ++ return 0; ++ ++ /* When passing a custom allocator, both the alloc and free ++ * functions should be provided. ++ */ ++ if (!cfg->alloc || !cfg->free) ++ return -EINVAL; ++ ++ /* Make sure the format supports custom allocators. 
*/ ++ if (io_pgtable_init_table[fmt]->caps & IO_PGTABLE_CAP_CUSTOM_ALLOCATOR) ++ return 0; ++ ++ return -EINVAL; ++} ++ + struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg, + void *cookie) +@@ -44,6 +64,9 @@ struct io_pgtable_ops *alloc_io_pgtable_ + if (fmt >= IO_PGTABLE_NUM_FMTS) + return NULL; + ++ if (check_custom_allocator(fmt, cfg)) ++ return NULL; ++ + fns = io_pgtable_init_table[fmt]; + if (!fns) + return NULL; +--- a/include/linux/io-pgtable.h ++++ b/include/linux/io-pgtable.h +@@ -100,6 +100,30 @@ struct io_pgtable_cfg { + const struct iommu_flush_ops *tlb; + struct device *iommu_dev; + ++ /** ++ * @alloc: Custom page allocator. ++ * ++ * Optional hook used to allocate page tables. If this function is NULL, ++ * @free must be NULL too. ++ * ++ * Memory returned should be zeroed and suitable for dma_map_single() and ++ * virt_to_phys(). ++ * ++ * Not all formats support custom page allocators. Before considering ++ * passing a non-NULL value, make sure the chosen page format supports ++ * this feature. ++ */ ++ void *(*alloc)(void *cookie, size_t size, gfp_t gfp); ++ ++ /** ++ * @free: Custom page de-allocator. ++ * ++ * Optional hook used to free page tables allocated with the @alloc ++ * hook. Must be non-NULL if @alloc is not NULL, must be NULL ++ * otherwise. ++ */ ++ void (*free)(void *cookie, void *pages, size_t size); ++ + /* Low-level data specific to the table format */ + union { + struct { +@@ -238,15 +262,25 @@ io_pgtable_tlb_add_page(struct io_pgtabl + } + + /** ++ * enum io_pgtable_caps - IO page table backend capabilities. ++ */ ++enum io_pgtable_caps { ++ /** @IO_PGTABLE_CAP_CUSTOM_ALLOCATOR: Backend accepts custom page table allocators. */ ++ IO_PGTABLE_CAP_CUSTOM_ALLOCATOR = BIT(0), ++}; ++ ++/** + * struct io_pgtable_init_fns - Alloc/free a set of page tables for a + * particular format. + * + * @alloc: Allocate a set of page tables described by cfg. + * @free: Free the page tables associated with iop. ++ * @caps: Combination of @io_pgtable_caps flags encoding the backend capabilities. + */ + struct io_pgtable_init_fns { + struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); + void (*free)(struct io_pgtable *iop); ++ u32 caps; + }; + + extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns; diff --git a/patches-6.6/034-29-v6.9-drm-exec-drm-gpuvm-Prefer-u32-over-uint32_t.patch b/patches-6.6/034-29-v6.9-drm-exec-drm-gpuvm-Prefer-u32-over-uint32_t.patch new file mode 100644 index 0000000..6917137 --- /dev/null +++ b/patches-6.6/034-29-v6.9-drm-exec-drm-gpuvm-Prefer-u32-over-uint32_t.patch @@ -0,0 +1,68 @@ +From cf41cebf9dc8143ca7bb0aabb7e0053e16f0515a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= +Date: Fri, 19 Jan 2024 10:05:57 +0100 +Subject: [PATCH] drm/exec, drm/gpuvm: Prefer u32 over uint32_t +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The relatively recently introduced drm/exec utility was using uint32_t +in its interface, which was then also carried over to drm/gpuvm. + +Prefer u32 in new code and update drm/exec and drm/gpuvm accordingly. 
+ +Cc: Christian König +Cc: Danilo Krummrich +Signed-off-by: Thomas Hellström +Reviewed-by: Christian König +Reviewed-by: Danilo Krummrich +Reviewed-by: Lucas De Marchi +Link: https://patchwork.freedesktop.org/patch/msgid/20240119090557.6360-1-thomas.hellstrom@linux.intel.com +--- + drivers/gpu/drm/drm_exec.c | 2 +- + include/drm/drm_exec.h | 4 ++-- + include/drm/drm_gpuvm.h | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/drm_exec.c ++++ b/drivers/gpu/drm/drm_exec.c +@@ -72,7 +72,7 @@ static void drm_exec_unlock_all(struct d + * + * Initialize the object and make sure that we can track locked objects. + */ +-void drm_exec_init(struct drm_exec *exec, uint32_t flags) ++void drm_exec_init(struct drm_exec *exec, u32 flags) + { + exec->flags = flags; + exec->objects = kmalloc(PAGE_SIZE, GFP_KERNEL); +--- a/include/drm/drm_exec.h ++++ b/include/drm/drm_exec.h +@@ -18,7 +18,7 @@ struct drm_exec { + /** + * @flags: Flags to control locking behavior + */ +- uint32_t flags; ++ u32 flags; + + /** + * @ticket: WW ticket used for acquiring locks +@@ -135,7 +135,7 @@ static inline bool drm_exec_is_contended + return !!exec->contended; + } + +-void drm_exec_init(struct drm_exec *exec, uint32_t flags); ++void drm_exec_init(struct drm_exec *exec, u32 flags); + void drm_exec_fini(struct drm_exec *exec); + bool drm_exec_cleanup(struct drm_exec *exec); + int drm_exec_lock_obj(struct drm_exec *exec, struct drm_gem_object *obj); +--- a/include/drm/drm_gpuvm.h ++++ b/include/drm/drm_gpuvm.h +@@ -514,7 +514,7 @@ struct drm_gpuvm_exec { + /** + * @flags: the flags for the struct drm_exec + */ +- uint32_t flags; ++ u32 flags; + + /** + * @vm: the &drm_gpuvm to lock its DMA reservations diff --git a/patches-6.6/034-30-v6.10-drm-panthor-Add-uAPI.patch b/patches-6.6/034-30-v6.10-drm-panthor-Add-uAPI.patch new file mode 100644 index 0000000..e6f9bee --- /dev/null +++ b/patches-6.6/034-30-v6.10-drm-panthor-Add-uAPI.patch @@ -0,0 +1,1024 @@ +From 0f25e493a2462dbdd2e34f4e100405380cc0201a Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:15 +0100 +Subject: [PATCH] drm/panthor: Add uAPI + +Panthor follows the lead of other recently submitted drivers with +ioctls allowing us to support modern Vulkan features, like sparse memory +binding: + +- Pretty standard GEM management ioctls (BO_CREATE and BO_MMAP_OFFSET), + with the 'exclusive-VM' bit to speed-up BO reservation on job submission +- VM management ioctls (VM_CREATE, VM_DESTROY and VM_BIND). The VM_BIND + ioctl is loosely based on the Xe model, and can handle both + asynchronous and synchronous requests +- GPU execution context creation/destruction, tiler heap context creation + and job submission. Those ioctls reflect how the hardware/scheduler + works and are thus driver specific. + +We also have a way to expose IO regions, such that the usermode driver +can directly access specific/well-isolate registers, like the +LATEST_FLUSH register used to implement cache-flush reduction. + +This uAPI intentionally keeps usermode queues out of the scope, which +explains why doorbell registers and command stream ring-buffers are not +directly exposed to userspace. 
+ +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix typo +- Add Liviu's R-b + +v4: +- Add a VM_GET_STATE ioctl +- Fix doc +- Expose the CORE_FEATURES register so we can deal with variants in the + UMD +- Add Steve's R-b + +v3: +- Add the concept of sync-only VM operation +- Fix support for 32-bit userspace +- Rework drm_panthor_vm_create to pass the user VA size instead of + the kernel VA size (suggested by Robin Murphy) +- Typo fixes +- Explicitly cast enums with top bit set to avoid compiler warnings in + -pedantic mode. +- Drop property core_group_count as it can be easily calculated by the + number of bits set in l2_present. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-2-boris.brezillon@collabora.com +--- + Documentation/gpu/driver-uapi.rst | 5 + + include/uapi/drm/panthor_drm.h | 945 ++++++++++++++++++++++++++++++ + 2 files changed, 950 insertions(+) + create mode 100644 include/uapi/drm/panthor_drm.h + +--- a/Documentation/gpu/driver-uapi.rst ++++ b/Documentation/gpu/driver-uapi.rst +@@ -17,3 +17,8 @@ VM_BIND / EXEC uAPI + :doc: Overview + + .. kernel-doc:: include/uapi/drm/nouveau_drm.h ++ ++drm/panthor uAPI ++================ ++ ++.. kernel-doc:: include/uapi/drm/panthor_drm.h +--- /dev/null ++++ b/include/uapi/drm/panthor_drm.h +@@ -0,0 +1,945 @@ ++/* SPDX-License-Identifier: MIT */ ++/* Copyright (C) 2023 Collabora ltd. */ ++#ifndef _PANTHOR_DRM_H_ ++#define _PANTHOR_DRM_H_ ++ ++#include "drm.h" ++ ++#if defined(__cplusplus) ++extern "C" { ++#endif ++ ++/** ++ * DOC: Introduction ++ * ++ * This documentation describes the Panthor IOCTLs. ++ * ++ * Just a few generic rules about the data passed to the Panthor IOCTLs: ++ * ++ * - Structures must be aligned on 64-bit/8-byte. If the object is not ++ * naturally aligned, a padding field must be added. ++ * - Fields must be explicitly aligned to their natural type alignment with ++ * pad[0..N] fields. ++ * - All padding fields will be checked by the driver to make sure they are ++ * zeroed. ++ * - Flags can be added, but not removed/replaced. ++ * - New fields can be added to the main structures (the structures ++ * directly passed to the ioctl). Those fields can be added at the end of ++ * the structure, or replace existing padding fields. Any new field being ++ * added must preserve the behavior that existed before those fields were ++ * added when a value of zero is passed. ++ * - New fields can be added to indirect objects (objects pointed by the ++ * main structure), iff those objects are passed a size to reflect the ++ * size known by the userspace driver (see drm_panthor_obj_array::stride ++ * or drm_panthor_dev_query::size). ++ * - If the kernel driver is too old to know some fields, those will be ++ * ignored if zero, and otherwise rejected (and so will be zero on output). ++ * - If userspace is too old to know some fields, those will be zeroed ++ * (input) before the structure is parsed by the kernel driver. ++ * - Each new flag/field addition must come with a driver version update so ++ * the userspace driver doesn't have to trial and error to know which ++ * flags are supported. ++ * - Structures should not contain unions, as this would defeat the ++ * extensibility of such structures. ++ * - IOCTLs can't be removed or replaced. 
New IOCTL IDs should be placed ++ * at the end of the drm_panthor_ioctl_id enum. ++ */ ++ ++/** ++ * DOC: MMIO regions exposed to userspace. ++ * ++ * .. c:macro:: DRM_PANTHOR_USER_MMIO_OFFSET ++ * ++ * File offset for all MMIO regions being exposed to userspace. Don't use ++ * this value directly, use DRM_PANTHOR_USER__OFFSET values instead. ++ * pgoffset passed to mmap2() is an unsigned long, which forces us to use a ++ * different offset on 32-bit and 64-bit systems. ++ * ++ * .. c:macro:: DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET ++ * ++ * File offset for the LATEST_FLUSH_ID register. The Userspace driver controls ++ * GPU cache flushing through CS instructions, but the flush reduction ++ * mechanism requires a flush_id. This flush_id could be queried with an ++ * ioctl, but Arm provides a well-isolated register page containing only this ++ * read-only register, so let's expose this page through a static mmap offset ++ * and allow direct mapping of this MMIO region so we can avoid the ++ * user <-> kernel round-trip. ++ */ ++#define DRM_PANTHOR_USER_MMIO_OFFSET_32BIT (1ull << 43) ++#define DRM_PANTHOR_USER_MMIO_OFFSET_64BIT (1ull << 56) ++#define DRM_PANTHOR_USER_MMIO_OFFSET (sizeof(unsigned long) < 8 ? \ ++ DRM_PANTHOR_USER_MMIO_OFFSET_32BIT : \ ++ DRM_PANTHOR_USER_MMIO_OFFSET_64BIT) ++#define DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET (DRM_PANTHOR_USER_MMIO_OFFSET | 0) ++ ++/** ++ * DOC: IOCTL IDs ++ * ++ * enum drm_panthor_ioctl_id - IOCTL IDs ++ * ++ * Place new ioctls at the end, don't re-order, don't replace or remove entries. ++ * ++ * These IDs are not meant to be used directly. Use the DRM_IOCTL_PANTHOR_xxx ++ * definitions instead. ++ */ ++enum drm_panthor_ioctl_id { ++ /** @DRM_PANTHOR_DEV_QUERY: Query device information. */ ++ DRM_PANTHOR_DEV_QUERY = 0, ++ ++ /** @DRM_PANTHOR_VM_CREATE: Create a VM. */ ++ DRM_PANTHOR_VM_CREATE, ++ ++ /** @DRM_PANTHOR_VM_DESTROY: Destroy a VM. */ ++ DRM_PANTHOR_VM_DESTROY, ++ ++ /** @DRM_PANTHOR_VM_BIND: Bind/unbind memory to a VM. */ ++ DRM_PANTHOR_VM_BIND, ++ ++ /** @DRM_PANTHOR_VM_GET_STATE: Get VM state. */ ++ DRM_PANTHOR_VM_GET_STATE, ++ ++ /** @DRM_PANTHOR_BO_CREATE: Create a buffer object. */ ++ DRM_PANTHOR_BO_CREATE, ++ ++ /** ++ * @DRM_PANTHOR_BO_MMAP_OFFSET: Get the file offset to pass to ++ * mmap to map a GEM object. ++ */ ++ DRM_PANTHOR_BO_MMAP_OFFSET, ++ ++ /** @DRM_PANTHOR_GROUP_CREATE: Create a scheduling group. */ ++ DRM_PANTHOR_GROUP_CREATE, ++ ++ /** @DRM_PANTHOR_GROUP_DESTROY: Destroy a scheduling group. */ ++ DRM_PANTHOR_GROUP_DESTROY, ++ ++ /** ++ * @DRM_PANTHOR_GROUP_SUBMIT: Submit jobs to queues belonging ++ * to a specific scheduling group. ++ */ ++ DRM_PANTHOR_GROUP_SUBMIT, ++ ++ /** @DRM_PANTHOR_GROUP_GET_STATE: Get the state of a scheduling group. */ ++ DRM_PANTHOR_GROUP_GET_STATE, ++ ++ /** @DRM_PANTHOR_TILER_HEAP_CREATE: Create a tiler heap. */ ++ DRM_PANTHOR_TILER_HEAP_CREATE, ++ ++ /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */ ++ DRM_PANTHOR_TILER_HEAP_DESTROY, ++}; ++ ++/** ++ * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number ++ * @__access: Access type. Must be R, W or RW. ++ * @__id: One of the DRM_PANTHOR_xxx id. ++ * @__type: Suffix of the type being passed to the IOCTL. ++ * ++ * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx ++ * values instead. ++ * ++ * Return: An IOCTL number to be passed to ioctl() from userspace. 
++ */ ++#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ ++ DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \ ++ struct drm_panthor_ ## __type) ++ ++#define DRM_IOCTL_PANTHOR_DEV_QUERY \ ++ DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query) ++#define DRM_IOCTL_PANTHOR_VM_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create) ++#define DRM_IOCTL_PANTHOR_VM_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy) ++#define DRM_IOCTL_PANTHOR_VM_BIND \ ++ DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind) ++#define DRM_IOCTL_PANTHOR_VM_GET_STATE \ ++ DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state) ++#define DRM_IOCTL_PANTHOR_BO_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create) ++#define DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET \ ++ DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset) ++#define DRM_IOCTL_PANTHOR_GROUP_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create) ++#define DRM_IOCTL_PANTHOR_GROUP_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy) ++#define DRM_IOCTL_PANTHOR_GROUP_SUBMIT \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit) ++#define DRM_IOCTL_PANTHOR_GROUP_GET_STATE \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state) ++#define DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create) ++#define DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy) ++ ++/** ++ * DOC: IOCTL arguments ++ */ ++ ++/** ++ * struct drm_panthor_obj_array - Object array. ++ * ++ * This object is used to pass an array of objects whose size is subject to changes in ++ * future versions of the driver. In order to support this mutability, we pass a stride ++ * describing the size of the object as known by userspace. ++ * ++ * You shouldn't fill drm_panthor_obj_array fields directly. You should instead use ++ * the DRM_PANTHOR_OBJ_ARRAY() macro that takes care of initializing the stride to ++ * the object size. ++ */ ++struct drm_panthor_obj_array { ++ /** @stride: Stride of object struct. Used for versioning. */ ++ __u32 stride; ++ ++ /** @count: Number of objects in the array. */ ++ __u32 count; ++ ++ /** @array: User pointer to an array of objects. */ ++ __u64 array; ++}; ++ ++/** ++ * DRM_PANTHOR_OBJ_ARRAY() - Initialize a drm_panthor_obj_array field. ++ * @cnt: Number of elements in the array. ++ * @ptr: Pointer to the array to pass to the kernel. ++ * ++ * Macro initializing a drm_panthor_obj_array based on the object size as known ++ * by userspace. ++ */ ++#define DRM_PANTHOR_OBJ_ARRAY(cnt, ptr) \ ++ { .stride = sizeof((ptr)[0]), .count = (cnt), .array = (__u64)(uintptr_t)(ptr) } ++ ++/** ++ * enum drm_panthor_sync_op_flags - Synchronization operation flags. ++ */ ++enum drm_panthor_sync_op_flags { ++ /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK: Synchronization handle type mask. */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK = 0xff, ++ ++ /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ: Synchronization object type. */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ = 0, ++ ++ /** ++ * @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ: Timeline synchronization ++ * object type. ++ */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ = 1, ++ ++ /** @DRM_PANTHOR_SYNC_OP_WAIT: Wait operation. */ ++ DRM_PANTHOR_SYNC_OP_WAIT = 0 << 31, ++ ++ /** @DRM_PANTHOR_SYNC_OP_SIGNAL: Signal operation. */ ++ DRM_PANTHOR_SYNC_OP_SIGNAL = (int)(1u << 31), ++}; ++ ++/** ++ * struct drm_panthor_sync_op - Synchronization operation. 
++ */ ++struct drm_panthor_sync_op { ++ /** @flags: Synchronization operation flags. Combination of DRM_PANTHOR_SYNC_OP values. */ ++ __u32 flags; ++ ++ /** @handle: Sync handle. */ ++ __u32 handle; ++ ++ /** ++ * @timeline_value: MBZ if ++ * (flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK) != ++ * DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ. ++ */ ++ __u64 timeline_value; ++}; ++ ++/** ++ * enum drm_panthor_dev_query_type - Query type ++ * ++ * Place new types at the end, don't re-order, don't remove or replace. ++ */ ++enum drm_panthor_dev_query_type { ++ /** @DRM_PANTHOR_DEV_QUERY_GPU_INFO: Query GPU information. */ ++ DRM_PANTHOR_DEV_QUERY_GPU_INFO = 0, ++ ++ /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */ ++ DRM_PANTHOR_DEV_QUERY_CSIF_INFO, ++}; ++ ++/** ++ * struct drm_panthor_gpu_info - GPU information ++ * ++ * Structure grouping all queryable information relating to the GPU. ++ */ ++struct drm_panthor_gpu_info { ++ /** @gpu_id : GPU ID. */ ++ __u32 gpu_id; ++#define DRM_PANTHOR_ARCH_MAJOR(x) ((x) >> 28) ++#define DRM_PANTHOR_ARCH_MINOR(x) (((x) >> 24) & 0xf) ++#define DRM_PANTHOR_ARCH_REV(x) (((x) >> 20) & 0xf) ++#define DRM_PANTHOR_PRODUCT_MAJOR(x) (((x) >> 16) & 0xf) ++#define DRM_PANTHOR_VERSION_MAJOR(x) (((x) >> 12) & 0xf) ++#define DRM_PANTHOR_VERSION_MINOR(x) (((x) >> 4) & 0xff) ++#define DRM_PANTHOR_VERSION_STATUS(x) ((x) & 0xf) ++ ++ /** @gpu_rev: GPU revision. */ ++ __u32 gpu_rev; ++ ++ /** @csf_id: Command stream frontend ID. */ ++ __u32 csf_id; ++#define DRM_PANTHOR_CSHW_MAJOR(x) (((x) >> 26) & 0x3f) ++#define DRM_PANTHOR_CSHW_MINOR(x) (((x) >> 20) & 0x3f) ++#define DRM_PANTHOR_CSHW_REV(x) (((x) >> 16) & 0xf) ++#define DRM_PANTHOR_MCU_MAJOR(x) (((x) >> 10) & 0x3f) ++#define DRM_PANTHOR_MCU_MINOR(x) (((x) >> 4) & 0x3f) ++#define DRM_PANTHOR_MCU_REV(x) ((x) & 0xf) ++ ++ /** @l2_features: L2-cache features. */ ++ __u32 l2_features; ++ ++ /** @tiler_features: Tiler features. */ ++ __u32 tiler_features; ++ ++ /** @mem_features: Memory features. */ ++ __u32 mem_features; ++ ++ /** @mmu_features: MMU features. */ ++ __u32 mmu_features; ++#define DRM_PANTHOR_MMU_VA_BITS(x) ((x) & 0xff) ++ ++ /** @thread_features: Thread features. */ ++ __u32 thread_features; ++ ++ /** @max_threads: Maximum number of threads. */ ++ __u32 max_threads; ++ ++ /** @thread_max_workgroup_size: Maximum workgroup size. */ ++ __u32 thread_max_workgroup_size; ++ ++ /** ++ * @thread_max_barrier_size: Maximum number of threads that can wait ++ * simultaneously on a barrier. ++ */ ++ __u32 thread_max_barrier_size; ++ ++ /** @coherency_features: Coherency features. */ ++ __u32 coherency_features; ++ ++ /** @texture_features: Texture features. */ ++ __u32 texture_features[4]; ++ ++ /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ ++ __u32 as_present; ++ ++ /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ ++ __u64 shader_present; ++ ++ /** @l2_present: Bitmask encoding the L2 caches exposed by the GPU. */ ++ __u64 l2_present; ++ ++ /** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. */ ++ __u64 tiler_present; ++ ++ /* @core_features: Used to discriminate core variants when they exist. */ ++ __u32 core_features; ++ ++ /* @pad: MBZ. */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_csif_info - Command stream interface information ++ * ++ * Structure grouping all queryable information relating to the command stream interface. 
++ */ ++struct drm_panthor_csif_info { ++ /** @csg_slot_count: Number of command stream group slots exposed by the firmware. */ ++ __u32 csg_slot_count; ++ ++ /** @cs_slot_count: Number of command stream slots per group. */ ++ __u32 cs_slot_count; ++ ++ /** @cs_reg_count: Number of command stream registers. */ ++ __u32 cs_reg_count; ++ ++ /** @scoreboard_slot_count: Number of scoreboard slots. */ ++ __u32 scoreboard_slot_count; ++ ++ /** ++ * @unpreserved_cs_reg_count: Number of command stream registers reserved by ++ * the kernel driver to call a userspace command stream. ++ * ++ * All registers can be used by a userspace command stream, but the ++ * [cs_slot_count - unpreserved_cs_reg_count .. cs_slot_count] registers are ++ * used by the kernel when DRM_PANTHOR_IOCTL_GROUP_SUBMIT is called. ++ */ ++ __u32 unpreserved_cs_reg_count; ++ ++ /** ++ * @pad: Padding field, set to zero. ++ */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY ++ */ ++struct drm_panthor_dev_query { ++ /** @type: the query type (see drm_panthor_dev_query_type). */ ++ __u32 type; ++ ++ /** ++ * @size: size of the type being queried. ++ * ++ * If pointer is NULL, size is updated by the driver to provide the ++ * output structure size. If pointer is not NULL, the driver will ++ * only copy min(size, actual_structure_size) bytes to the pointer, ++ * and update the size accordingly. This allows us to extend query ++ * types without breaking userspace. ++ */ ++ __u32 size; ++ ++ /** ++ * @pointer: user pointer to a query type struct. ++ * ++ * Pointer can be NULL, in which case, nothing is copied, but the ++ * actual structure size is returned. If not NULL, it must point to ++ * a location that's large enough to hold size bytes. ++ */ ++ __u64 pointer; ++}; ++ ++/** ++ * struct drm_panthor_vm_create - Arguments passed to DRM_PANTHOR_IOCTL_VM_CREATE ++ */ ++struct drm_panthor_vm_create { ++ /** @flags: VM flags, MBZ. */ ++ __u32 flags; ++ ++ /** @id: Returned VM ID. */ ++ __u32 id; ++ ++ /** ++ * @user_va_range: Size of the VA space reserved for user objects. ++ * ++ * The kernel will pick the remaining space to map kernel-only objects to the ++ * VM (heap chunks, heap context, ring buffers, kernel synchronization objects, ++ * ...). If the space left for kernel objects is too small, kernel object ++ * allocation will fail further down the road. One can use ++ * drm_panthor_gpu_info::mmu_features to extract the total virtual address ++ * range, and chose a user_va_range that leaves some space to the kernel. ++ * ++ * If user_va_range is zero, the kernel will pick a sensible value based on ++ * TASK_SIZE and the virtual range supported by the GPU MMU (the kernel/user ++ * split should leave enough VA space for userspace processes to support SVM, ++ * while still allowing the kernel to map some amount of kernel objects in ++ * the kernel VA range). The value chosen by the driver will be returned in ++ * @user_va_range. ++ * ++ * User VA space always starts at 0x0, kernel VA space is always placed after ++ * the user VA range. ++ */ ++ __u64 user_va_range; ++}; ++ ++/** ++ * struct drm_panthor_vm_destroy - Arguments passed to DRM_PANTHOR_IOCTL_VM_DESTROY ++ */ ++struct drm_panthor_vm_destroy { ++ /** @id: ID of the VM to destroy. */ ++ __u32 id; ++ ++ /** @pad: MBZ. 
*/ ++ __u32 pad; ++}; ++ ++/** ++ * enum drm_panthor_vm_bind_op_flags - VM bind operation flags ++ */ ++enum drm_panthor_vm_bind_op_flags { ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_READONLY: Map the memory read-only. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_READONLY = 1 << 0, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: Map the memory not-executable. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC = 1 << 1, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED: Map the memory uncached. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of operation. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MASK = (int)(0xfu << 28), ++ ++ /** @DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: Map operation. */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MAP = 0 << 28, ++ ++ /** @DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: Unmap operation. */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP = 1 << 28, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: No VM operation. ++ * ++ * Just serves as a synchronization point on a VM queue. ++ * ++ * Only valid if %DRM_PANTHOR_VM_BIND_ASYNC is set in drm_panthor_vm_bind::flags, ++ * and drm_panthor_vm_bind_op::syncs contains at least one element. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY = 2 << 28, ++}; ++ ++/** ++ * struct drm_panthor_vm_bind_op - VM bind operation ++ */ ++struct drm_panthor_vm_bind_op { ++ /** @flags: Combination of drm_panthor_vm_bind_op_flags flags. */ ++ __u32 flags; ++ ++ /** ++ * @bo_handle: Handle of the buffer object to map. ++ * MBZ for unmap or sync-only operations. ++ */ ++ __u32 bo_handle; ++ ++ /** ++ * @bo_offset: Buffer object offset. ++ * MBZ for unmap or sync-only operations. ++ */ ++ __u64 bo_offset; ++ ++ /** ++ * @va: Virtual address to map/unmap. ++ * MBZ for sync-only operations. ++ */ ++ __u64 va; ++ ++ /** ++ * @size: Size to map/unmap. ++ * MBZ for sync-only operations. ++ */ ++ __u64 size; ++ ++ /** ++ * @syncs: Array of struct drm_panthor_sync_op synchronization ++ * operations. ++ * ++ * This array must be empty if %DRM_PANTHOR_VM_BIND_ASYNC is not set on ++ * the drm_panthor_vm_bind object containing this VM bind operation. ++ * ++ * This array shall not be empty for sync-only operations. ++ */ ++ struct drm_panthor_obj_array syncs; ++ ++}; ++ ++/** ++ * enum drm_panthor_vm_bind_flags - VM bind flags ++ */ ++enum drm_panthor_vm_bind_flags { ++ /** ++ * @DRM_PANTHOR_VM_BIND_ASYNC: VM bind operations are queued to the VM ++ * queue instead of being executed synchronously. ++ */ ++ DRM_PANTHOR_VM_BIND_ASYNC = 1 << 0, ++}; ++ ++/** ++ * struct drm_panthor_vm_bind - Arguments passed to DRM_IOCTL_PANTHOR_VM_BIND ++ */ ++struct drm_panthor_vm_bind { ++ /** @vm_id: VM targeted by the bind request. */ ++ __u32 vm_id; ++ ++ /** @flags: Combination of drm_panthor_vm_bind_flags flags. */ ++ __u32 flags; ++ ++ /** @ops: Array of struct drm_panthor_vm_bind_op bind operations. */ ++ struct drm_panthor_obj_array ops; ++}; ++ ++/** ++ * enum drm_panthor_vm_state - VM states. ++ */ ++enum drm_panthor_vm_state { ++ /** ++ * @DRM_PANTHOR_VM_STATE_USABLE: VM is usable. ++ * ++ * New VM operations will be accepted on this VM. ++ */ ++ DRM_PANTHOR_VM_STATE_USABLE, ++ ++ /** ++ * @DRM_PANTHOR_VM_STATE_UNUSABLE: VM is unusable. 
++ * ++ * Something put the VM in an unusable state (like an asynchronous ++ * VM_BIND request failing for any reason). ++ * ++ * Once the VM is in this state, all new MAP operations will be ++ * rejected, and any GPU job targeting this VM will fail. ++ * UNMAP operations are still accepted. ++ * ++ * The only way to recover from an unusable VM is to create a new ++ * VM, and destroy the old one. ++ */ ++ DRM_PANTHOR_VM_STATE_UNUSABLE, ++}; ++ ++/** ++ * struct drm_panthor_vm_get_state - Get VM state. ++ */ ++struct drm_panthor_vm_get_state { ++ /** @vm_id: VM targeted by the get_state request. */ ++ __u32 vm_id; ++ ++ /** ++ * @state: state returned by the driver. ++ * ++ * Must be one of the enum drm_panthor_vm_state values. ++ */ ++ __u32 state; ++}; ++ ++/** ++ * enum drm_panthor_bo_flags - Buffer object flags, passed at creation time. ++ */ ++enum drm_panthor_bo_flags { ++ /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ ++ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), ++}; ++ ++/** ++ * struct drm_panthor_bo_create - Arguments passed to DRM_IOCTL_PANTHOR_BO_CREATE. ++ */ ++struct drm_panthor_bo_create { ++ /** ++ * @size: Requested size for the object ++ * ++ * The (page-aligned) allocated size for the object will be returned. ++ */ ++ __u64 size; ++ ++ /** ++ * @flags: Flags. Must be a combination of drm_panthor_bo_flags flags. ++ */ ++ __u32 flags; ++ ++ /** ++ * @exclusive_vm_id: Exclusive VM this buffer object will be mapped to. ++ * ++ * If not zero, the field must refer to a valid VM ID, and implies that: ++ * - the buffer object will only ever be bound to that VM ++ * - cannot be exported as a PRIME fd ++ */ ++ __u32 exclusive_vm_id; ++ ++ /** ++ * @handle: Returned handle for the object. ++ * ++ * Object handles are nonzero. ++ */ ++ __u32 handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_bo_mmap_offset - Arguments passed to DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET. ++ */ ++struct drm_panthor_bo_mmap_offset { ++ /** @handle: Handle of the object we want an mmap offset for. */ ++ __u32 handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @offset: The fake offset to use for subsequent mmap calls. */ ++ __u64 offset; ++}; ++ ++/** ++ * struct drm_panthor_queue_create - Queue creation arguments. ++ */ ++struct drm_panthor_queue_create { ++ /** ++ * @priority: Defines the priority of queues inside a group. Goes from 0 to 15, ++ * 15 being the highest priority. ++ */ ++ __u8 priority; ++ ++ /** @pad: Padding fields, MBZ. */ ++ __u8 pad[3]; ++ ++ /** @ringbuf_size: Size of the ring buffer to allocate to this queue. */ ++ __u32 ringbuf_size; ++}; ++ ++/** ++ * enum drm_panthor_group_priority - Scheduling group priority ++ */ ++enum drm_panthor_group_priority { ++ /** @PANTHOR_GROUP_PRIORITY_LOW: Low priority group. */ ++ PANTHOR_GROUP_PRIORITY_LOW = 0, ++ ++ /** @PANTHOR_GROUP_PRIORITY_MEDIUM: Medium priority group. */ ++ PANTHOR_GROUP_PRIORITY_MEDIUM, ++ ++ /** @PANTHOR_GROUP_PRIORITY_HIGH: High priority group. */ ++ PANTHOR_GROUP_PRIORITY_HIGH, ++}; ++ ++/** ++ * struct drm_panthor_group_create - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_CREATE ++ */ ++struct drm_panthor_group_create { ++ /** @queues: Array of drm_panthor_queue_create elements. */ ++ struct drm_panthor_obj_array queues; ++ ++ /** ++ * @max_compute_cores: Maximum number of cores that can be used by compute ++ * jobs across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @compute_core_mask. 
++ */ ++ __u8 max_compute_cores; ++ ++ /** ++ * @max_fragment_cores: Maximum number of cores that can be used by fragment ++ * jobs across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @fragment_core_mask. ++ */ ++ __u8 max_fragment_cores; ++ ++ /** ++ * @max_tiler_cores: Maximum number of tilers that can be used by tiler jobs ++ * across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @tiler_core_mask. ++ */ ++ __u8 max_tiler_cores; ++ ++ /** @priority: Group priority (see enum drm_panthor_group_priority). */ ++ __u8 priority; ++ ++ /** @pad: Padding field, MBZ. */ ++ __u32 pad; ++ ++ /** ++ * @compute_core_mask: Mask encoding cores that can be used for compute jobs. ++ * ++ * This field must have at least @max_compute_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::shader_present. ++ */ ++ __u64 compute_core_mask; ++ ++ /** ++ * @fragment_core_mask: Mask encoding cores that can be used for fragment jobs. ++ * ++ * This field must have at least @max_fragment_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::shader_present. ++ */ ++ __u64 fragment_core_mask; ++ ++ /** ++ * @tiler_core_mask: Mask encoding cores that can be used for tiler jobs. ++ * ++ * This field must have at least @max_tiler_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::tiler_present. ++ */ ++ __u64 tiler_core_mask; ++ ++ /** ++ * @vm_id: VM ID to bind this group to. ++ * ++ * All submission to queues bound to this group will use this VM. ++ */ ++ __u32 vm_id; ++ ++ /** ++ * @group_handle: Returned group handle. Passed back when submitting jobs or ++ * destroying a group. ++ */ ++ __u32 group_handle; ++}; ++ ++/** ++ * struct drm_panthor_group_destroy - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_DESTROY ++ */ ++struct drm_panthor_group_destroy { ++ /** @group_handle: Group to destroy */ ++ __u32 group_handle; ++ ++ /** @pad: Padding field, MBZ. */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_queue_submit - Job submission arguments. ++ * ++ * This is describing the userspace command stream to call from the kernel ++ * command stream ring-buffer. Queue submission is always part of a group ++ * submission, taking one or more jobs to submit to the underlying queues. ++ */ ++struct drm_panthor_queue_submit { ++ /** @queue_index: Index of the queue inside a group. */ ++ __u32 queue_index; ++ ++ /** ++ * @stream_size: Size of the command stream to execute. ++ * ++ * Must be 64-bit/8-byte aligned (the size of a CS instruction) ++ * ++ * Can be zero if stream_addr is zero too. ++ */ ++ __u32 stream_size; ++ ++ /** ++ * @stream_addr: GPU address of the command stream to execute. ++ * ++ * Must be aligned on 64-byte. ++ * ++ * Can be zero is stream_size is zero too. ++ */ ++ __u64 stream_addr; ++ ++ /** ++ * @latest_flush: FLUSH_ID read at the time the stream was built. ++ * ++ * This allows cache flush elimination for the automatic ++ * flush+invalidate(all) done at submission time, which is needed to ++ * ensure the GPU doesn't get garbage when reading the indirect command ++ * stream buffers. If you want the cache flush to happen ++ * unconditionally, pass a zero here. ++ */ ++ __u32 latest_flush; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @syncs: Array of struct drm_panthor_sync_op sync operations. 
*/ ++ struct drm_panthor_obj_array syncs; ++}; ++ ++/** ++ * struct drm_panthor_group_submit - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_SUBMIT ++ */ ++struct drm_panthor_group_submit { ++ /** @group_handle: Handle of the group to queue jobs to. */ ++ __u32 group_handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @queue_submits: Array of drm_panthor_queue_submit objects. */ ++ struct drm_panthor_obj_array queue_submits; ++}; ++ ++/** ++ * enum drm_panthor_group_state_flags - Group state flags ++ */ ++enum drm_panthor_group_state_flags { ++ /** ++ * @DRM_PANTHOR_GROUP_STATE_TIMEDOUT: Group had unfinished jobs. ++ * ++ * When a group ends up with this flag set, no jobs can be submitted to its queues. ++ */ ++ DRM_PANTHOR_GROUP_STATE_TIMEDOUT = 1 << 0, ++ ++ /** ++ * @DRM_PANTHOR_GROUP_STATE_FATAL_FAULT: Group had fatal faults. ++ * ++ * When a group ends up with this flag set, no jobs can be submitted to its queues. ++ */ ++ DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1, ++}; ++ ++/** ++ * struct drm_panthor_group_get_state - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_GET_STATE ++ * ++ * Used to query the state of a group and decide whether a new group should be created to ++ * replace it. ++ */ ++struct drm_panthor_group_get_state { ++ /** @group_handle: Handle of the group to query state on */ ++ __u32 group_handle; ++ ++ /** ++ * @state: Combination of DRM_PANTHOR_GROUP_STATE_* flags encoding the ++ * group state. ++ */ ++ __u32 state; ++ ++ /** @fatal_queues: Bitmask of queues that faced fatal faults. */ ++ __u32 fatal_queues; ++ ++ /** @pad: MBZ */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_tiler_heap_create - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE ++ */ ++struct drm_panthor_tiler_heap_create { ++ /** @vm_id: VM ID the tiler heap should be mapped to */ ++ __u32 vm_id; ++ ++ /** @initial_chunk_count: Initial number of chunks to allocate. */ ++ __u32 initial_chunk_count; ++ ++ /** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */ ++ __u32 chunk_size; ++ ++ /** @max_chunks: Maximum number of chunks that can be allocated. */ ++ __u32 max_chunks; ++ ++ /** ++ * @target_in_flight: Maximum number of in-flight render passes. ++ * ++ * If the heap has more than tiler jobs in-flight, the FW will wait for render ++ * passes to finish before queuing new tiler jobs. ++ */ ++ __u32 target_in_flight; ++ ++ /** @handle: Returned heap handle. Passed back to DESTROY_TILER_HEAP. */ ++ __u32 handle; ++ ++ /** @tiler_heap_ctx_gpu_va: Returned heap GPU virtual address returned */ ++ __u64 tiler_heap_ctx_gpu_va; ++ ++ /** ++ * @first_heap_chunk_gpu_va: First heap chunk. ++ * ++ * The tiler heap is formed of heap chunks forming a single-link list. This ++ * is the first element in the list. ++ */ ++ __u64 first_heap_chunk_gpu_va; ++}; ++ ++/** ++ * struct drm_panthor_tiler_heap_destroy - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY ++ */ ++struct drm_panthor_tiler_heap_destroy { ++ /** @handle: Handle of the tiler heap to destroy */ ++ __u32 handle; ++ ++ /** @pad: Padding field, MBZ. 
*/ ++ __u32 pad; ++}; ++ ++#if defined(__cplusplus) ++} ++#endif ++ ++#endif /* _PANTHOR_DRM_H_ */ diff --git a/patches-6.6/034-31-v6.10-drm-panthor-Add-GPU-register-definitions.patch b/patches-6.6/034-31-v6.10-drm-panthor-Add-GPU-register-definitions.patch new file mode 100644 index 0000000..371567e --- /dev/null +++ b/patches-6.6/034-31-v6.10-drm-panthor-Add-GPU-register-definitions.patch @@ -0,0 +1,280 @@ +From 546b366600ef34847702f43bb2d22f914d19eae0 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:16 +0100 +Subject: [PATCH] drm/panthor: Add GPU register definitions + +Those are the registers directly accessible through the MMIO range. + +FW registers are exposed in panthor_fw.h. + +v6: +- Add Maxime's and Heiko's acks + +v4: +- Add the CORE_FEATURES register (needed for GPU variants) +- Add Steve's R-b + +v3: +- Add macros to extract GPU ID info +- Formatting changes +- Remove AS_TRANSCFG_ADRMODE_LEGACY - it doesn't exist post-CSF +- Remove CSF_GPU_LATEST_FLUSH_ID_DEFAULT +- Add GPU_L2_FEATURES_LINE_SIZE for extracting the GPU cache line size + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-3-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_regs.h | 239 +++++++++++++++++++++++++ + 1 file changed, 239 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_regs.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_regs.h +@@ -0,0 +1,239 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++/* ++ * Register definitions based on mali_kbase_gpu_regmap.h and ++ * mali_kbase_gpu_regmap_csf.h ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
++ */ ++#ifndef __PANTHOR_REGS_H__ ++#define __PANTHOR_REGS_H__ ++ ++#define GPU_ID 0x0 ++#define GPU_ARCH_MAJOR(x) ((x) >> 28) ++#define GPU_ARCH_MINOR(x) (((x) & GENMASK(27, 24)) >> 24) ++#define GPU_ARCH_REV(x) (((x) & GENMASK(23, 20)) >> 20) ++#define GPU_PROD_MAJOR(x) (((x) & GENMASK(19, 16)) >> 16) ++#define GPU_VER_MAJOR(x) (((x) & GENMASK(15, 12)) >> 12) ++#define GPU_VER_MINOR(x) (((x) & GENMASK(11, 4)) >> 4) ++#define GPU_VER_STATUS(x) ((x) & GENMASK(3, 0)) ++ ++#define GPU_L2_FEATURES 0x4 ++#define GPU_L2_FEATURES_LINE_SIZE(x) (1 << ((x) & GENMASK(7, 0))) ++ ++#define GPU_CORE_FEATURES 0x8 ++ ++#define GPU_TILER_FEATURES 0xC ++#define GPU_MEM_FEATURES 0x10 ++#define GROUPS_L2_COHERENT BIT(0) ++ ++#define GPU_MMU_FEATURES 0x14 ++#define GPU_MMU_FEATURES_VA_BITS(x) ((x) & GENMASK(7, 0)) ++#define GPU_MMU_FEATURES_PA_BITS(x) (((x) >> 8) & GENMASK(7, 0)) ++#define GPU_AS_PRESENT 0x18 ++#define GPU_CSF_ID 0x1C ++ ++#define GPU_INT_RAWSTAT 0x20 ++#define GPU_INT_CLEAR 0x24 ++#define GPU_INT_MASK 0x28 ++#define GPU_INT_STAT 0x2c ++#define GPU_IRQ_FAULT BIT(0) ++#define GPU_IRQ_PROTM_FAULT BIT(1) ++#define GPU_IRQ_RESET_COMPLETED BIT(8) ++#define GPU_IRQ_POWER_CHANGED BIT(9) ++#define GPU_IRQ_POWER_CHANGED_ALL BIT(10) ++#define GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17) ++#define GPU_IRQ_DOORBELL_MIRROR BIT(18) ++#define GPU_IRQ_MCU_STATUS_CHANGED BIT(19) ++#define GPU_CMD 0x30 ++#define GPU_CMD_DEF(type, payload) ((type) | ((payload) << 8)) ++#define GPU_SOFT_RESET GPU_CMD_DEF(1, 1) ++#define GPU_HARD_RESET GPU_CMD_DEF(1, 2) ++#define CACHE_CLEAN BIT(0) ++#define CACHE_INV BIT(1) ++#define GPU_FLUSH_CACHES(l2, lsc, oth) \ ++ GPU_CMD_DEF(4, ((l2) << 0) | ((lsc) << 4) | ((oth) << 8)) ++ ++#define GPU_STATUS 0x34 ++#define GPU_STATUS_ACTIVE BIT(0) ++#define GPU_STATUS_PWR_ACTIVE BIT(1) ++#define GPU_STATUS_PAGE_FAULT BIT(4) ++#define GPU_STATUS_PROTM_ACTIVE BIT(7) ++#define GPU_STATUS_DBG_ENABLED BIT(8) ++ ++#define GPU_FAULT_STATUS 0x3C ++#define GPU_FAULT_ADDR_LO 0x40 ++#define GPU_FAULT_ADDR_HI 0x44 ++ ++#define GPU_PWR_KEY 0x50 ++#define GPU_PWR_KEY_UNLOCK 0x2968A819 ++#define GPU_PWR_OVERRIDE0 0x54 ++#define GPU_PWR_OVERRIDE1 0x58 ++ ++#define GPU_TIMESTAMP_OFFSET_LO 0x88 ++#define GPU_TIMESTAMP_OFFSET_HI 0x8C ++#define GPU_CYCLE_COUNT_LO 0x90 ++#define GPU_CYCLE_COUNT_HI 0x94 ++#define GPU_TIMESTAMP_LO 0x98 ++#define GPU_TIMESTAMP_HI 0x9C ++ ++#define GPU_THREAD_MAX_THREADS 0xA0 ++#define GPU_THREAD_MAX_WORKGROUP_SIZE 0xA4 ++#define GPU_THREAD_MAX_BARRIER_SIZE 0xA8 ++#define GPU_THREAD_FEATURES 0xAC ++ ++#define GPU_TEXTURE_FEATURES(n) (0xB0 + ((n) * 4)) ++ ++#define GPU_SHADER_PRESENT_LO 0x100 ++#define GPU_SHADER_PRESENT_HI 0x104 ++#define GPU_TILER_PRESENT_LO 0x110 ++#define GPU_TILER_PRESENT_HI 0x114 ++#define GPU_L2_PRESENT_LO 0x120 ++#define GPU_L2_PRESENT_HI 0x124 ++ ++#define SHADER_READY_LO 0x140 ++#define SHADER_READY_HI 0x144 ++#define TILER_READY_LO 0x150 ++#define TILER_READY_HI 0x154 ++#define L2_READY_LO 0x160 ++#define L2_READY_HI 0x164 ++ ++#define SHADER_PWRON_LO 0x180 ++#define SHADER_PWRON_HI 0x184 ++#define TILER_PWRON_LO 0x190 ++#define TILER_PWRON_HI 0x194 ++#define L2_PWRON_LO 0x1A0 ++#define L2_PWRON_HI 0x1A4 ++ ++#define SHADER_PWROFF_LO 0x1C0 ++#define SHADER_PWROFF_HI 0x1C4 ++#define TILER_PWROFF_LO 0x1D0 ++#define TILER_PWROFF_HI 0x1D4 ++#define L2_PWROFF_LO 0x1E0 ++#define L2_PWROFF_HI 0x1E4 ++ ++#define SHADER_PWRTRANS_LO 0x200 ++#define SHADER_PWRTRANS_HI 0x204 ++#define TILER_PWRTRANS_LO 0x210 ++#define TILER_PWRTRANS_HI 0x214 ++#define L2_PWRTRANS_LO 
0x220 ++#define L2_PWRTRANS_HI 0x224 ++ ++#define SHADER_PWRACTIVE_LO 0x240 ++#define SHADER_PWRACTIVE_HI 0x244 ++#define TILER_PWRACTIVE_LO 0x250 ++#define TILER_PWRACTIVE_HI 0x254 ++#define L2_PWRACTIVE_LO 0x260 ++#define L2_PWRACTIVE_HI 0x264 ++ ++#define GPU_REVID 0x280 ++ ++#define GPU_COHERENCY_FEATURES 0x300 ++#define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) ++ ++#define GPU_COHERENCY_PROTOCOL 0x304 ++#define GPU_COHERENCY_ACE 0 ++#define GPU_COHERENCY_ACE_LITE 1 ++#define GPU_COHERENCY_NONE 31 ++ ++#define MCU_CONTROL 0x700 ++#define MCU_CONTROL_ENABLE 1 ++#define MCU_CONTROL_AUTO 2 ++#define MCU_CONTROL_DISABLE 0 ++ ++#define MCU_STATUS 0x704 ++#define MCU_STATUS_DISABLED 0 ++#define MCU_STATUS_ENABLED 1 ++#define MCU_STATUS_HALT 2 ++#define MCU_STATUS_FATAL 3 ++ ++/* Job Control regs */ ++#define JOB_INT_RAWSTAT 0x1000 ++#define JOB_INT_CLEAR 0x1004 ++#define JOB_INT_MASK 0x1008 ++#define JOB_INT_STAT 0x100c ++#define JOB_INT_GLOBAL_IF BIT(31) ++#define JOB_INT_CSG_IF(x) BIT(x) ++ ++/* MMU regs */ ++#define MMU_INT_RAWSTAT 0x2000 ++#define MMU_INT_CLEAR 0x2004 ++#define MMU_INT_MASK 0x2008 ++#define MMU_INT_STAT 0x200c ++ ++/* AS_COMMAND register commands */ ++ ++#define MMU_BASE 0x2400 ++#define MMU_AS_SHIFT 6 ++#define MMU_AS(as) (MMU_BASE + ((as) << MMU_AS_SHIFT)) ++ ++#define AS_TRANSTAB_LO(as) (MMU_AS(as) + 0x0) ++#define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x4) ++#define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x8) ++#define AS_MEMATTR_HI(as) (MMU_AS(as) + 0xC) ++#define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL (2 << 2) ++#define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r) ((3 << 2) | \ ++ ((w) ? BIT(0) : 0) | \ ++ ((r) ? BIT(1) : 0)) ++#define AS_MEMATTR_AARCH64_SH_MIDGARD_INNER (0 << 4) ++#define AS_MEMATTR_AARCH64_SH_CPU_INNER (1 << 4) ++#define AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH (2 << 4) ++#define AS_MEMATTR_AARCH64_SHARED (0 << 6) ++#define AS_MEMATTR_AARCH64_INNER_OUTER_NC (1 << 6) ++#define AS_MEMATTR_AARCH64_INNER_OUTER_WB (2 << 6) ++#define AS_MEMATTR_AARCH64_FAULT (3 << 6) ++#define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) ++#define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) ++#define AS_COMMAND(as) (MMU_AS(as) + 0x18) ++#define AS_COMMAND_NOP 0 ++#define AS_COMMAND_UPDATE 1 ++#define AS_COMMAND_LOCK 2 ++#define AS_COMMAND_UNLOCK 3 ++#define AS_COMMAND_FLUSH_PT 4 ++#define AS_COMMAND_FLUSH_MEM 5 ++#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) ++#define AS_FAULTSTATUS(as) (MMU_AS(as) + 0x1C) ++#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) ++#define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) ++#define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) ++#define AS_STATUS(as) (MMU_AS(as) + 0x28) ++#define AS_STATUS_AS_ACTIVE BIT(0) ++#define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) ++#define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) ++#define AS_TRANSCFG_ADRMODE_UNMAPPED (1 << 0) ++#define AS_TRANSCFG_ADRMODE_IDENTITY (2 << 0) ++#define AS_TRANSCFG_ADRMODE_AARCH64_4K (6 << 0) ++#define AS_TRANSCFG_ADRMODE_AARCH64_64K (8 << 0) ++#define AS_TRANSCFG_INA_BITS(x) ((x) << 6) ++#define AS_TRANSCFG_OUTA_BITS(x) ((x) << 14) ++#define AS_TRANSCFG_SL_CONCAT BIT(22) ++#define AS_TRANSCFG_PTW_MEMATTR_NC (1 << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_WB (2 << 24) ++#define AS_TRANSCFG_PTW_SH_NS (0 << 28) ++#define AS_TRANSCFG_PTW_SH_OS (2 << 28) ++#define AS_TRANSCFG_PTW_SH_IS (3 << 
28) ++#define AS_TRANSCFG_PTW_RA BIT(30) ++#define AS_TRANSCFG_DISABLE_HIER_AP BIT(33) ++#define AS_TRANSCFG_DISABLE_AF_FAULT BIT(34) ++#define AS_TRANSCFG_WXN BIT(35) ++#define AS_TRANSCFG_XREADABLE BIT(36) ++#define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) ++#define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C) ++ ++#define CSF_GPU_LATEST_FLUSH_ID 0x10000 ++ ++#define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000)) ++#define CSF_GLB_DOORBELL_ID 0 ++ ++#define gpu_write(dev, reg, data) \ ++ writel(data, (dev)->iomem + (reg)) ++ ++#define gpu_read(dev, reg) \ ++ readl((dev)->iomem + (reg)) ++ ++#endif diff --git a/patches-6.6/034-32-v6.10-drm-panthor-Add-the-device-logical-block.patch b/patches-6.6/034-32-v6.10-drm-panthor-Add-the-device-logical-block.patch new file mode 100644 index 0000000..c2583d4 --- /dev/null +++ b/patches-6.6/034-32-v6.10-drm-panthor-Add-the-device-logical-block.patch @@ -0,0 +1,1013 @@ +From 5fe909cae118a757a77afb37174b99436a36d2e2 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:17 +0100 +Subject: [PATCH] drm/panthor: Add the device logical block + +The panthor driver is designed in a modular way, where each logical +block is dealing with a specific HW-block or software feature. In order +for those blocks to communicate with each other, we need a central +panthor_device collecting all the blocks, and exposing some common +features, like interrupt handling, power management, reset, ... + +This what this panthor_device logical block is about. + +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v5: +- Suspend the MMU/GPU blocks if panthor_fw_resume() fails in + panthor_device_resume() +- Move the pm_runtime_use_autosuspend() call before drm_dev_register() +- Add Liviu's R-b + +v4: +- Check drmm_mutex_init() return code +- Fix panthor_device_reset_work() out path +- Fix the race in the unplug logic +- Fix typos +- Unplug blocks when something fails in panthor_device_init() +- Add Steve's R-b + +v3: +- Add acks for the MIT+GPL2 relicensing +- Fix 32-bit support +- Shorten the sections protected by panthor_device::pm::mmio_lock to fix + lock ordering issues. +- Rename panthor_device::pm::lock into panthor_device::pm::mmio_lock to + better reflect what this lock is protecting +- Use dev_err_probe() +- Make sure we call drm_dev_exit() when something fails half-way in + panthor_device_reset_work() +- Replace CSF_GPU_LATEST_FLUSH_ID_DEFAULT with a constant '1' and a + comment to explain. Also remove setting the dummy flush ID on suspend. 
+- Remove drm_WARN_ON() in panthor_exception_name() +- Check pirq->suspended in panthor_xxx_irq_raw_handler() + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-4-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 549 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_device.h | 394 ++++++++++++++++ + 2 files changed, 943 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_device.c + create mode 100644 drivers/gpu/drm/panthor/panthor_device.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -0,0 +1,549 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gpu.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++static int panthor_clk_init(struct panthor_device *ptdev) ++{ ++ ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL); ++ if (IS_ERR(ptdev->clks.core)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.core), ++ "get 'core' clock failed"); ++ ++ ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks"); ++ if (IS_ERR(ptdev->clks.stacks)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.stacks), ++ "get 'stacks' clock failed"); ++ ++ ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup"); ++ if (IS_ERR(ptdev->clks.coregroup)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.coregroup), ++ "get 'coregroup' clock failed"); ++ ++ drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core)); ++ return 0; ++} ++ ++void panthor_device_unplug(struct panthor_device *ptdev) ++{ ++ /* This function can be called from two different path: the reset work ++ * and the platform device remove callback. drm_dev_unplug() doesn't ++ * deal with concurrent callers, so we have to protect drm_dev_unplug() ++ * calls with our own lock, and bail out if the device is already ++ * unplugged. ++ */ ++ mutex_lock(&ptdev->unplug.lock); ++ if (drm_dev_is_unplugged(&ptdev->base)) { ++ /* Someone beat us, release the lock and wait for the unplug ++ * operation to be reported as done. ++ **/ ++ mutex_unlock(&ptdev->unplug.lock); ++ wait_for_completion(&ptdev->unplug.done); ++ return; ++ } ++ ++ /* Call drm_dev_unplug() so any access to HW blocks happening after ++ * that point get rejected. ++ */ ++ drm_dev_unplug(&ptdev->base); ++ ++ /* We do the rest of the unplug with the unplug lock released, ++ * future callers will wait on ptdev->unplug.done anyway. ++ */ ++ mutex_unlock(&ptdev->unplug.lock); ++ ++ drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0); ++ ++ /* Now, try to cleanly shutdown the GPU before the device resources ++ * get reclaimed. 
++ */ ++ panthor_sched_unplug(ptdev); ++ panthor_fw_unplug(ptdev); ++ panthor_mmu_unplug(ptdev); ++ panthor_gpu_unplug(ptdev); ++ ++ pm_runtime_dont_use_autosuspend(ptdev->base.dev); ++ pm_runtime_put_sync_suspend(ptdev->base.dev); ++ ++ /* Report the unplug operation as done to unblock concurrent ++ * panthor_device_unplug() callers. ++ */ ++ complete_all(&ptdev->unplug.done); ++} ++ ++static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ ++ cancel_work_sync(&ptdev->reset.work); ++ destroy_workqueue(ptdev->reset.wq); ++} ++ ++static void panthor_device_reset_work(struct work_struct *work) ++{ ++ struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work); ++ int ret = 0, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) { ++ /* ++ * No need for a reset as the device has been (or will be) ++ * powered down ++ */ ++ atomic_set(&ptdev->reset.pending, 0); ++ return; ++ } ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return; ++ ++ panthor_sched_pre_reset(ptdev); ++ panthor_fw_pre_reset(ptdev, true); ++ panthor_mmu_pre_reset(ptdev); ++ panthor_gpu_soft_reset(ptdev); ++ panthor_gpu_l2_power_on(ptdev); ++ panthor_mmu_post_reset(ptdev); ++ ret = panthor_fw_post_reset(ptdev); ++ if (ret) ++ goto out_dev_exit; ++ ++ atomic_set(&ptdev->reset.pending, 0); ++ panthor_sched_post_reset(ptdev); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ ++ if (ret) { ++ panthor_device_unplug(ptdev); ++ drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable."); ++ } ++} ++ ++static bool panthor_device_is_initialized(struct panthor_device *ptdev) ++{ ++ return !!ptdev->scheduler; ++} ++ ++static void panthor_device_free_page(struct drm_device *ddev, void *data) ++{ ++ free_page((unsigned long)data); ++} ++ ++int panthor_device_init(struct panthor_device *ptdev) ++{ ++ struct resource *res; ++ struct page *p; ++ int ret; ++ ++ ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; ++ ++ init_completion(&ptdev->unplug.done); ++ ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); ++ if (ret) ++ return ret; ++ ++ ret = drmm_mutex_init(&ptdev->base, &ptdev->pm.mmio_lock); ++ if (ret) ++ return ret; ++ ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ p = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!p) ++ return -ENOMEM; ++ ++ ptdev->pm.dummy_latest_flush = page_address(p); ++ ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page, ++ ptdev->pm.dummy_latest_flush); ++ if (ret) ++ return ret; ++ ++ /* ++ * Set the dummy page holding the latest flush to 1. This will cause the ++ * flush to avoided as we know it isn't necessary if the submission ++ * happens while the dummy page is mapped. Zero cannot be used because ++ * that means 'always flush'. 
++ */ ++ *ptdev->pm.dummy_latest_flush = 1; ++ ++ INIT_WORK(&ptdev->reset.work, panthor_device_reset_work); ++ ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0); ++ if (!ptdev->reset.wq) ++ return -ENOMEM; ++ ++ ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL); ++ if (ret) ++ return ret; ++ ++ ret = panthor_clk_init(ptdev); ++ if (ret) ++ return ret; ++ ++ ret = panthor_devfreq_init(ptdev); ++ if (ret) ++ return ret; ++ ++ ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev), ++ 0, &res); ++ if (IS_ERR(ptdev->iomem)) ++ return PTR_ERR(ptdev->iomem); ++ ++ ptdev->phys_addr = res->start; ++ ++ ret = devm_pm_runtime_enable(ptdev->base.dev); ++ if (ret) ++ return ret; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (ret) ++ return ret; ++ ++ ret = panthor_gpu_init(ptdev); ++ if (ret) ++ goto err_rpm_put; ++ ++ ret = panthor_mmu_init(ptdev); ++ if (ret) ++ goto err_unplug_gpu; ++ ++ ret = panthor_fw_init(ptdev); ++ if (ret) ++ goto err_unplug_mmu; ++ ++ ret = panthor_sched_init(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ /* ~3 frames */ ++ pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50); ++ pm_runtime_use_autosuspend(ptdev->base.dev); ++ ++ ret = drm_dev_register(&ptdev->base, 0); ++ if (ret) ++ goto err_disable_autosuspend; ++ ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ return 0; ++ ++err_disable_autosuspend: ++ pm_runtime_dont_use_autosuspend(ptdev->base.dev); ++ panthor_sched_unplug(ptdev); ++ ++err_unplug_fw: ++ panthor_fw_unplug(ptdev); ++ ++err_unplug_mmu: ++ panthor_mmu_unplug(ptdev); ++ ++err_unplug_gpu: ++ panthor_gpu_unplug(ptdev); ++ ++err_rpm_put: ++ pm_runtime_put_sync_suspend(ptdev->base.dev); ++ return ret; ++} ++ ++#define PANTHOR_EXCEPTION(id) \ ++ [DRM_PANTHOR_EXCEPTION_ ## id] = { \ ++ .name = #id, \ ++ } ++ ++struct panthor_exception_info { ++ const char *name; ++}; ++ ++static const struct panthor_exception_info panthor_exception_infos[] = { ++ PANTHOR_EXCEPTION(OK), ++ PANTHOR_EXCEPTION(TERMINATED), ++ PANTHOR_EXCEPTION(KABOOM), ++ PANTHOR_EXCEPTION(EUREKA), ++ PANTHOR_EXCEPTION(ACTIVE), ++ PANTHOR_EXCEPTION(CS_RES_TERM), ++ PANTHOR_EXCEPTION(CS_CONFIG_FAULT), ++ PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), ++ PANTHOR_EXCEPTION(CS_BUS_FAULT), ++ PANTHOR_EXCEPTION(CS_INSTR_INVALID), ++ PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW), ++ PANTHOR_EXCEPTION(CS_INHERIT_FAULT), ++ PANTHOR_EXCEPTION(INSTR_INVALID_PC), ++ PANTHOR_EXCEPTION(INSTR_INVALID_ENC), ++ PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT), ++ PANTHOR_EXCEPTION(DATA_INVALID_FAULT), ++ PANTHOR_EXCEPTION(TILE_RANGE_FAULT), ++ PANTHOR_EXCEPTION(ADDR_RANGE_FAULT), ++ PANTHOR_EXCEPTION(IMPRECISE_FAULT), ++ PANTHOR_EXCEPTION(OOM), ++ PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR), ++ PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT), ++ PANTHOR_EXCEPTION(GPU_BUS_FAULT), ++ PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT), ++ PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT), ++ PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_0), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_1), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_2), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_3), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_4), ++ PANTHOR_EXCEPTION(PERM_FAULT_0), ++ PANTHOR_EXCEPTION(PERM_FAULT_1), ++ PANTHOR_EXCEPTION(PERM_FAULT_2), ++ PANTHOR_EXCEPTION(PERM_FAULT_3), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_1), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_2), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_3), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN), ++ 
PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3), ++}; ++ ++const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code) ++{ ++ if (exception_code >= ARRAY_SIZE(panthor_exception_infos) || ++ !panthor_exception_infos[exception_code].name) ++ return "Unknown exception type"; ++ ++ return panthor_exception_infos[exception_code].name; ++} ++ ++static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct panthor_device *ptdev = vma->vm_private_data; ++ u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ unsigned long pfn; ++ pgprot_t pgprot; ++ vm_fault_t ret; ++ bool active; ++ int cookie; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return VM_FAULT_SIGBUS; ++ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE; ++ ++ switch (panthor_device_mmio_offset(id)) { ++ case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: ++ if (active) ++ pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); ++ else ++ pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush); ++ break; ++ ++ default: ++ ret = VM_FAULT_SIGBUS; ++ goto out_unlock; ++ } ++ ++ pgprot = vma->vm_page_prot; ++ if (active) ++ pgprot = pgprot_noncached(pgprot); ++ ++ ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot); ++ ++out_unlock: ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static const struct vm_operations_struct panthor_mmio_vm_ops = { ++ .fault = panthor_mmio_vm_fault, ++}; ++ ++int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma) ++{ ++ u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ ++ switch (panthor_device_mmio_offset(id)) { ++ case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: ++ if (vma->vm_end - vma->vm_start != PAGE_SIZE || ++ (vma->vm_flags & (VM_WRITE | VM_EXEC))) ++ return -EINVAL; ++ ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ /* Defer actual mapping to the fault handler. 
*/ ++ vma->vm_private_data = ptdev; ++ vma->vm_ops = &panthor_mmio_vm_ops; ++ vm_flags_set(vma, ++ VM_IO | VM_DONTCOPY | VM_DONTEXPAND | ++ VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP); ++ return 0; ++} ++ ++#ifdef CONFIG_PM ++int panthor_device_resume(struct device *dev) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ int ret, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_SUSPENDED) ++ return -EINVAL; ++ ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING); ++ ++ ret = clk_prepare_enable(ptdev->clks.core); ++ if (ret) ++ goto err_set_suspended; ++ ++ ret = clk_prepare_enable(ptdev->clks.stacks); ++ if (ret) ++ goto err_disable_core_clk; ++ ++ ret = clk_prepare_enable(ptdev->clks.coregroup); ++ if (ret) ++ goto err_disable_stacks_clk; ++ ++ ret = panthor_devfreq_resume(ptdev); ++ if (ret) ++ goto err_disable_coregroup_clk; ++ ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_gpu_resume(ptdev); ++ panthor_mmu_resume(ptdev); ++ ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); ++ if (!ret) { ++ panthor_sched_resume(ptdev); ++ } else { ++ panthor_mmu_suspend(ptdev); ++ panthor_gpu_suspend(ptdev); ++ } ++ ++ drm_dev_exit(cookie); ++ ++ if (ret) ++ goto err_suspend_devfreq; ++ } ++ ++ if (atomic_read(&ptdev->reset.pending)) ++ queue_work(ptdev->reset.wq, &ptdev->reset.work); ++ ++ /* Clear all IOMEM mappings pointing to this device after we've ++ * resumed. This way the fake mappings pointing to the dummy pages ++ * are removed and the real iomem mapping will be restored on next ++ * access. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ return 0; ++ ++err_suspend_devfreq: ++ panthor_devfreq_suspend(ptdev); ++ ++err_disable_coregroup_clk: ++ clk_disable_unprepare(ptdev->clks.coregroup); ++ ++err_disable_stacks_clk: ++ clk_disable_unprepare(ptdev->clks.stacks); ++ ++err_disable_core_clk: ++ clk_disable_unprepare(ptdev->clks.core); ++ ++err_set_suspended: ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ return ret; ++} ++ ++int panthor_device_suspend(struct device *dev) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ int ret, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) ++ return -EINVAL; ++ ++ /* Clear all IOMEM mappings pointing to this device before we ++ * shutdown the power-domain and clocks. Failing to do that results ++ * in external aborts when the process accesses the iomem region. ++ * We change the state and call unmap_mapping_range() with the ++ * mmio_lock held to make sure the vm_fault handler won't set up ++ * invalid mappings. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ cancel_work_sync(&ptdev->reset.work); ++ ++ /* We prepare everything as if we were resetting the GPU. ++ * The end of the reset will happen in the resume path though. 
++ */ ++ panthor_sched_suspend(ptdev); ++ panthor_fw_suspend(ptdev); ++ panthor_mmu_suspend(ptdev); ++ panthor_gpu_suspend(ptdev); ++ drm_dev_exit(cookie); ++ } ++ ++ ret = panthor_devfreq_suspend(ptdev); ++ if (ret) { ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_gpu_resume(ptdev); ++ panthor_mmu_resume(ptdev); ++ drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); ++ panthor_sched_resume(ptdev); ++ drm_dev_exit(cookie); ++ } ++ ++ goto err_set_active; ++ } ++ ++ clk_disable_unprepare(ptdev->clks.coregroup); ++ clk_disable_unprepare(ptdev->clks.stacks); ++ clk_disable_unprepare(ptdev->clks.core); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ return 0; ++ ++err_set_active: ++ /* If something failed and we have to revert back to an ++ * active state, we also need to clear the MMIO userspace ++ * mappings, so any dumb pages that were mapped while we ++ * were trying to suspend gets invalidated. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ return ret; ++} ++#endif +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -0,0 +1,394 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_DEVICE_H__ ++#define __PANTHOR_DEVICE_H__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++struct panthor_csf; ++struct panthor_csf_ctx; ++struct panthor_device; ++struct panthor_gpu; ++struct panthor_group_pool; ++struct panthor_heap_pool; ++struct panthor_job; ++struct panthor_mmu; ++struct panthor_fw; ++struct panthor_perfcnt; ++struct panthor_vm; ++struct panthor_vm_pool; ++ ++/** ++ * enum panthor_device_pm_state - PM state ++ */ ++enum panthor_device_pm_state { ++ /** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */ ++ PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */ ++ PANTHOR_DEVICE_PM_STATE_RESUMING, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */ ++ PANTHOR_DEVICE_PM_STATE_ACTIVE, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */ ++ PANTHOR_DEVICE_PM_STATE_SUSPENDING, ++}; ++ ++/** ++ * struct panthor_irq - IRQ data ++ * ++ * Used to automate IRQ handling for the 3 different IRQs we have in this driver. ++ */ ++struct panthor_irq { ++ /** @ptdev: Panthor device */ ++ struct panthor_device *ptdev; ++ ++ /** @irq: IRQ number. */ ++ int irq; ++ ++ /** @mask: Current mask being applied to xxx_INT_MASK. */ ++ u32 mask; ++ ++ /** @suspended: Set to true when the IRQ is suspended. */ ++ atomic_t suspended; ++}; ++ ++/** ++ * struct panthor_device - Panthor device ++ */ ++struct panthor_device { ++ /** @base: Base drm_device. */ ++ struct drm_device base; ++ ++ /** @phys_addr: Physical address of the iomem region. */ ++ phys_addr_t phys_addr; ++ ++ /** @iomem: CPU mapping of the IOMEM region. */ ++ void __iomem *iomem; ++ ++ /** @clks: GPU clocks. */ ++ struct { ++ /** @core: Core clock. */ ++ struct clk *core; ++ ++ /** @stacks: Stacks clock. This clock is optional. */ ++ struct clk *stacks; ++ ++ /** @coregroup: Core group clock. This clock is optional. 
*/ ++ struct clk *coregroup; ++ } clks; ++ ++ /** @coherent: True if the CPU/GPU are memory coherent. */ ++ bool coherent; ++ ++ /** @gpu_info: GPU information. */ ++ struct drm_panthor_gpu_info gpu_info; ++ ++ /** @csif_info: Command stream interface information. */ ++ struct drm_panthor_csif_info csif_info; ++ ++ /** @gpu: GPU management data. */ ++ struct panthor_gpu *gpu; ++ ++ /** @fw: FW management data. */ ++ struct panthor_fw *fw; ++ ++ /** @mmu: MMU management data. */ ++ struct panthor_mmu *mmu; ++ ++ /** @scheduler: Scheduler management data. */ ++ struct panthor_scheduler *scheduler; ++ ++ /** @devfreq: Device frequency scaling management data. */ ++ struct panthor_devfreq *devfreq; ++ ++ /** @unplug: Device unplug related fields. */ ++ struct { ++ /** @lock: Lock used to serialize unplug operations. */ ++ struct mutex lock; ++ ++ /** ++ * @done: Completion object signaled when the unplug ++ * operation is done. ++ */ ++ struct completion done; ++ } unplug; ++ ++ /** @reset: Reset related fields. */ ++ struct { ++ /** @wq: Ordered workqueue used to schedule reset operations. */ ++ struct workqueue_struct *wq; ++ ++ /** @work: Reset work. */ ++ struct work_struct work; ++ ++ /** @pending: Set to true if a reset is pending. */ ++ atomic_t pending; ++ } reset; ++ ++ /** @pm: Power management related data. */ ++ struct { ++ /** @state: Power state. */ ++ atomic_t state; ++ ++ /** ++ * @mmio_lock: Lock protecting MMIO userspace CPU mappings. ++ * ++ * This is needed to ensure we map the dummy IO pages when ++ * the device is being suspended, and the real IO pages when ++ * the device is being resumed. We can't just rely on the ++ * state atomicity to deal with this race. ++ */ ++ struct mutex mmio_lock; ++ ++ /** ++ * @dummy_latest_flush: Dummy LATEST_FLUSH page. ++ * ++ * Used to replace the real LATEST_FLUSH page when the GPU ++ * is suspended. ++ */ ++ u32 *dummy_latest_flush; ++ } pm; ++}; ++ ++/** ++ * struct panthor_file - Panthor file ++ */ ++struct panthor_file { ++ /** @ptdev: Device attached to this file. */ ++ struct panthor_device *ptdev; ++ ++ /** @vms: VM pool attached to this file. */ ++ struct panthor_vm_pool *vms; ++ ++ /** @groups: Scheduling group pool attached to this file. */ ++ struct panthor_group_pool *groups; ++}; ++ ++int panthor_device_init(struct panthor_device *ptdev); ++void panthor_device_unplug(struct panthor_device *ptdev); ++ ++/** ++ * panthor_device_schedule_reset() - Schedules a reset operation ++ */ ++static inline void panthor_device_schedule_reset(struct panthor_device *ptdev) ++{ ++ if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) && ++ atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE) ++ queue_work(ptdev->reset.wq, &ptdev->reset.work); ++} ++ ++/** ++ * panthor_device_reset_is_pending() - Checks if a reset is pending. ++ * ++ * Return: true if a reset is pending, false otherwise.
++ */ ++static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev) ++{ ++ return atomic_read(&ptdev->reset.pending) != 0; ++} ++ ++int panthor_device_mmap_io(struct panthor_device *ptdev, ++ struct vm_area_struct *vma); ++ ++int panthor_device_resume(struct device *dev); ++int panthor_device_suspend(struct device *dev); ++ ++enum drm_panthor_exception_type { ++ DRM_PANTHOR_EXCEPTION_OK = 0x00, ++ DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, ++ DRM_PANTHOR_EXCEPTION_KABOOM = 0x05, ++ DRM_PANTHOR_EXCEPTION_EUREKA = 0x06, ++ DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08, ++ DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, ++ DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, ++ DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, ++ DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, ++ DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, ++ DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, ++ DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a, ++ DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b, ++ DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50, ++ DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51, ++ DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, ++ DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58, ++ DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59, ++ DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, ++ DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b, ++ DRM_PANTHOR_EXCEPTION_OOM = 0x60, ++ DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68, ++ DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69, ++ DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80, ++ DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, ++ DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, ++ DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, ++}; ++ ++/** ++ * panthor_exception_is_fault() - Checks if an exception is a fault. ++ * ++ * Return: true if the exception is a fault, false otherwise. ++ */ ++static inline bool ++panthor_exception_is_fault(u32 exception_code) ++{ ++ return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT; ++} ++ ++const char *panthor_exception_name(struct panthor_device *ptdev, ++ u32 exception_code); ++ ++/** ++ * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt ++ * registration function. ++ * ++ * The boiler-plate to gracefully deal with shared interrupts is ++ * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER() ++ * just after the actual handler. 
The handler prototype is: ++ * ++ * void (*handler)(struct panthor_device *, u32 status); ++ */ ++#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler) \ ++static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \ ++{ \ ++ struct panthor_irq *pirq = data; \ ++ struct panthor_device *ptdev = pirq->ptdev; \ ++ \ ++ if (atomic_read(&pirq->suspended)) \ ++ return IRQ_NONE; \ ++ if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT)) \ ++ return IRQ_NONE; \ ++ \ ++ gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0); \ ++ return IRQ_WAKE_THREAD; \ ++} \ ++ \ ++static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \ ++{ \ ++ struct panthor_irq *pirq = data; \ ++ struct panthor_device *ptdev = pirq->ptdev; \ ++ irqreturn_t ret = IRQ_NONE; \ ++ \ ++ while (true) { \ ++ u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask; \ ++ \ ++ if (!status) \ ++ break; \ ++ \ ++ gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status); \ ++ \ ++ __handler(ptdev, status); \ ++ ret = IRQ_HANDLED; \ ++ } \ ++ \ ++ if (!atomic_read(&pirq->suspended)) \ ++ gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask); \ ++ \ ++ return ret; \ ++} \ ++ \ ++static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ ++{ \ ++ int cookie; \ ++ \ ++ atomic_set(&pirq->suspended, true); \ ++ \ ++ if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ ++ synchronize_irq(pirq->irq); \ ++ drm_dev_exit(cookie); \ ++ } \ ++ \ ++ pirq->mask = 0; \ ++} \ ++ \ ++static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ ++{ \ ++ int cookie; \ ++ \ ++ atomic_set(&pirq->suspended, false); \ ++ pirq->mask = mask; \ ++ \ ++ if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ ++ drm_dev_exit(cookie); \ ++ } \ ++} \ ++ \ ++static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ ++ struct panthor_irq *pirq, \ ++ int irq, u32 mask) \ ++{ \ ++ pirq->ptdev = ptdev; \ ++ pirq->irq = irq; \ ++ panthor_ ## __name ## _irq_resume(pirq, mask); \ ++ \ ++ return devm_request_threaded_irq(ptdev->base.dev, irq, \ ++ panthor_ ## __name ## _irq_raw_handler, \ ++ panthor_ ## __name ## _irq_threaded_handler, \ ++ IRQF_SHARED, KBUILD_MODNAME "-" # __name, \ ++ pirq); \ ++} ++ ++/** ++ * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one ++ * @offset: Offset to convert. ++ * ++ * With 32-bit systems being limited by the 32-bit representation of mmap2's ++ * pgoffset field, we need to make the MMIO offset arch specific. This function ++ * converts a user MMIO offset into something the kernel driver understands. ++ * ++ * If the kernel and userspace architecture match, the offset is unchanged. If ++ * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match ++ * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible. ++ * ++ * Return: Adjusted offset. 
++ */ ++static inline u64 panthor_device_mmio_offset(u64 offset) ++{ ++#ifdef CONFIG_ARM64 ++ if (test_tsk_thread_flag(current, TIF_32BIT)) ++ offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; ++#endif ++ ++ return offset; ++} ++ ++extern struct workqueue_struct *panthor_cleanup_wq; ++ ++#endif diff --git a/patches-6.6/034-33-v6.10-drm-panthor-Add-the-GPU-logical-block.patch b/patches-6.6/034-33-v6.10-drm-panthor-Add-the-GPU-logical-block.patch new file mode 100644 index 0000000..dbc1ba9 --- /dev/null +++ b/patches-6.6/034-33-v6.10-drm-panthor-Add-the-GPU-logical-block.patch @@ -0,0 +1,593 @@ +From 5cd894e258c4b0b92b9b475309cea244e590d194 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:18 +0100 +Subject: [PATCH] drm/panthor: Add the GPU logical block + +Handles everything that's not related to the FW, the MMU or the +scheduler. This is the block dealing with the GPU property retrieval, +the GPU block power on/off logic, and some global operations, like +global cache flushing. + +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix GPU_MODEL() kernel doc +- Fix test in panthor_gpu_block_power_off() +- Add Steve's R-b + +v4: +- Expose CORE_FEATURES through DEV_QUERY + +v3: +- Add acks for the MIT/GPL2 relicensing +- Use macros to extract GPU ID info +- Make sure we reset clear pending_reqs bits when wait_event_timeout() + times out but the corresponding bit is cleared in GPU_INT_RAWSTAT + (can happen if the IRQ is masked or HW takes to long to call the IRQ + handler) +- GPU_MODEL now takes separate arch and product majors to be more + readable. +- Drop GPU_IRQ_MCU_STATUS_CHANGED from interrupt mask. +- Handle GPU_IRQ_PROTM_FAULT correctly (don't output registers that are + not updated for protected interrupts). +- Minor code tidy ups + +Cc: Alexey Sheplyakov # MIT+GPL2 relicensing +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-5-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_gpu.c | 482 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_gpu.h | 52 +++ + 2 files changed, 534 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.c + create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gpu.c +@@ -0,0 +1,482 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd., Rob Herring */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gpu.h" ++#include "panthor_regs.h" ++ ++/** ++ * struct panthor_gpu - GPU block management data. ++ */ ++struct panthor_gpu { ++ /** @irq: GPU irq. */ ++ struct panthor_irq irq; ++ ++ /** @reqs_lock: Lock protecting access to pending_reqs. */ ++ spinlock_t reqs_lock; ++ ++ /** @pending_reqs: Pending GPU requests. */ ++ u32 pending_reqs; ++ ++ /** @reqs_acked: GPU request wait queue. 
*/ ++ wait_queue_head_t reqs_acked; ++}; ++ ++/** ++ * struct panthor_model - GPU model description ++ */ ++struct panthor_model { ++ /** @name: Model name. */ ++ const char *name; ++ ++ /** @arch_major: Major version number of architecture. */ ++ u8 arch_major; ++ ++ /** @product_major: Major version number of product. */ ++ u8 product_major; ++}; ++ ++/** ++ * GPU_MODEL() - Define a GPU model. A GPU product can be uniquely identified ++ * by a combination of the major architecture version and the major product ++ * version. ++ * @_name: Name for the GPU model. ++ * @_arch_major: Architecture major. ++ * @_product_major: Product major. ++ */ ++#define GPU_MODEL(_name, _arch_major, _product_major) \ ++{\ ++ .name = __stringify(_name), \ ++ .arch_major = _arch_major, \ ++ .product_major = _product_major, \ ++} ++ ++static const struct panthor_model gpu_models[] = { ++ GPU_MODEL(g610, 10, 7), ++ {}, ++}; ++ ++#define GPU_INTERRUPTS_MASK \ ++ (GPU_IRQ_FAULT | \ ++ GPU_IRQ_PROTM_FAULT | \ ++ GPU_IRQ_RESET_COMPLETED | \ ++ GPU_IRQ_CLEAN_CACHES_COMPLETED) ++ ++static void panthor_gpu_init_info(struct panthor_device *ptdev) ++{ ++ const struct panthor_model *model; ++ u32 arch_major, product_major; ++ u32 major, minor, status; ++ unsigned int i; ++ ++ ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); ++ ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); ++ ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); ++ ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); ++ ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES); ++ ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES); ++ ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES); ++ ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES); ++ ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES); ++ ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS); ++ ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE); ++ ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE); ++ ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES); ++ for (i = 0; i < 4; i++) ++ ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i)); ++ ++ ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); ++ ++ ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO); ++ ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32; ++ ++ ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO); ++ ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32; ++ ++ ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO); ++ ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32; ++ ++ arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id); ++ product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id); ++ major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id); ++ minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id); ++ status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id); ++ ++ for (model = gpu_models; model->name; model++) { ++ if (model->arch_major == arch_major && ++ model->product_major == product_major) ++ break; ++ } ++ ++ drm_info(&ptdev->base, ++ "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x", ++ model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16, ++ major, minor, status); ++ ++ drm_info(&ptdev->base, ++ "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x", ++ 
ptdev->gpu_info.l2_features, ++ ptdev->gpu_info.tiler_features, ++ ptdev->gpu_info.mem_features, ++ ptdev->gpu_info.mmu_features, ++ ptdev->gpu_info.as_present); ++ ++ drm_info(&ptdev->base, ++ "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx", ++ ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present, ++ ptdev->gpu_info.tiler_present); ++} ++ ++static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ if (status & GPU_IRQ_FAULT) { ++ u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS); ++ u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) | ++ gpu_read(ptdev, GPU_FAULT_ADDR_LO); ++ ++ drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n", ++ fault_status, panthor_exception_name(ptdev, fault_status & 0xFF), ++ address); ++ } ++ if (status & GPU_IRQ_PROTM_FAULT) ++ drm_warn(&ptdev->base, "GPU Fault in protected mode\n"); ++ ++ spin_lock(&ptdev->gpu->reqs_lock); ++ if (status & ptdev->gpu->pending_reqs) { ++ ptdev->gpu->pending_reqs &= ~status; ++ wake_up_all(&ptdev->gpu->reqs_acked); ++ } ++ spin_unlock(&ptdev->gpu->reqs_lock); ++} ++PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler); ++ ++/** ++ * panthor_gpu_unplug() - Called when the GPU is unplugged. ++ * @ptdev: Device to unplug. ++ */ ++void panthor_gpu_unplug(struct panthor_device *ptdev) ++{ ++ unsigned long flags; ++ ++ /* Make sure the IRQ handler is not running after that point. */ ++ panthor_gpu_irq_suspend(&ptdev->gpu->irq); ++ ++ /* Wake-up all waiters. */ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ ptdev->gpu->pending_reqs = 0; ++ wake_up_all(&ptdev->gpu->reqs_acked); ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++} ++ ++/** ++ * panthor_gpu_init() - Initialize the GPU block ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_init(struct panthor_device *ptdev) ++{ ++ struct panthor_gpu *gpu; ++ u32 pa_bits; ++ int ret, irq; ++ ++ gpu = drmm_kzalloc(&ptdev->base, sizeof(*gpu), GFP_KERNEL); ++ if (!gpu) ++ return -ENOMEM; ++ ++ spin_lock_init(&gpu->reqs_lock); ++ init_waitqueue_head(&gpu->reqs_acked); ++ ptdev->gpu = gpu; ++ panthor_gpu_init_info(ptdev); ++ ++ dma_set_max_seg_size(ptdev->base.dev, UINT_MAX); ++ pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); ++ ret = dma_set_mask_and_coherent(ptdev->base.dev, DMA_BIT_MASK(pa_bits)); ++ if (ret) ++ return ret; ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); ++ if (irq <= 0) ++ return ret; ++ ++ ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_block_power_off() - Power-off a specific block of the GPU ++ * @ptdev: Device. ++ * @blk_name: Block name. ++ * @pwroff_reg: Power-off register for this block. ++ * @pwrtrans_reg: Power transition register for this block. ++ * @mask: Sub-elements to power-off. ++ * @timeout_us: Timeout in microseconds. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++int panthor_gpu_block_power_off(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwroff_reg, u32 pwrtrans_reg, ++ u64 mask, u32 timeout_us) ++{ ++ u32 val, i; ++ int ret; ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ if (mask & GENMASK(31, 0)) ++ gpu_write(ptdev, pwroff_reg, mask); ++ ++ if (mask >> 32) ++ gpu_write(ptdev, pwroff_reg + 4, mask >> 32); ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_block_power_on() - Power-on a specific block of the GPU ++ * @ptdev: Device. ++ * @blk_name: Block name. ++ * @pwron_reg: Power-on register for this block. ++ * @pwrtrans_reg: Power transition register for this block. ++ * @rdy_reg: Power transition ready register. ++ * @mask: Sub-elements to power-on. ++ * @timeout_us: Timeout in microseconds. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_block_power_on(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwron_reg, u32 pwrtrans_reg, ++ u32 rdy_reg, u64 mask, u32 timeout_us) ++{ ++ u32 val, i; ++ int ret; ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ if (mask & GENMASK(31, 0)) ++ gpu_write(ptdev, pwron_reg, mask); ++ ++ if (mask >> 32) ++ gpu_write(ptdev, pwron_reg + 4, mask >> 32); ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4), ++ val, (mask32 & val) == mask32, ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx readyness", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_l2_power_on() - Power-on the L2-cache ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_l2_power_on(struct panthor_device *ptdev) ++{ ++ if (ptdev->gpu_info.l2_present != 1) { ++ /* ++ * Only support one core group now. ++ * ~(l2_present - 1) unsets all bits in l2_present except ++ * the bottom bit. (l2_present - 2) has all the bits in ++ * the first core group set. AND them together to generate ++ * a mask of cores in the first core group. ++ */ ++ u64 core_mask = ~(ptdev->gpu_info.l2_present - 1) & ++ (ptdev->gpu_info.l2_present - 2); ++ drm_info_once(&ptdev->base, "using only 1st core group (%lu cores from %lu)\n", ++ hweight64(core_mask), ++ hweight64(ptdev->gpu_info.shader_present)); ++ } ++ ++ return panthor_gpu_power_on(ptdev, L2, 1, 20000); ++} ++ ++/** ++ * panthor_gpu_flush_caches() - Flush caches ++ * @ptdev: Device. ++ * @l2: L2 flush type. ++ * @lsc: LSC flush type. 
++ * @other: Other flush type. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_flush_caches(struct panthor_device *ptdev, ++ u32 l2, u32 lsc, u32 other) ++{ ++ bool timedout = false; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if (!drm_WARN_ON(&ptdev->base, ++ ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { ++ ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED; ++ gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other)); ++ } ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ ++ if (!wait_event_timeout(ptdev->gpu->reqs_acked, ++ !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED), ++ msecs_to_jiffies(100))) { ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 && ++ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED)) ++ timedout = true; ++ else ++ ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED; ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ } ++ ++ if (timedout) { ++ drm_err(&ptdev->base, "Flush caches timeout"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_soft_reset() - Issue a soft-reset ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_soft_reset(struct panthor_device *ptdev) ++{ ++ bool timedout = false; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if (!drm_WARN_ON(&ptdev->base, ++ ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) { ++ ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED; ++ gpu_write(ptdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); ++ gpu_write(ptdev, GPU_CMD, GPU_SOFT_RESET); ++ } ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ ++ if (!wait_event_timeout(ptdev->gpu->reqs_acked, ++ !(ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED), ++ msecs_to_jiffies(100))) { ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if ((ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED) != 0 && ++ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_RESET_COMPLETED)) ++ timedout = true; ++ else ++ ptdev->gpu->pending_reqs &= ~GPU_IRQ_RESET_COMPLETED; ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ } ++ ++ if (timedout) { ++ drm_err(&ptdev->base, "Soft reset timeout"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_suspend() - Suspend the GPU block. ++ * @ptdev: Device. ++ * ++ * Suspend the GPU irq. This should be called last in the suspend procedure, ++ * after all other blocks have been suspended. ++ */ ++void panthor_gpu_suspend(struct panthor_device *ptdev) ++{ ++ /* ++ * It may be preferable to simply power down the L2, but for now just ++ * soft-reset which will leave the L2 powered down. ++ */ ++ panthor_gpu_soft_reset(ptdev); ++ panthor_gpu_irq_suspend(&ptdev->gpu->irq); ++} ++ ++/** ++ * panthor_gpu_resume() - Resume the GPU block. ++ * @ptdev: Device. ++ * ++ * Resume the IRQ handler and power-on the L2-cache. ++ * The FW takes care of powering the other blocks. ++ */ ++void panthor_gpu_resume(struct panthor_device *ptdev) ++{ ++ panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK); ++ panthor_gpu_l2_power_on(ptdev); ++} +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gpu.h +@@ -0,0 +1,52 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Collabora ltd.
*/ ++ ++#ifndef __PANTHOR_GPU_H__ ++#define __PANTHOR_GPU_H__ ++ ++struct panthor_device; ++ ++int panthor_gpu_init(struct panthor_device *ptdev); ++void panthor_gpu_unplug(struct panthor_device *ptdev); ++void panthor_gpu_suspend(struct panthor_device *ptdev); ++void panthor_gpu_resume(struct panthor_device *ptdev); ++ ++int panthor_gpu_block_power_on(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwron_reg, u32 pwrtrans_reg, ++ u32 rdy_reg, u64 mask, u32 timeout_us); ++int panthor_gpu_block_power_off(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwroff_reg, u32 pwrtrans_reg, ++ u64 mask, u32 timeout_us); ++ ++/** ++ * panthor_gpu_power_on() - Power on the GPU block. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define panthor_gpu_power_on(ptdev, type, mask, timeout_us) \ ++ panthor_gpu_block_power_on(ptdev, #type, \ ++ type ## _PWRON_LO, \ ++ type ## _PWRTRANS_LO, \ ++ type ## _READY_LO, \ ++ mask, timeout_us) ++ ++/** ++ * panthor_gpu_power_off() - Power off the GPU block. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define panthor_gpu_power_off(ptdev, type, mask, timeout_us) \ ++ panthor_gpu_block_power_off(ptdev, #type, \ ++ type ## _PWROFF_LO, \ ++ type ## _PWRTRANS_LO, \ ++ mask, timeout_us) ++ ++int panthor_gpu_l2_power_on(struct panthor_device *ptdev); ++int panthor_gpu_flush_caches(struct panthor_device *ptdev, ++ u32 l2, u32 lsc, u32 other); ++int panthor_gpu_soft_reset(struct panthor_device *ptdev); ++ ++#endif diff --git a/patches-6.6/034-34-v6.10-drm-panthor-Add-GEM-logical-block.patch b/patches-6.6/034-34-v6.10-drm-panthor-Add-GEM-logical-block.patch new file mode 100644 index 0000000..dec21e3 --- /dev/null +++ b/patches-6.6/034-34-v6.10-drm-panthor-Add-GEM-logical-block.patch @@ -0,0 +1,426 @@ +From 8a1cc07578bf42d85f008316873d710ff684dd29 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:19 +0100 +Subject: [PATCH] drm/panthor: Add GEM logical block + +Anything relating to GEM object management is placed here. Nothing +particularly interesting here, given the implementation is based on +drm_gem_shmem_object, which is doing most of the work. 
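As a rough usage sketch (not part of the patch itself), another logical block would consume the kernel-BO helpers declared in panthor_gem.h below roughly like this. The bo_flags, vm_map_flags and hw_desc_addr names are placeholders standing in for the uAPI flags and the consumer-side state, not identifiers taken from the series:

struct panthor_kernel_bo *kbo;

kbo = panthor_kernel_bo_create(ptdev, vm, SZ_64K, bo_flags, vm_map_flags,
			       PANTHOR_VM_KERNEL_AUTO_VA);
if (IS_ERR(kbo))
	return PTR_ERR(kbo);

/* CPU-side access goes through the kernel mapping set up on demand. */
if (!panthor_kernel_bo_vmap(kbo))
	memset(kbo->kmap, 0, panthor_kernel_bo_size(kbo));

/* The GPU sees the VA that was auto-allocated at creation time. */
hw_desc_addr = panthor_kernel_bo_gpuva(kbo);

/* Destruction tolerates ERR/NULL pointers, so error paths can call it
 * unconditionally; it unmaps the VA range and drops the GEM reference. */
panthor_kernel_bo_destroy(vm, kbo);

Because the destroy helper releases the GPU mapping, the VA node and the backing GEM in one call, callers do not need to track those resources separately.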
+ +v6: +- Add Maxime's and Heiko's acks +- Return a page-aligned BO size to userspace when creating a BO +- Keep header inclusion alphabetically ordered + +v5: +- Add Liviu's and Steve's R-b + +v4: +- Force kernel BOs to be GPU mapped +- Make panthor_kernel_bo_destroy() robust against ERR/NULL BO pointers + to simplify the call sites + +v3: +- Add acks for the MIT/GPL2 relicensing +- Provide a panthor_kernel_bo abstraction for buffer objects managed by + the kernel (will replace panthor_fw_mem and be used everywhere we were + using panthor_gem_create_and_map() before) +- Adjust things to match drm_gpuvm changes +- Change return of panthor_gem_create_with_handle() to int + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Liviu Dudau +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-6-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_gem.c | 230 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_gem.h | 142 ++++++++++++++++ + 2 files changed, 372 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_gem.c + create mode 100644 drivers/gpu/drm/panthor/panthor_gem.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gem.c +@@ -0,0 +1,230 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_mmu.h" ++ ++static void panthor_gem_free_object(struct drm_gem_object *obj) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(obj); ++ struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem; ++ ++ drm_gem_free_mmap_offset(&bo->base.base); ++ mutex_destroy(&bo->gpuva_list_lock); ++ drm_gem_shmem_free(&bo->base); ++ drm_gem_object_put(vm_root_gem); ++} ++ ++/** ++ * panthor_kernel_bo_destroy() - Destroy a kernel buffer object ++ * @vm: The VM this BO was mapped to. ++ * @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction ++ * is skipped. ++ */ ++void panthor_kernel_bo_destroy(struct panthor_vm *vm, ++ struct panthor_kernel_bo *bo) ++{ ++ int ret; ++ ++ if (IS_ERR_OR_NULL(bo)) ++ return; ++ ++ panthor_kernel_bo_vunmap(bo); ++ ++ if (drm_WARN_ON(bo->obj->dev, ++ to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm))) ++ goto out_free_bo; ++ ++ ret = panthor_vm_unmap_range(vm, bo->va_node.start, ++ panthor_kernel_bo_size(bo)); ++ if (ret) ++ goto out_free_bo; ++ ++ panthor_vm_free_va(vm, &bo->va_node); ++ drm_gem_object_put(bo->obj); ++ ++out_free_bo: ++ kfree(bo); ++} ++ ++/** ++ * panthor_kernel_bo_create() - Create and map a GEM object to a VM ++ * @ptdev: Device. ++ * @vm: VM to map the GEM to. If NULL, the kernel object is not GPU mapped. ++ * @size: Size of the buffer object. ++ * @bo_flags: Combination of drm_panthor_bo_flags flags. ++ * @vm_map_flags: Combination of drm_panthor_vm_bind_op_flags (only those ++ * that are related to map operations). ++ * @gpu_va: GPU address assigned when mapping to the VM. ++ * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be ++ * automatically allocated. 
++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, ++ size_t size, u32 bo_flags, u32 vm_map_flags, ++ u64 gpu_va) ++{ ++ struct drm_gem_shmem_object *obj; ++ struct panthor_kernel_bo *kbo; ++ struct panthor_gem_object *bo; ++ int ret; ++ ++ if (drm_WARN_ON(&ptdev->base, !vm)) ++ return ERR_PTR(-EINVAL); ++ ++ kbo = kzalloc(sizeof(*kbo), GFP_KERNEL); ++ if (!kbo) ++ return ERR_PTR(-ENOMEM); ++ ++ obj = drm_gem_shmem_create(&ptdev->base, size); ++ if (IS_ERR(obj)) { ++ ret = PTR_ERR(obj); ++ goto err_free_bo; ++ } ++ ++ bo = to_panthor_bo(&obj->base); ++ size = obj->base.size; ++ kbo->obj = &obj->base; ++ bo->flags = bo_flags; ++ ++ ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node); ++ if (ret) ++ goto err_put_obj; ++ ++ ret = panthor_vm_map_bo_range(vm, bo, 0, size, kbo->va_node.start, vm_map_flags); ++ if (ret) ++ goto err_free_va; ++ ++ bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); ++ drm_gem_object_get(bo->exclusive_vm_root_gem); ++ bo->base.base.resv = bo->exclusive_vm_root_gem->resv; ++ return kbo; ++ ++err_free_va: ++ panthor_vm_free_va(vm, &kbo->va_node); ++ ++err_put_obj: ++ drm_gem_object_put(&obj->base); ++ ++err_free_bo: ++ kfree(kbo); ++ return ERR_PTR(ret); ++} ++ ++static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(obj); ++ ++ /* Don't allow mmap on objects that have the NO_MMAP flag set. */ ++ if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) ++ return -EINVAL; ++ ++ return drm_gem_shmem_object_mmap(obj, vma); ++} ++ ++static struct dma_buf * ++panthor_gem_prime_export(struct drm_gem_object *obj, int flags) ++{ ++ /* We can't export GEMs that have an exclusive VM. */ ++ if (to_panthor_bo(obj)->exclusive_vm_root_gem) ++ return ERR_PTR(-EINVAL); ++ ++ return drm_gem_prime_export(obj, flags); ++} ++ ++static const struct drm_gem_object_funcs panthor_gem_funcs = { ++ .free = panthor_gem_free_object, ++ .print_info = drm_gem_shmem_object_print_info, ++ .pin = drm_gem_shmem_object_pin, ++ .unpin = drm_gem_shmem_object_unpin, ++ .get_sg_table = drm_gem_shmem_object_get_sg_table, ++ .vmap = drm_gem_shmem_object_vmap, ++ .vunmap = drm_gem_shmem_object_vunmap, ++ .mmap = panthor_gem_mmap, ++ .export = panthor_gem_prime_export, ++ .vm_ops = &drm_gem_shmem_vm_ops, ++}; ++ ++/** ++ * panthor_gem_create_object - Implementation of driver->gem_create_object. ++ * @ddev: DRM device ++ * @size: Size in bytes of the memory the object will reference ++ * ++ * This lets the GEM helpers allocate object structs for us, and keep ++ * our BO stats correct. ++ */ ++struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_gem_object *obj; ++ ++ obj = kzalloc(sizeof(*obj), GFP_KERNEL); ++ if (!obj) ++ return ERR_PTR(-ENOMEM); ++ ++ obj->base.base.funcs = &panthor_gem_funcs; ++ obj->base.map_wc = !ptdev->coherent; ++ mutex_init(&obj->gpuva_list_lock); ++ drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock); ++ ++ return &obj->base.base; ++} ++ ++/** ++ * panthor_gem_create_with_handle() - Create a GEM object and attach it to a handle. ++ * @file: DRM file. ++ * @ddev: DRM device. ++ * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared. ++ * @size: Size of the GEM object to allocate. 
++ * @flags: Combination of drm_panthor_bo_flags flags. ++ * @handle: Pointer holding the handle pointing to the new GEM object. ++ * ++ * Return: Zero on success ++ */ ++int ++panthor_gem_create_with_handle(struct drm_file *file, ++ struct drm_device *ddev, ++ struct panthor_vm *exclusive_vm, ++ u64 *size, u32 flags, u32 *handle) ++{ ++ int ret; ++ struct drm_gem_shmem_object *shmem; ++ struct panthor_gem_object *bo; ++ ++ shmem = drm_gem_shmem_create(ddev, *size); ++ if (IS_ERR(shmem)) ++ return PTR_ERR(shmem); ++ ++ bo = to_panthor_bo(&shmem->base); ++ bo->flags = flags; ++ ++ if (exclusive_vm) { ++ bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); ++ drm_gem_object_get(bo->exclusive_vm_root_gem); ++ bo->base.base.resv = bo->exclusive_vm_root_gem->resv; ++ } ++ ++ /* ++ * Allocate an id of idr table where the obj is registered ++ * and handle has the id what user can see. ++ */ ++ ret = drm_gem_handle_create(file, &shmem->base, handle); ++ if (!ret) ++ *size = bo->base.base.size; ++ ++ /* drop reference from allocate - handle holds it now. */ ++ drm_gem_object_put(&shmem->base); ++ ++ return ret; ++} +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gem.h +@@ -0,0 +1,142 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_GEM_H__ ++#define __PANTHOR_GEM_H__ ++ ++#include ++#include ++ ++#include ++#include ++ ++struct panthor_vm; ++ ++/** ++ * struct panthor_gem_object - Driver specific GEM object. ++ */ ++struct panthor_gem_object { ++ /** @base: Inherit from drm_gem_shmem_object. */ ++ struct drm_gem_shmem_object base; ++ ++ /** ++ * @exclusive_vm_root_gem: Root GEM of the exclusive VM this GEM object ++ * is attached to. ++ * ++ * If @exclusive_vm_root_gem != NULL, any attempt to bind the GEM to a ++ * different VM will fail. ++ * ++ * All FW memory objects have this field set to the root GEM of the MCU ++ * VM. ++ */ ++ struct drm_gem_object *exclusive_vm_root_gem; ++ ++ /** ++ * @gpuva_list_lock: Custom GPUVA lock. ++ * ++ * Used to protect insertion of drm_gpuva elements to the ++ * drm_gem_object.gpuva.list list. ++ * ++ * We can't use the GEM resv for that, because drm_gpuva_link() is ++ * called in a dma-signaling path, where we're not allowed to take ++ * resv locks. ++ */ ++ struct mutex gpuva_list_lock; ++ ++ /** @flags: Combination of drm_panthor_bo_flags flags. */ ++ u32 flags; ++}; ++ ++/** ++ * struct panthor_kernel_bo - Kernel buffer object. ++ * ++ * These objects are only manipulated by the kernel driver and not ++ * directly exposed to the userspace. The GPU address of a kernel ++ * BO might be passed to userspace though. ++ */ ++struct panthor_kernel_bo { ++ /** ++ * @obj: The GEM object backing this kernel buffer object. ++ */ ++ struct drm_gem_object *obj; ++ ++ /** ++ * @va_node: VA space allocated to this GEM. ++ */ ++ struct drm_mm_node va_node; ++ ++ /** ++ * @kmap: Kernel CPU mapping of @gem. 
++ */ ++ void *kmap; ++}; ++ ++static inline ++struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj) ++{ ++ return container_of(to_drm_gem_shmem_obj(obj), struct panthor_gem_object, base); ++} ++ ++struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size); ++ ++struct drm_gem_object * ++panthor_gem_prime_import_sg_table(struct drm_device *ddev, ++ struct dma_buf_attachment *attach, ++ struct sg_table *sgt); ++ ++int ++panthor_gem_create_with_handle(struct drm_file *file, ++ struct drm_device *ddev, ++ struct panthor_vm *exclusive_vm, ++ u64 *size, u32 flags, uint32_t *handle); ++ ++static inline u64 ++panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo) ++{ ++ return bo->va_node.start; ++} ++ ++static inline size_t ++panthor_kernel_bo_size(struct panthor_kernel_bo *bo) ++{ ++ return bo->obj->size; ++} ++ ++static inline int ++panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo) ++{ ++ struct iosys_map map; ++ int ret; ++ ++ if (bo->kmap) ++ return 0; ++ ++ ret = drm_gem_vmap_unlocked(bo->obj, &map); ++ if (ret) ++ return ret; ++ ++ bo->kmap = map.vaddr; ++ return 0; ++} ++ ++static inline void ++panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo) ++{ ++ if (bo->kmap) { ++ struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap); ++ ++ drm_gem_vunmap_unlocked(bo->obj, &map); ++ bo->kmap = NULL; ++ } ++} ++ ++struct panthor_kernel_bo * ++panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, ++ size_t size, u32 bo_flags, u32 vm_map_flags, ++ u64 gpu_va); ++ ++void panthor_kernel_bo_destroy(struct panthor_vm *vm, ++ struct panthor_kernel_bo *bo); ++ ++#endif /* __PANTHOR_GEM_H__ */ diff --git a/patches-6.6/034-35-v6.10-drm-panthor-Add-the-devfreq-logical-block.patch b/patches-6.6/034-35-v6.10-drm-panthor-Add-the-devfreq-logical-block.patch new file mode 100644 index 0000000..be7ac4a --- /dev/null +++ b/patches-6.6/034-35-v6.10-drm-panthor-Add-the-devfreq-logical-block.patch @@ -0,0 +1,356 @@ +From fac9b22df4b1108f7fa5a087a77f922489861484 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:20 +0100 +Subject: [PATCH] drm/panthor: Add the devfreq logical block +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Everything related to devfreq is placed in panthor_devfreq.c, and +helpers that can be called by other logical blocks are exposed through +panthor_devfreq.h. + +This implementation is loosely based on the panfrost implementation, +the only difference being that we don't count device users, because +the idle/active state will be managed by the scheduler logic.
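To make the "no user counting" point concrete, here is a hedged sketch (not code from the patch) of how the scheduler side is expected to drive the load tracking. The function name and the jobs_pending condition are made up; only panthor_devfreq_record_busy() and panthor_devfreq_record_idle() come from the helpers added below:

/* Called from the scheduler whenever its view of GPU activity changes. */
static void example_report_gpu_activity(struct panthor_device *ptdev,
					bool jobs_pending)
{
	if (jobs_pending)
		panthor_devfreq_record_busy(ptdev);
	else
		panthor_devfreq_record_idle(ptdev);
}

panthor_devfreq_get_dev_status() then folds the accumulated busy/idle time into the utilization figure that the simple_ondemand governor acts on.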
+ +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v4: +- Add Clément's A-b for the relicensing + +v3: +- Add acks for the MIT/GPL2 relicensing + +v2: +- Added in v2 + +Cc: Clément Péron # MIT+GPL2 relicensing +Reviewed-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Acked-by: Clément Péron # MIT+GPL2 relicensing +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-7-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_devfreq.c | 283 ++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_devfreq.h | 21 ++ + 2 files changed, 304 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.c + create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_devfreq.c +@@ -0,0 +1,283 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++ ++/** ++ * struct panthor_devfreq - Device frequency management ++ */ ++struct panthor_devfreq { ++ /** @devfreq: devfreq device. */ ++ struct devfreq *devfreq; ++ ++ /** @gov_data: Governor data. */ ++ struct devfreq_simple_ondemand_data gov_data; ++ ++ /** @busy_time: Busy time. */ ++ ktime_t busy_time; ++ ++ /** @idle_time: Idle time. */ ++ ktime_t idle_time; ++ ++ /** @time_last_update: Last update time. */ ++ ktime_t time_last_update; ++ ++ /** @last_busy_state: True if the GPU was busy last time we updated the state. */ ++ bool last_busy_state; ++ ++ /* ++ * @lock: Lock used to protect busy_time, idle_time, time_last_update and ++ * last_busy_state. ++ * ++ * These fields can be accessed concurrently by panthor_devfreq_get_dev_status() ++ * and panthor_devfreq_record_{busy,idle}(). 
++ */ ++ spinlock_t lock; ++}; ++ ++static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq) ++{ ++ ktime_t now, last; ++ ++ now = ktime_get(); ++ last = pdevfreq->time_last_update; ++ ++ if (pdevfreq->last_busy_state) ++ pdevfreq->busy_time += ktime_sub(now, last); ++ else ++ pdevfreq->idle_time += ktime_sub(now, last); ++ ++ pdevfreq->time_last_update = now; ++} ++ ++static int panthor_devfreq_target(struct device *dev, unsigned long *freq, ++ u32 flags) ++{ ++ struct dev_pm_opp *opp; ++ ++ opp = devfreq_recommended_opp(dev, freq, flags); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); ++ dev_pm_opp_put(opp); ++ ++ return dev_pm_opp_set_rate(dev, *freq); ++} ++ ++static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq) ++{ ++ pdevfreq->busy_time = 0; ++ pdevfreq->idle_time = 0; ++ pdevfreq->time_last_update = ktime_get(); ++} ++ ++static int panthor_devfreq_get_dev_status(struct device *dev, ++ struct devfreq_dev_status *status) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ status->current_frequency = clk_get_rate(ptdev->clks.core); ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ ++ status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time, ++ pdevfreq->idle_time)); ++ ++ status->busy_time = ktime_to_ns(pdevfreq->busy_time); ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++ ++ drm_dbg(&ptdev->base, "busy %lu total %lu %lu %% freq %lu MHz\n", ++ status->busy_time, status->total_time, ++ status->busy_time / (status->total_time / 100), ++ status->current_frequency / 1000 / 1000); ++ ++ return 0; ++} ++ ++static struct devfreq_dev_profile panthor_devfreq_profile = { ++ .timer = DEVFREQ_TIMER_DELAYED, ++ .polling_ms = 50, /* ~3 frames */ ++ .target = panthor_devfreq_target, ++ .get_dev_status = panthor_devfreq_get_dev_status, ++}; ++ ++int panthor_devfreq_init(struct panthor_device *ptdev) ++{ ++ /* There's actually 2 regulators (mali and sram), but the OPP core only ++ * supports one. ++ * ++ * We assume the sram regulator is coupled with the mali one and let ++ * the coupling logic deal with voltage updates. ++ */ ++ static const char * const reg_names[] = { "mali", NULL }; ++ struct thermal_cooling_device *cooling; ++ struct device *dev = ptdev->base.dev; ++ struct panthor_devfreq *pdevfreq; ++ struct dev_pm_opp *opp; ++ unsigned long cur_freq; ++ int ret; ++ ++ pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL); ++ if (!pdevfreq) ++ return -ENOMEM; ++ ++ ptdev->devfreq = pdevfreq; ++ ++ ret = devm_pm_opp_set_regulators(dev, reg_names); ++ if (ret) { ++ if (ret != -EPROBE_DEFER) ++ DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); ++ ++ return ret; ++ } ++ ++ ret = devm_pm_opp_of_add_table(dev); ++ if (ret) ++ return ret; ++ ++ spin_lock_init(&pdevfreq->lock); ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ cur_freq = clk_get_rate(ptdev->clks.core); ++ ++ opp = devfreq_recommended_opp(dev, &cur_freq, 0); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); ++ ++ panthor_devfreq_profile.initial_freq = cur_freq; ++ ++ /* Regulator coupling only takes care of synchronizing/balancing voltage ++ * updates, but the coupled regulator needs to be enabled manually. 
++ * ++ * We use devm_regulator_get_enable_optional() and keep the sram supply ++ * enabled until the device is removed, just like we do for the mali ++ * supply, which is enabled when dev_pm_opp_set_opp(dev, opp) is called, ++ * and disabled when the opp_table is torn down, using the devm action. ++ * ++ * If we really care about disabling regulators on suspend, we should: ++ * - use devm_regulator_get_optional() here ++ * - call dev_pm_opp_set_opp(dev, NULL) before leaving this function ++ * (this disables the regulator passed to the OPP layer) ++ * - call dev_pm_opp_set_opp(dev, NULL) and ++ * regulator_disable(ptdev->regulators.sram) in ++ * panthor_devfreq_suspend() ++ * - call dev_pm_opp_set_opp(dev, default_opp) and ++ * regulator_enable(ptdev->regulators.sram) in ++ * panthor_devfreq_resume() ++ * ++ * But without knowing if it's beneficial or not (in term of power ++ * consumption), or how much it slows down the suspend/resume steps, ++ * let's just keep regulators enabled for the device lifetime. ++ */ ++ ret = devm_regulator_get_enable_optional(dev, "sram"); ++ if (ret && ret != -ENODEV) { ++ if (ret != -EPROBE_DEFER) ++ DRM_DEV_ERROR(dev, "Couldn't retrieve/enable sram supply\n"); ++ return ret; ++ } ++ ++ /* ++ * Set the recommend OPP this will enable and configure the regulator ++ * if any and will avoid a switch off by regulator_late_cleanup() ++ */ ++ ret = dev_pm_opp_set_opp(dev, opp); ++ if (ret) { ++ DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n"); ++ return ret; ++ } ++ ++ dev_pm_opp_put(opp); ++ ++ /* ++ * Setup default thresholds for the simple_ondemand governor. ++ * The values are chosen based on experiments. ++ */ ++ pdevfreq->gov_data.upthreshold = 45; ++ pdevfreq->gov_data.downdifferential = 5; ++ ++ pdevfreq->devfreq = devm_devfreq_add_device(dev, &panthor_devfreq_profile, ++ DEVFREQ_GOV_SIMPLE_ONDEMAND, ++ &pdevfreq->gov_data); ++ if (IS_ERR(pdevfreq->devfreq)) { ++ DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n"); ++ ret = PTR_ERR(pdevfreq->devfreq); ++ pdevfreq->devfreq = NULL; ++ return ret; ++ } ++ ++ cooling = devfreq_cooling_em_register(pdevfreq->devfreq, NULL); ++ if (IS_ERR(cooling)) ++ DRM_DEV_INFO(dev, "Failed to register cooling device\n"); ++ ++ return 0; ++} ++ ++int panthor_devfreq_resume(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ ++ if (!pdevfreq->devfreq) ++ return 0; ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ return devfreq_resume_device(pdevfreq->devfreq); ++} ++ ++int panthor_devfreq_suspend(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ ++ if (!pdevfreq->devfreq) ++ return 0; ++ ++ return devfreq_suspend_device(pdevfreq->devfreq); ++} ++ ++void panthor_devfreq_record_busy(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ if (!pdevfreq->devfreq) ++ return; ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ pdevfreq->last_busy_state = true; ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++} ++ ++void panthor_devfreq_record_idle(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ if (!pdevfreq->devfreq) ++ return; ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ pdevfreq->last_busy_state = false; ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++} +--- /dev/null ++++ 
b/drivers/gpu/drm/panthor/panthor_devfreq.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#ifndef __PANTHOR_DEVFREQ_H__ ++#define __PANTHOR_DEVFREQ_H__ ++ ++struct devfreq; ++struct thermal_cooling_device; ++ ++struct panthor_device; ++struct panthor_devfreq; ++ ++int panthor_devfreq_init(struct panthor_device *ptdev); ++ ++int panthor_devfreq_resume(struct panthor_device *ptdev); ++int panthor_devfreq_suspend(struct panthor_device *ptdev); ++ ++void panthor_devfreq_record_busy(struct panthor_device *ptdev); ++void panthor_devfreq_record_idle(struct panthor_device *ptdev); ++ ++#endif /* __PANTHOR_DEVFREQ_H__ */ diff --git a/patches-6.6/034-36-v6.10-drm-panthor-Add-the-MMU-VM-logical-block.patch b/patches-6.6/034-36-v6.10-drm-panthor-Add-the-MMU-VM-logical-block.patch new file mode 100644 index 0000000..097c09a --- /dev/null +++ b/patches-6.6/034-36-v6.10-drm-panthor-Add-the-MMU-VM-logical-block.patch @@ -0,0 +1,2975 @@ +From 647810ec247641eb5aec8caef818919a4518a0b1 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:21 +0100 +Subject: [PATCH] drm/panthor: Add the MMU/VM logical block + +MMU and VM management is related and placed in the same source file. + +Page table updates are delegated to the io-pgtable-arm driver that's in +the iommu subsystem. + +The VM management logic is based on drm_gpuva_mgr, and is assuming the +VA space is mostly managed by the usermode driver, except for a reserved +portion of this VA-space that's used for kernel objects (like the heap +contexts/chunks). + +Both asynchronous and synchronous VM operations are supported, and +internal helpers are exposed to allow other logical blocks to map their +buffers in the GPU VA space. + +There's one VM_BIND queue per-VM (meaning the Vulkan driver can only +expose one sparse-binding queue), and this bind queue is managed with +a 1:1 drm_sched_entity:drm_gpu_scheduler, such that each VM gets its own +independent execution queue, avoiding VM operation serialization at the +device level (things are still serialized at the VM level). + +The rest is just implementation details that are hopefully well explained +in the documentation. 
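To illustrate the 1:1 pairing described above, here is a sketch rather than code lifted from the patch: the VM_BIND entity is only ever offered the VM's own scheduler, which is what keeps bind jobs on different VMs independent. The vm->sched and vm->entity fields are the ones added to struct panthor_vm in this patch; the priority constant and error handling are arbitrary choices for the example:

struct drm_gpu_scheduler *sched_list[] = { &vm->sched };
int err;

/* The entity's only candidate scheduler is the VM's own scheduler, so
 * VM_BIND jobs are serialized per-VM but never across VMs. */
err = drm_sched_entity_init(&vm->entity, DRM_SCHED_PRIORITY_NORMAL,
			    sched_list, ARRAY_SIZE(sched_list), NULL);
if (err)
	return err;

The creation of the per-VM scheduler itself is left out here because the drm_sched_init() arguments depend on the drm_sched changes backported earlier in this series.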
+ +v6: +- Add Maxime's and Heiko's acks +- Add Steve's R-b +- Adjust the TRANSCFG value to account for SW VA space limitation on + 32-bit systems +- Keep header inclusion alphabetically ordered + +v5: +- Fix a double panthor_vm_cleanup_op_ctx() call +- Fix a race between panthor_vm_prepare_map_op_ctx() and + panthor_vm_bo_put() +- Fix panthor_vm_pool_destroy_vm() kernel doc +- Fix paddr adjustment in panthor_vm_map_pages() +- Fix bo_offset calculation in panthor_vm_get_bo_for_va() + +v4: +- Add an helper to return the VM state +- Check drmm_mutex_init() return code +- Remove the VM from the AS reclaim list when panthor_vm_active() is + called +- Count the number of active VM users instead of considering there's + at most one user (several scheduling groups can point to the same + vM) +- Pre-allocate a VMA object for unmap operations (unmaps can trigger + a sm_step_remap() call) +- Check vm->root_page_table instead of vm->pgtbl_ops to detect if + the io-pgtable is trying to allocate the root page table +- Don't memset() the va_node in panthor_vm_alloc_va(), make it a + caller requirement +- Fix the kernel doc in a few places +- Drop the panthor_vm::base offset constraint and modify + panthor_vm_put() to explicitly check for a NULL value +- Fix unbalanced vm_bo refcount in panthor_gpuva_sm_step_remap() +- Drop stale comments about the shared_bos list +- Patch mmu_features::va_bits on 32-bit builds to reflect the + io_pgtable limitation and let the UMD know about it + +v3: +- Add acks for the MIT/GPL2 relicensing +- Propagate MMU faults to the scheduler +- Move pages pinning/unpinning out of the dma_signalling path +- Fix 32-bit support +- Rework the user/kernel VA range calculation +- Make the auto-VA range explicit (auto-VA range doesn't cover the full + kernel-VA range on the MCU VM) +- Let callers of panthor_vm_alloc_va() allocate the drm_mm_node + (embedded in panthor_kernel_bo now) +- Adjust things to match the latest drm_gpuvm changes (extobj tracking, + resv prep and more) +- Drop the per-AS lock and use slots_lock (fixes a race on vm->as.id) +- Set as.id to -1 when reusing an address space from the LRU list +- Drop misleading comment about page faults +- Remove check for irq being assigned in panthor_mmu_unplug() + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-8-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_mmu.c | 2768 +++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_mmu.h | 102 + + 2 files changed, 2870 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_mmu.c + create mode 100644 drivers/gpu/drm/panthor/panthor_mmu.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_mmu.c +@@ -0,0 +1,2768 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++#define MAX_AS_SLOTS 32 ++ ++struct panthor_vm; ++ ++/** ++ * struct panthor_as_slot - Address space slot ++ */ ++struct panthor_as_slot { ++ /** @vm: VM bound to this slot. NULL if no VM is bound. */ ++ struct panthor_vm *vm; ++}; ++ ++/** ++ * struct panthor_mmu - MMU related data ++ */ ++struct panthor_mmu { ++ /** @irq: The MMU irq. */ ++ struct panthor_irq irq; ++ ++ /** @as: Address space related fields. ++ * ++ * The GPU has a limited number of address space (AS) slots, forcing ++ * us to re-assign them on demand. ++ */ ++ struct { ++ /** @slots_lock: Lock protecting access to all other AS fields. */ ++ struct mutex slots_lock; ++ ++ /** @alloc_mask: Bitmask encoding the allocated slots. */ ++ unsigned long alloc_mask; ++ ++ /** @faulty_mask: Bitmask encoding the faulty slots. */ ++ unsigned long faulty_mask; ++ ++ /** @slots: VMs currently bound to the AS slots. */ ++ struct panthor_as_slot slots[MAX_AS_SLOTS]; ++ ++ /** ++ * @lru_list: List of least recently used VMs. ++ * ++ * We use this list to pick a VM to evict when all slots are ++ * used. ++ * ++ * There should be no more active VMs than there are AS slots, ++ * so this LRU is just here to keep VMs bound until there's ++ * a need to release a slot, thus avoiding unnecessary TLB/cache ++ * flushes. ++ */ ++ struct list_head lru_list; ++ } as; ++ ++ /** @vm: VM management fields */ ++ struct { ++ /** @lock: Lock protecting access to list. */ ++ struct mutex lock; ++ ++ /** @list: List containing all VMs. */ ++ struct list_head list; ++ ++ /** @reset_in_progress: True if a reset is in progress. */ ++ bool reset_in_progress; ++ ++ /** @wq: Workqueue used for the VM_BIND queues. */ ++ struct workqueue_struct *wq; ++ } vm; ++}; ++ ++/** ++ * struct panthor_vm_pool - VM pool object ++ */ ++struct panthor_vm_pool { ++ /** @xa: Array used for VM handle tracking. */ ++ struct xarray xa; ++}; ++ ++/** ++ * struct panthor_vma - GPU mapping object ++ * ++ * This is used to track GEM mappings in GPU space. ++ */ ++struct panthor_vma { ++ /** @base: Inherits from drm_gpuva. */ ++ struct drm_gpuva base; ++ ++ /** @node: Used to implement deferred release of VMAs. */ ++ struct list_head node; ++ ++ /** ++ * @flags: Combination of drm_panthor_vm_bind_op_flags. ++ * ++ * Only map related flags are accepted. ++ */ ++ u32 flags; ++}; ++ ++/** ++ * struct panthor_vm_op_ctx - VM operation context ++ * ++ * With VM operations potentially taking place in a dma-signaling path, we ++ * need to make sure everything that might require resource allocation is ++ * pre-allocated upfront. This is what this operation context is for. ++ * ++ * We also collect resources that have been freed, so we can release them ++ * asynchronously, and let the VM_BIND scheduler process the next VM_BIND ++ * request. ++ */ ++struct panthor_vm_op_ctx { ++ /** @rsvd_page_tables: Pages reserved for the MMU page table update. */ ++ struct { ++ /** @count: Number of pages reserved. */ ++ u32 count; ++ ++ /** @ptr: Points to the first unused page in the @pages table.
*/ ++ u32 ptr; ++ ++ /** ++ * @page: Array of pages that can be used for an MMU page table update. ++ * ++ * After an VM operation, there might be free pages left in this array. ++ * They should be returned to the pt_cache as part of the op_ctx cleanup. ++ */ ++ void **pages; ++ } rsvd_page_tables; ++ ++ /** ++ * @preallocated_vmas: Pre-allocated VMAs to handle the remap case. ++ * ++ * Partial unmap requests or map requests overlapping existing mappings will ++ * trigger a remap call, which need to register up to three panthor_vma objects ++ * (one for the new mapping, and two for the previous and next mappings). ++ */ ++ struct panthor_vma *preallocated_vmas[3]; ++ ++ /** @flags: Combination of drm_panthor_vm_bind_op_flags. */ ++ u32 flags; ++ ++ /** @va: Virtual range targeted by the VM operation. */ ++ struct { ++ /** @addr: Start address. */ ++ u64 addr; ++ ++ /** @range: Range size. */ ++ u64 range; ++ } va; ++ ++ /** ++ * @returned_vmas: List of panthor_vma objects returned after a VM operation. ++ * ++ * For unmap operations, this will contain all VMAs that were covered by the ++ * specified VA range. ++ * ++ * For map operations, this will contain all VMAs that previously mapped to ++ * the specified VA range. ++ * ++ * Those VMAs, and the resources they point to will be released as part of ++ * the op_ctx cleanup operation. ++ */ ++ struct list_head returned_vmas; ++ ++ /** @map: Fields specific to a map operation. */ ++ struct { ++ /** @vm_bo: Buffer object to map. */ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ /** @bo_offset: Offset in the buffer object. */ ++ u64 bo_offset; ++ ++ /** ++ * @sgt: sg-table pointing to pages backing the GEM object. ++ * ++ * This is gathered at job creation time, such that we don't have ++ * to allocate in ::run_job(). ++ */ ++ struct sg_table *sgt; ++ ++ /** ++ * @new_vma: The new VMA object that will be inserted to the VA tree. ++ */ ++ struct panthor_vma *new_vma; ++ } map; ++}; ++ ++/** ++ * struct panthor_vm - VM object ++ * ++ * A VM is an object representing a GPU (or MCU) virtual address space. ++ * It embeds the MMU page table for this address space, a tree containing ++ * all the virtual mappings of GEM objects, and other things needed to manage ++ * the VM. ++ * ++ * Except for the MCU VM, which is managed by the kernel, all other VMs are ++ * created by userspace and mostly managed by userspace, using the ++ * %DRM_IOCTL_PANTHOR_VM_BIND ioctl. ++ * ++ * A portion of the virtual address space is reserved for kernel objects, ++ * like heap chunks, and userspace gets to decide how much of the virtual ++ * address space is left to the kernel (half of the virtual address space ++ * by default). ++ */ ++struct panthor_vm { ++ /** ++ * @base: Inherit from drm_gpuvm. ++ * ++ * We delegate all the VA management to the common drm_gpuvm framework ++ * and only implement hooks to update the MMU page table. ++ */ ++ struct drm_gpuvm base; ++ ++ /** ++ * @sched: Scheduler used for asynchronous VM_BIND request. ++ * ++ * We use a 1:1 scheduler here. ++ */ ++ struct drm_gpu_scheduler sched; ++ ++ /** ++ * @entity: Scheduling entity representing the VM_BIND queue. ++ * ++ * There's currently one bind queue per VM. It doesn't make sense to ++ * allow more given the VM operations are serialized anyway. ++ */ ++ struct drm_sched_entity entity; ++ ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** @memattr: Value to program to the AS_MEMATTR register. */ ++ u64 memattr; ++ ++ /** @pgtbl_ops: Page table operations. 
*/ ++ struct io_pgtable_ops *pgtbl_ops; ++ ++ /** @root_page_table: Stores the root page table pointer. */ ++ void *root_page_table; ++ ++ /** ++ * @op_lock: Lock used to serialize operations on a VM. ++ * ++ * The serialization of jobs queued to the VM_BIND queue is already ++ * taken care of by drm_sched, but we need to serialize synchronous ++ * and asynchronous VM_BIND request. This is what this lock is for. ++ */ ++ struct mutex op_lock; ++ ++ /** ++ * @op_ctx: The context attached to the currently executing VM operation. ++ * ++ * NULL when no operation is in progress. ++ */ ++ struct panthor_vm_op_ctx *op_ctx; ++ ++ /** ++ * @mm: Memory management object representing the auto-VA/kernel-VA. ++ * ++ * Used to auto-allocate VA space for kernel-managed objects (tiler ++ * heaps, ...). ++ * ++ * For the MCU VM, this is managing the VA range that's used to map ++ * all shared interfaces. ++ * ++ * For user VMs, the range is specified by userspace, and must not ++ * exceed half of the VA space addressable. ++ */ ++ struct drm_mm mm; ++ ++ /** @mm_lock: Lock protecting the @mm field. */ ++ struct mutex mm_lock; ++ ++ /** @kernel_auto_va: Automatic VA-range for kernel BOs. */ ++ struct { ++ /** @start: Start of the automatic VA-range for kernel BOs. */ ++ u64 start; ++ ++ /** @size: Size of the automatic VA-range for kernel BOs. */ ++ u64 end; ++ } kernel_auto_va; ++ ++ /** @as: Address space related fields. */ ++ struct { ++ /** ++ * @id: ID of the address space this VM is bound to. ++ * ++ * A value of -1 means the VM is inactive/not bound. ++ */ ++ int id; ++ ++ /** @active_cnt: Number of active users of this VM. */ ++ refcount_t active_cnt; ++ ++ /** ++ * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list. ++ * ++ * Active VMs should not be inserted in the LRU list. ++ */ ++ struct list_head lru_node; ++ } as; ++ ++ /** ++ * @heaps: Tiler heap related fields. ++ */ ++ struct { ++ /** ++ * @pool: The heap pool attached to this VM. ++ * ++ * Will stay NULL until someone creates a heap context on this VM. ++ */ ++ struct panthor_heap_pool *pool; ++ ++ /** @lock: Lock used to protect access to @pool. */ ++ struct mutex lock; ++ } heaps; ++ ++ /** @node: Used to insert the VM in the panthor_mmu::vm::list. */ ++ struct list_head node; ++ ++ /** @for_mcu: True if this is the MCU VM. */ ++ bool for_mcu; ++ ++ /** ++ * @destroyed: True if the VM was destroyed. ++ * ++ * No further bind requests should be queued to a destroyed VM. ++ */ ++ bool destroyed; ++ ++ /** ++ * @unusable: True if the VM has turned unusable because something ++ * bad happened during an asynchronous request. ++ * ++ * We don't try to recover from such failures, because this implies ++ * informing userspace about the specific operation that failed, and ++ * hoping the userspace driver can replay things from there. This all ++ * sounds very complicated for little gain. ++ * ++ * Instead, we should just flag the VM as unusable, and fail any ++ * further request targeting this VM. ++ * ++ * We also provide a way to query a VM state, so userspace can destroy ++ * it and create a new one. ++ * ++ * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST ++ * situation, where the logical device needs to be re-created. ++ */ ++ bool unusable; ++ ++ /** ++ * @unhandled_fault: Unhandled fault happened. ++ * ++ * This should be reported to the scheduler, and the queue/group be ++ * flagged as faulty as a result. 
++ */ ++ bool unhandled_fault; ++}; ++ ++/** ++ * struct panthor_vm_bind_job - VM bind job ++ */ ++struct panthor_vm_bind_job { ++ /** @base: Inherit from drm_sched_job. */ ++ struct drm_sched_job base; ++ ++ /** @refcount: Reference count. */ ++ struct kref refcount; ++ ++ /** @cleanup_op_ctx_work: Work used to cleanup the VM operation context. */ ++ struct work_struct cleanup_op_ctx_work; ++ ++ /** @vm: VM targeted by the VM operation. */ ++ struct panthor_vm *vm; ++ ++ /** @ctx: Operation context. */ ++ struct panthor_vm_op_ctx ctx; ++}; ++ ++/** ++ * @pt_cache: Cache used to allocate MMU page tables. ++ * ++ * The pre-allocation pattern forces us to over-allocate to plan for ++ * the worst case scenario, and return the pages we didn't use. ++ * ++ * Having a kmem_cache allows us to speed allocations. ++ */ ++static struct kmem_cache *pt_cache; ++ ++/** ++ * alloc_pt() - Custom page table allocator ++ * @cookie: Cookie passed at page table allocation time. ++ * @size: Size of the page table. This size should be fixed, ++ * and determined at creation time based on the granule size. ++ * @gfp: GFP flags. ++ * ++ * We want a custom allocator so we can use a cache for page table ++ * allocations and amortize the cost of the over-reservation that's ++ * done to allow asynchronous VM operations. ++ * ++ * Return: non-NULL on success, NULL if the allocation failed for any ++ * reason. ++ */ ++static void *alloc_pt(void *cookie, size_t size, gfp_t gfp) ++{ ++ struct panthor_vm *vm = cookie; ++ void *page; ++ ++ /* Allocation of the root page table happening during init. */ ++ if (unlikely(!vm->root_page_table)) { ++ struct page *p; ++ ++ drm_WARN_ON(&vm->ptdev->base, vm->op_ctx); ++ p = alloc_pages_node(dev_to_node(vm->ptdev->base.dev), ++ gfp | __GFP_ZERO, get_order(size)); ++ page = p ? page_address(p) : NULL; ++ vm->root_page_table = page; ++ return page; ++ } ++ ++ /* We're not supposed to have anything bigger than 4k here, because we picked a ++ * 4k granule size at init time. ++ */ ++ if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) ++ return NULL; ++ ++ /* We must have some op_ctx attached to the VM and it must have at least one ++ * free page. ++ */ ++ if (drm_WARN_ON(&vm->ptdev->base, !vm->op_ctx) || ++ drm_WARN_ON(&vm->ptdev->base, ++ vm->op_ctx->rsvd_page_tables.ptr >= vm->op_ctx->rsvd_page_tables.count)) ++ return NULL; ++ ++ page = vm->op_ctx->rsvd_page_tables.pages[vm->op_ctx->rsvd_page_tables.ptr++]; ++ memset(page, 0, SZ_4K); ++ ++ /* Page table entries don't use virtual addresses, which trips out ++ * kmemleak. kmemleak_alloc_phys() might work, but physical addresses ++ * are mixed with other fields, and I fear kmemleak won't detect that ++ * either. ++ * ++ * Let's just ignore memory passed to the page-table driver for now. ++ */ ++ kmemleak_ignore(page); ++ return page; ++} ++ ++/** ++ * @free_pt() - Custom page table free function ++ * @cookie: Cookie passed at page table allocation time. ++ * @data: Page table to free. ++ * @size: Size of the page table. This size should be fixed, ++ * and determined at creation time based on the granule size. ++ */ ++static void free_pt(void *cookie, void *data, size_t size) ++{ ++ struct panthor_vm *vm = cookie; ++ ++ if (unlikely(vm->root_page_table == data)) { ++ free_pages((unsigned long)data, get_order(size)); ++ vm->root_page_table = NULL; ++ return; ++ } ++ ++ if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) ++ return; ++ ++ /* Return the page to the pt_cache. 
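The comments above describe an over-provision/return dance around pt_cache without showing it end to end. Below is a minimal sketch of that reserve/consume/return pattern using the same kmem_cache bulk helpers the op-ctx code further down relies on; example_reserve_and_return() and the array size of 8 are invented for illustration, and <linux/slab.h> is assumed for the bulk API.

static int example_reserve_and_return(struct kmem_cache *cache)
{
	void *pages[8];
	u32 used = 0;
	int got;

	/* Reserve the worst case up front, before entering any
	 * dma-signalling section.
	 */
	got = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(pages), pages);
	if (got != ARRAY_SIZE(pages)) {
		if (got)
			kmem_cache_free_bulk(cache, got, pages);
		return -ENOMEM;
	}

	/* alloc_pt() would hand out pages[used++] while the operation runs. */

	/* Whatever was not consumed goes back to the cache in one call. */
	kmem_cache_free_bulk(cache, got - used, pages + used);
	return 0;
}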
*/ ++ kmem_cache_free(pt_cache, data); ++} ++ ++static int wait_ready(struct panthor_device *ptdev, u32 as_nr) ++{ ++ int ret; ++ u32 val; ++ ++ /* Wait for the MMU status to indicate there is no active command, in ++ * case one is pending. ++ */ ++ ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr), ++ val, !(val & AS_STATUS_AS_ACTIVE), ++ 10, 100000); ++ ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ drm_err(&ptdev->base, "AS_ACTIVE bit stuck\n"); ++ } ++ ++ return ret; ++} ++ ++static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd) ++{ ++ int status; ++ ++ /* write AS_COMMAND when MMU is ready to accept another command */ ++ status = wait_ready(ptdev, as_nr); ++ if (!status) ++ gpu_write(ptdev, AS_COMMAND(as_nr), cmd); ++ ++ return status; ++} ++ ++static void lock_region(struct panthor_device *ptdev, u32 as_nr, ++ u64 region_start, u64 size) ++{ ++ u8 region_width; ++ u64 region; ++ u64 region_end = region_start + size; ++ ++ if (!size) ++ return; ++ ++ /* ++ * The locked region is a naturally aligned power of 2 block encoded as ++ * log2 minus(1). ++ * Calculate the desired start/end and look for the highest bit which ++ * differs. The smallest naturally aligned block must include this bit ++ * change, the desired region starts with this bit (and subsequent bits) ++ * zeroed and ends with the bit (and subsequent bits) set to one. ++ */ ++ region_width = max(fls64(region_start ^ (region_end - 1)), ++ const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1; ++ ++ /* ++ * Mask off the low bits of region_start (which would be ignored by ++ * the hardware anyway) ++ */ ++ region_start &= GENMASK_ULL(63, region_width); ++ ++ region = region_width | region_start; ++ ++ /* Lock the region that needs to be updated */ ++ gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region)); ++ gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region)); ++ write_cmd(ptdev, as_nr, AS_COMMAND_LOCK); ++} ++ ++static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, ++ u64 iova, u64 size, u32 op) ++{ ++ lockdep_assert_held(&ptdev->mmu->as.slots_lock); ++ ++ if (as_nr < 0) ++ return 0; ++ ++ if (op != AS_COMMAND_UNLOCK) ++ lock_region(ptdev, as_nr, iova, size); ++ ++ /* Run the MMU operation */ ++ write_cmd(ptdev, as_nr, op); ++ ++ /* Wait for the flush to complete */ ++ return wait_ready(ptdev, as_nr); ++} ++ ++static int mmu_hw_do_operation(struct panthor_vm *vm, ++ u64 iova, u64 size, u32 op) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ int ret; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ return ret; ++} ++ ++static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, ++ u64 transtab, u64 transcfg, u64 memattr) ++{ ++ int ret; ++ ++ ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); ++ if (ret) ++ return ret; ++ ++ gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab)); ++ gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab)); ++ ++ gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr)); ++ gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr)); ++ ++ gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg)); ++ gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg)); ++ ++ return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); ++} ++ ++static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) ++{ ++ int ret; 
++ ++ ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); ++ if (ret) ++ return ret; ++ ++ gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0); ++ gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0); ++ ++ gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0); ++ gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0); ++ ++ gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED); ++ gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0); ++ ++ return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); ++} ++ ++static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value) ++{ ++ /* Bits 16 to 31 mean REQ_COMPLETE. */ ++ return value & GENMASK(15, 0); ++} ++ ++static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as) ++{ ++ return BIT(as); ++} ++ ++/** ++ * panthor_vm_has_unhandled_faults() - Check if a VM has unhandled faults ++ * @vm: VM to check. ++ * ++ * Return: true if the VM has unhandled faults, false otherwise. ++ */ ++bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm) ++{ ++ return vm->unhandled_fault; ++} ++ ++/** ++ * panthor_vm_is_unusable() - Check if the VM is still usable ++ * @vm: VM to check. ++ * ++ * Return: true if the VM is unusable, false otherwise. ++ */ ++bool panthor_vm_is_unusable(struct panthor_vm *vm) ++{ ++ return vm->unusable; ++} ++ ++static void panthor_vm_release_as_locked(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ lockdep_assert_held(&ptdev->mmu->as.slots_lock); ++ ++ if (drm_WARN_ON(&ptdev->base, vm->as.id < 0)) ++ return; ++ ++ ptdev->mmu->as.slots[vm->as.id].vm = NULL; ++ clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); ++ refcount_set(&vm->as.active_cnt, 0); ++ list_del_init(&vm->as.lru_node); ++ vm->as.id = -1; ++} ++ ++/** ++ * panthor_vm_active() - Flag a VM as active ++ * @VM: VM to flag as active. ++ * ++ * Assigns an address space to a VM so it can be used by the GPU/MCU. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_active(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg; ++ int ret = 0, as, cookie; ++ u64 transtab, transcfg; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return -ENODEV; ++ ++ if (refcount_inc_not_zero(&vm->as.active_cnt)) ++ goto out_dev_exit; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ if (refcount_inc_not_zero(&vm->as.active_cnt)) ++ goto out_unlock; ++ ++ as = vm->as.id; ++ if (as >= 0) { ++ /* Unhandled pagefault on this AS, the MMU was disabled. We need to ++ * re-enable the MMU after clearing+unmasking the AS interrupts. 
++ */ ++ if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) ++ goto out_enable_as; ++ ++ goto out_make_active; ++ } ++ ++ /* Check for a free AS */ ++ if (vm->for_mcu) { ++ drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0)); ++ as = 0; ++ } else { ++ as = ffz(ptdev->mmu->as.alloc_mask | BIT(0)); ++ } ++ ++ if (!(BIT(as) & ptdev->gpu_info.as_present)) { ++ struct panthor_vm *lru_vm; ++ ++ lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list, ++ struct panthor_vm, ++ as.lru_node); ++ if (drm_WARN_ON(&ptdev->base, !lru_vm)) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ ++ drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt)); ++ as = lru_vm->as.id; ++ panthor_vm_release_as_locked(lru_vm); ++ } ++ ++ /* Assign the free or reclaimed AS to the FD */ ++ vm->as.id = as; ++ set_bit(as, &ptdev->mmu->as.alloc_mask); ++ ptdev->mmu->as.slots[as].vm = vm; ++ ++out_enable_as: ++ transtab = cfg->arm_lpae_s1_cfg.ttbr; ++ transcfg = AS_TRANSCFG_PTW_MEMATTR_WB | ++ AS_TRANSCFG_PTW_RA | ++ AS_TRANSCFG_ADRMODE_AARCH64_4K | ++ AS_TRANSCFG_INA_BITS(55 - va_bits); ++ if (ptdev->coherent) ++ transcfg |= AS_TRANSCFG_PTW_SH_OS; ++ ++ /* If the VM is re-activated, we clear the fault. */ ++ vm->unhandled_fault = false; ++ ++ /* Unhandled pagefault on this AS, clear the fault and re-enable interrupts ++ * before enabling the AS. ++ */ ++ if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { ++ gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); ++ ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); ++ gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); ++ } ++ ++ ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr); ++ ++out_make_active: ++ if (!ret) { ++ refcount_set(&vm->as.active_cnt, 1); ++ list_del_init(&vm->as.lru_node); ++ } ++ ++out_unlock: ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++/** ++ * panthor_vm_idle() - Flag a VM idle ++ * @VM: VM to flag as idle. ++ * ++ * When we know the GPU is done with the VM (no more jobs to process), ++ * we can relinquish the AS slot attached to this VM, if any. ++ * ++ * We don't release the slot immediately, but instead place the VM in ++ * the LRU list, so it can be evicted if another VM needs an AS slot. ++ * This way, VMs keep attached to the AS they were given until we run ++ * out of free slot, limiting the number of MMU operations (TLB flush ++ * and other AS updates). ++ */ ++void panthor_vm_idle(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ if (!refcount_dec_and_mutex_lock(&vm->as.active_cnt, &ptdev->mmu->as.slots_lock)) ++ return; ++ ++ if (!drm_WARN_ON(&ptdev->base, vm->as.id == -1 || !list_empty(&vm->as.lru_node))) ++ list_add_tail(&vm->as.lru_node, &ptdev->mmu->as.lru_list); ++ ++ refcount_set(&vm->as.active_cnt, 0); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++} ++ ++static void panthor_vm_stop(struct panthor_vm *vm) ++{ ++ drm_sched_stop(&vm->sched, NULL); ++} ++ ++static void panthor_vm_start(struct panthor_vm *vm) ++{ ++ drm_sched_start(&vm->sched, true); ++} ++ ++/** ++ * panthor_vm_as() - Get the AS slot attached to a VM ++ * @vm: VM to get the AS slot of. ++ * ++ * Return: -1 if the VM is not assigned an AS slot yet, >= 0 otherwise. 
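Taken together, the kernel-doc above implies a simple calling convention for users of a VM. The sketch below is hypothetical glue (example_submit() does not exist in the driver) but only uses helpers documented in this file.

static int example_submit(struct panthor_vm *vm)
{
	int ret;

	/* Binds an AS slot, evicting an idle VM from the LRU list if needed. */
	ret = panthor_vm_active(vm);
	if (ret)
		return ret;

	/* panthor_vm_as(vm) is >= 0 for as long as the VM stays active;
	 * point the GPU/MCU job at that address space and run it here.
	 */

	/* Drop our activity reference; the slot is kept on the LRU list so a
	 * quick re-activation doesn't pay for another TLB/cache flush.
	 */
	panthor_vm_idle(vm);
	return 0;
}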
++ */ ++int panthor_vm_as(struct panthor_vm *vm) ++{ ++ return vm->as.id; ++} ++ ++static size_t get_pgsize(u64 addr, size_t size, size_t *count) ++{ ++ /* ++ * io-pgtable only operates on multiple pages within a single table ++ * entry, so we need to split at boundaries of the table size, i.e. ++ * the next block size up. The distance from address A to the next ++ * boundary of block size B is logically B - A % B, but in unsigned ++ * two's complement where B is a power of two we get the equivalence ++ * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :) ++ */ ++ size_t blk_offset = -addr % SZ_2M; ++ ++ if (blk_offset || size < SZ_2M) { ++ *count = min_not_zero(blk_offset, size) / SZ_4K; ++ return SZ_4K; ++ } ++ blk_offset = -addr % SZ_1G ?: SZ_1G; ++ *count = min(blk_offset, size) / SZ_2M; ++ return SZ_2M; ++} ++ ++static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ int ret = 0, cookie; ++ ++ if (vm->as.id < 0) ++ return 0; ++ ++ /* If the device is unplugged, we just silently skip the flush. */ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return 0; ++ ++ /* Flush the PTs only if we're already awake */ ++ if (pm_runtime_active(ptdev->base.dev)) ++ ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ struct io_pgtable_ops *ops = vm->pgtbl_ops; ++ u64 offset = 0; ++ ++ drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size); ++ ++ while (offset < size) { ++ size_t unmapped_sz = 0, pgcount; ++ size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); ++ ++ unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL); ++ ++ if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) { ++ drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n", ++ iova + offset + unmapped_sz, ++ iova + offset + pgsize * pgcount, ++ iova, iova + size); ++ panthor_vm_flush_range(vm, iova, offset + unmapped_sz); ++ return -EINVAL; ++ } ++ offset += unmapped_sz; ++ } ++ ++ return panthor_vm_flush_range(vm, iova, size); ++} ++ ++static int ++panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, ++ struct sg_table *sgt, u64 offset, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ unsigned int count; ++ struct scatterlist *sgl; ++ struct io_pgtable_ops *ops = vm->pgtbl_ops; ++ u64 start_iova = iova; ++ int ret; ++ ++ if (!size) ++ return 0; ++ ++ for_each_sgtable_dma_sg(sgt, sgl, count) { ++ dma_addr_t paddr = sg_dma_address(sgl); ++ size_t len = sg_dma_len(sgl); ++ ++ if (len <= offset) { ++ offset -= len; ++ continue; ++ } ++ ++ paddr += offset; ++ len -= offset; ++ len = min_t(size_t, len, size); ++ size -= len; ++ ++ drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx", ++ vm->as.id, iova, &paddr, len); ++ ++ while (len) { ++ size_t pgcount, mapped = 0; ++ size_t pgsize = get_pgsize(iova | paddr, len, &pgcount); ++ ++ ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, ++ GFP_KERNEL, &mapped); ++ iova += mapped; ++ paddr += mapped; ++ len -= mapped; ++ ++ if (drm_WARN_ON(&ptdev->base, !ret && !mapped)) ++ ret = -ENOMEM; ++ ++ if (ret) { ++ /* If something failed, unmap what we've already mapped before ++ * returning. The unmap call is not supposed to fail. 
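To make the get_pgsize() arithmetic above concrete (assuming each map_pages() call maps everything it is handed): mapping 4 MiB at IOVA 0x2001000 is walked as 511 pages of 4 KiB up to the next 2 MiB boundary at 0x2200000, then one 2 MiB block, then one final 4 KiB page, so the io-pgtable is only ever asked for ranges that a single entry size can cover.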
++ */ ++ drm_WARN_ON(&ptdev->base, ++ panthor_vm_unmap_pages(vm, start_iova, ++ iova - start_iova)); ++ return ret; ++ } ++ } ++ ++ if (!size) ++ break; ++ } ++ ++ return panthor_vm_flush_range(vm, start_iova, iova - start_iova); ++} ++ ++static int flags_to_prot(u32 flags) ++{ ++ int prot = 0; ++ ++ if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC) ++ prot |= IOMMU_NOEXEC; ++ ++ if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)) ++ prot |= IOMMU_CACHE; ++ ++ if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY) ++ prot |= IOMMU_READ; ++ else ++ prot |= IOMMU_READ | IOMMU_WRITE; ++ ++ return prot; ++} ++ ++/** ++ * panthor_vm_alloc_va() - Allocate a region in the auto-va space ++ * @VM: VM to allocate a region on. ++ * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user ++ * wants the VA to be automatically allocated from the auto-VA range. ++ * @size: size of the VA range. ++ * @va_node: drm_mm_node to initialize. Must be zero-initialized. ++ * ++ * Some GPU objects, like heap chunks, are fully managed by the kernel and ++ * need to be mapped to the userspace VM, in the region reserved for kernel ++ * objects. ++ * ++ * This function takes care of allocating a region in the kernel auto-VA space. ++ * ++ * Return: 0 on success, an error code otherwise. ++ */ ++int ++panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, ++ struct drm_mm_node *va_node) ++{ ++ int ret; ++ ++ if (!size || (size & ~PAGE_MASK)) ++ return -EINVAL; ++ ++ if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK)) ++ return -EINVAL; ++ ++ mutex_lock(&vm->mm_lock); ++ if (va != PANTHOR_VM_KERNEL_AUTO_VA) { ++ va_node->start = va; ++ va_node->size = size; ++ ret = drm_mm_reserve_node(&vm->mm, va_node); ++ } else { ++ ret = drm_mm_insert_node_in_range(&vm->mm, va_node, size, ++ size >= SZ_2M ? SZ_2M : SZ_4K, ++ 0, vm->kernel_auto_va.start, ++ vm->kernel_auto_va.end, ++ DRM_MM_INSERT_BEST); ++ } ++ mutex_unlock(&vm->mm_lock); ++ ++ return ret; ++} ++ ++/** ++ * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va() ++ * @VM: VM to free the region on. ++ * @va_node: Memory node representing the region to free. ++ */ ++void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node) ++{ ++ mutex_lock(&vm->mm_lock); ++ drm_mm_remove_node(va_node); ++ mutex_unlock(&vm->mm_lock); ++} ++ ++static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj); ++ struct drm_gpuvm *vm = vm_bo->vm; ++ bool unpin; ++ ++ /* We must retain the GEM before calling drm_gpuvm_bo_put(), ++ * otherwise the mutex might be destroyed while we hold it. ++ * Same goes for the VM, since we take the VM resv lock. ++ */ ++ drm_gem_object_get(&bo->base.base); ++ drm_gpuvm_get(vm); ++ ++ /* We take the resv lock to protect against concurrent accesses to the ++ * gpuvm evicted/extobj lists that are modified in ++ * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put() ++ * releases sthe last vm_bo reference. ++ * We take the BO GPUVA list lock to protect the vm_bo removal from the ++ * GEM vm_bo list. ++ */ ++ dma_resv_lock(drm_gpuvm_resv(vm), NULL); ++ mutex_lock(&bo->gpuva_list_lock); ++ unpin = drm_gpuvm_bo_put(vm_bo); ++ mutex_unlock(&bo->gpuva_list_lock); ++ dma_resv_unlock(drm_gpuvm_resv(vm)); ++ ++ /* If the vm_bo object was destroyed, release the pin reference that ++ * was hold by this object. 
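A sketch of how a kernel-managed object would claim and release auto-VA space with the helpers above; example_auto_va() is hypothetical, but the zero-initialization and page-alignment requirements come straight from the kernel-doc.

static int example_auto_va(struct panthor_vm *vm, u64 size,
			   struct drm_mm_node *node)
{
	int ret;

	/* Caller responsibility: the node must be zero-initialized and the
	 * size page-aligned.
	 */
	memset(node, 0, sizeof(*node));

	ret = panthor_vm_alloc_va(vm, PANTHOR_VM_KERNEL_AUTO_VA, size, node);
	if (ret)
		return ret;

	/* ... map the backing pages at node->start and use the object ... */

	panthor_vm_free_va(vm, node);
	return 0;
}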
++ */ ++ if (unpin && !bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ drm_gpuvm_put(vm); ++ drm_gem_object_put(&bo->base.base); ++} ++ ++static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm) ++{ ++ struct panthor_vma *vma, *tmp_vma; ++ ++ u32 remaining_pt_count = op_ctx->rsvd_page_tables.count - ++ op_ctx->rsvd_page_tables.ptr; ++ ++ if (remaining_pt_count) { ++ kmem_cache_free_bulk(pt_cache, remaining_pt_count, ++ op_ctx->rsvd_page_tables.pages + ++ op_ctx->rsvd_page_tables.ptr); ++ } ++ ++ kfree(op_ctx->rsvd_page_tables.pages); ++ ++ if (op_ctx->map.vm_bo) ++ panthor_vm_bo_put(op_ctx->map.vm_bo); ++ ++ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) ++ kfree(op_ctx->preallocated_vmas[i]); ++ ++ list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) { ++ list_del(&vma->node); ++ panthor_vm_bo_put(vma->base.vm_bo); ++ kfree(vma); ++ } ++} ++ ++static struct panthor_vma * ++panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx) ++{ ++ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) { ++ struct panthor_vma *vma = op_ctx->preallocated_vmas[i]; ++ ++ if (vma) { ++ op_ctx->preallocated_vmas[i] = NULL; ++ return vma; ++ } ++ } ++ ++ return NULL; ++} ++ ++static int ++panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) ++{ ++ u32 vma_count; ++ ++ switch (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ /* One VMA for the new mapping, and two more VMAs for the remap case ++ * which might contain both a prev and next VA. ++ */ ++ vma_count = 3; ++ break; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ /* Partial unmaps might trigger a remap with either a prev or a next VA, ++ * but not both. ++ */ ++ vma_count = 1; ++ break; ++ ++ default: ++ return 0; ++ } ++ ++ for (u32 i = 0; i < vma_count; i++) { ++ struct panthor_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); ++ ++ if (!vma) ++ return -ENOMEM; ++ ++ op_ctx->preallocated_vmas[i] = vma; ++ } ++ ++ return 0; ++} ++ ++#define PANTHOR_VM_BIND_OP_MAP_FLAGS \ ++ (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ ++static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm, ++ struct panthor_gem_object *bo, ++ u64 offset, ++ u64 size, u64 va, ++ u32 flags) ++{ ++ struct drm_gpuvm_bo *preallocated_vm_bo; ++ struct sg_table *sgt = NULL; ++ u64 pt_count; ++ int ret; ++ ++ if (!bo) ++ return -EINVAL; ++ ++ if ((flags & ~PANTHOR_VM_BIND_OP_MAP_FLAGS) || ++ (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP) ++ return -EINVAL; ++ ++ /* Make sure the VA and size are aligned and in-bounds. */ ++ if (size > bo->base.base.size || offset > bo->base.base.size - size) ++ return -EINVAL; ++ ++ /* If the BO has an exclusive VM attached, it can't be mapped to other VMs. */ ++ if (bo->exclusive_vm_root_gem && ++ bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm)) ++ return -EINVAL; ++ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->flags = flags; ++ op_ctx->va.range = size; ++ op_ctx->va.addr = va; ++ ++ ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); ++ if (ret) ++ goto err_cleanup; ++ ++ if (!bo->base.base.import_attach) { ++ /* Pre-reserve the BO pages, so the map operation doesn't have to ++ * allocate. 
++ */ ++ ret = drm_gem_shmem_pin(&bo->base); ++ if (ret) ++ goto err_cleanup; ++ } ++ ++ sgt = drm_gem_shmem_get_pages_sgt(&bo->base); ++ if (IS_ERR(sgt)) { ++ if (!bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ ret = PTR_ERR(sgt); ++ goto err_cleanup; ++ } ++ ++ op_ctx->map.sgt = sgt; ++ ++ preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base); ++ if (!preallocated_vm_bo) { ++ if (!bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); ++ mutex_unlock(&bo->gpuva_list_lock); ++ ++ /* If the a vm_bo for this combination exists, it already ++ * retains a pin ref, and we can release the one we took earlier. ++ * ++ * If our pre-allocated vm_bo is picked, it now retains the pin ref, ++ * which will be released in panthor_vm_bo_put(). ++ */ ++ if (preallocated_vm_bo != op_ctx->map.vm_bo && ++ !bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ op_ctx->map.bo_offset = offset; ++ ++ /* L1, L2 and L3 page tables. ++ * We could optimize L3 allocation by iterating over the sgt and merging ++ * 2M contiguous blocks, but it's simpler to over-provision and return ++ * the pages if they're not used. ++ */ ++ pt_count = ((ALIGN(va + size, 1ull << 39) - ALIGN_DOWN(va, 1ull << 39)) >> 39) + ++ ((ALIGN(va + size, 1ull << 30) - ALIGN_DOWN(va, 1ull << 30)) >> 30) + ++ ((ALIGN(va + size, 1ull << 21) - ALIGN_DOWN(va, 1ull << 21)) >> 21); ++ ++ op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, ++ sizeof(*op_ctx->rsvd_page_tables.pages), ++ GFP_KERNEL); ++ if (!op_ctx->rsvd_page_tables.pages) ++ goto err_cleanup; ++ ++ ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, ++ op_ctx->rsvd_page_tables.pages); ++ op_ctx->rsvd_page_tables.count = ret; ++ if (ret != pt_count) { ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ ++ /* Insert BO into the extobj list last, when we know nothing can fail. */ ++ dma_resv_lock(panthor_vm_resv(vm), NULL); ++ drm_gpuvm_bo_extobj_add(op_ctx->map.vm_bo); ++ dma_resv_unlock(panthor_vm_resv(vm)); ++ ++ return 0; ++ ++err_cleanup: ++ panthor_vm_cleanup_op_ctx(op_ctx, vm); ++ return ret; ++} ++ ++static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm, ++ u64 va, u64 size) ++{ ++ u32 pt_count = 0; ++ int ret; ++ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->va.range = size; ++ op_ctx->va.addr = va; ++ op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP; ++ ++ /* Pre-allocate L3 page tables to account for the split-2M-block ++ * situation on unmap. 
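Plugging numbers into the map-path pt_count computation above: an 8 MiB mapping at VA 0x10000000 touches one 512 GiB-level region, one 1 GiB-level region and four 2 MiB-aligned blocks, so six page-table pages are reserved for it, with any unused ones handed back to pt_cache once the operation completes.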
++ */ ++ if (va != ALIGN(va, SZ_2M)) ++ pt_count++; ++ ++ if (va + size != ALIGN(va + size, SZ_2M) && ++ ALIGN(va + size, SZ_2M) != ALIGN(va, SZ_2M)) ++ pt_count++; ++ ++ ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); ++ if (ret) ++ goto err_cleanup; ++ ++ if (pt_count) { ++ op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, ++ sizeof(*op_ctx->rsvd_page_tables.pages), ++ GFP_KERNEL); ++ if (!op_ctx->rsvd_page_tables.pages) ++ goto err_cleanup; ++ ++ ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, ++ op_ctx->rsvd_page_tables.pages); ++ if (ret != pt_count) { ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ op_ctx->rsvd_page_tables.count = pt_count; ++ } ++ ++ return 0; ++ ++err_cleanup: ++ panthor_vm_cleanup_op_ctx(op_ctx, vm); ++ return ret; ++} ++ ++static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm) ++{ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY; ++} ++ ++/** ++ * panthor_vm_get_bo_for_va() - Get the GEM object mapped at a virtual address ++ * @vm: VM to look into. ++ * @va: Virtual address to search for. ++ * @bo_offset: Offset of the GEM object mapped at this virtual address. ++ * Only valid on success. ++ * ++ * The object returned by this function might no longer be mapped when the ++ * function returns. It's the caller responsibility to ensure there's no ++ * concurrent map/unmap operations making the returned value invalid, or ++ * make sure it doesn't matter if the object is no longer mapped. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_gem_object * ++panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset) ++{ ++ struct panthor_gem_object *bo = ERR_PTR(-ENOENT); ++ struct drm_gpuva *gpuva; ++ struct panthor_vma *vma; ++ ++ /* Take the VM lock to prevent concurrent map/unmap operations. */ ++ mutex_lock(&vm->op_lock); ++ gpuva = drm_gpuva_find_first(&vm->base, va, 1); ++ vma = gpuva ? container_of(gpuva, struct panthor_vma, base) : NULL; ++ if (vma && vma->base.gem.obj) { ++ drm_gem_object_get(vma->base.gem.obj); ++ bo = to_panthor_bo(vma->base.gem.obj); ++ *bo_offset = vma->base.gem.offset + (va - vma->base.va.addr); ++ } ++ mutex_unlock(&vm->op_lock); ++ ++ return bo; ++} ++ ++#define PANTHOR_VM_MIN_KERNEL_VA_SIZE SZ_256M ++ ++static u64 ++panthor_vm_create_get_user_va_range(const struct drm_panthor_vm_create *args, ++ u64 full_va_range) ++{ ++ u64 user_va_range; ++ ++ /* Make sure we have a minimum amount of VA space for kernel objects. */ ++ if (full_va_range < PANTHOR_VM_MIN_KERNEL_VA_SIZE) ++ return 0; ++ ++ if (args->user_va_range) { ++ /* Use the user provided value if != 0. */ ++ user_va_range = args->user_va_range; ++ } else if (TASK_SIZE_OF(current) < full_va_range) { ++ /* If the task VM size is smaller than the GPU VA range, pick this ++ * as our default user VA range, so userspace can CPU/GPU map buffers ++ * at the same address. ++ */ ++ user_va_range = TASK_SIZE_OF(current); ++ } else { ++ /* If the GPU VA range is smaller than the task VM size, we ++ * just have to live with the fact we won't be able to map ++ * all buffers at the same GPU/CPU address. ++ * ++ * If the GPU VA range is bigger than 4G (more than 32-bit of ++ * VA), we split the range in two, and assign half of it to ++ * the user and the other half to the kernel, if it's not, we ++ * keep the kernel VA space as small as possible. ++ */ ++ user_va_range = full_va_range > SZ_4G ? 
++ full_va_range / 2 : ++ full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; ++ } ++ ++ if (full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE < user_va_range) ++ user_va_range = full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; ++ ++ return user_va_range; ++} ++ ++#define PANTHOR_VM_CREATE_FLAGS 0 ++ ++static int ++panthor_vm_create_check_args(const struct panthor_device *ptdev, ++ const struct drm_panthor_vm_create *args, ++ u64 *kernel_va_start, u64 *kernel_va_range) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ u64 full_va_range = 1ull << va_bits; ++ u64 user_va_range; ++ ++ if (args->flags & ~PANTHOR_VM_CREATE_FLAGS) ++ return -EINVAL; ++ ++ user_va_range = panthor_vm_create_get_user_va_range(args, full_va_range); ++ if (!user_va_range || (args->user_va_range && args->user_va_range > user_va_range)) ++ return -EINVAL; ++ ++ /* Pick a kernel VA range that's a power of two, to have a clear split. */ ++ *kernel_va_range = rounddown_pow_of_two(full_va_range - user_va_range); ++ *kernel_va_start = full_va_range - *kernel_va_range; ++ return 0; ++} ++ ++/* ++ * Only 32 VMs per open file. If that becomes a limiting factor, we can ++ * increase this number. ++ */ ++#define PANTHOR_MAX_VMS_PER_FILE 32 ++ ++/** ++ * panthor_vm_pool_create_vm() - Create a VM ++ * @pool: The VM to create this VM on. ++ * @kernel_va_start: Start of the region reserved for kernel objects. ++ * @kernel_va_range: Size of the region reserved for kernel objects. ++ * ++ * Return: a positive VM ID on success, a negative error code otherwise. ++ */ ++int panthor_vm_pool_create_vm(struct panthor_device *ptdev, ++ struct panthor_vm_pool *pool, ++ struct drm_panthor_vm_create *args) ++{ ++ u64 kernel_va_start, kernel_va_range; ++ struct panthor_vm *vm; ++ int ret; ++ u32 id; ++ ++ ret = panthor_vm_create_check_args(ptdev, args, &kernel_va_start, &kernel_va_range); ++ if (ret) ++ return ret; ++ ++ vm = panthor_vm_create(ptdev, false, kernel_va_start, kernel_va_range, ++ kernel_va_start, kernel_va_range); ++ if (IS_ERR(vm)) ++ return PTR_ERR(vm); ++ ++ ret = xa_alloc(&pool->xa, &id, vm, ++ XA_LIMIT(1, PANTHOR_MAX_VMS_PER_FILE), GFP_KERNEL); ++ ++ if (ret) { ++ panthor_vm_put(vm); ++ return ret; ++ } ++ ++ args->user_va_range = kernel_va_start; ++ return id; ++} ++ ++static void panthor_vm_destroy(struct panthor_vm *vm) ++{ ++ if (!vm) ++ return; ++ ++ vm->destroyed = true; ++ ++ mutex_lock(&vm->heaps.lock); ++ panthor_heap_pool_destroy(vm->heaps.pool); ++ vm->heaps.pool = NULL; ++ mutex_unlock(&vm->heaps.lock); ++ ++ drm_WARN_ON(&vm->ptdev->base, ++ panthor_vm_unmap_range(vm, vm->base.mm_start, vm->base.mm_range)); ++ panthor_vm_put(vm); ++} ++ ++/** ++ * panthor_vm_pool_destroy_vm() - Destroy a VM. ++ * @pool: VM pool. ++ * @handle: VM handle. ++ * ++ * This function doesn't free the VM object or its resources, it just kills ++ * all mappings, and makes sure nothing can be mapped after that point. ++ * ++ * If there was any active jobs at the time this function is called, these ++ * jobs should experience page faults and be killed as a result. ++ * ++ * The VM resources are freed when the last reference on the VM object is ++ * dropped. ++ */ ++int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle) ++{ ++ struct panthor_vm *vm; ++ ++ vm = xa_erase(&pool->xa, handle); ++ ++ panthor_vm_destroy(vm); ++ ++ return vm ? 0 : -EINVAL; ++} ++ ++/** ++ * panthor_vm_pool_get_vm() - Retrieve VM object bound to a VM handle ++ * @pool: VM pool to check. ++ * @handle: Handle of the VM to retrieve. 
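As a worked example of the split done in panthor_vm_create_check_args(): on a GPU exposing 48 VA bits, a userspace request of user_va_range = 1 TiB leaves 2^48 - 2^40 bytes, which rounddown_pow_of_two() turns into a 128 TiB kernel range starting at 2^47, comfortably above the user range and well past the 256 MiB PANTHOR_VM_MIN_KERNEL_VA_SIZE floor.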
++ * ++ * Return: A valid pointer if the VM exists, NULL otherwise. ++ */ ++struct panthor_vm * ++panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle) ++{ ++ struct panthor_vm *vm; ++ ++ vm = panthor_vm_get(xa_load(&pool->xa, handle)); ++ ++ return vm; ++} ++ ++/** ++ * panthor_vm_pool_destroy() - Destroy a VM pool. ++ * @pfile: File. ++ * ++ * Destroy all VMs in the pool, and release the pool resources. ++ * ++ * Note that VMs can outlive the pool they were created from if other ++ * objects hold a reference to there VMs. ++ */ ++void panthor_vm_pool_destroy(struct panthor_file *pfile) ++{ ++ struct panthor_vm *vm; ++ unsigned long i; ++ ++ if (!pfile->vms) ++ return; ++ ++ xa_for_each(&pfile->vms->xa, i, vm) ++ panthor_vm_destroy(vm); ++ ++ xa_destroy(&pfile->vms->xa); ++ kfree(pfile->vms); ++} ++ ++/** ++ * panthor_vm_pool_create() - Create a VM pool ++ * @pfile: File. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_pool_create(struct panthor_file *pfile) ++{ ++ pfile->vms = kzalloc(sizeof(*pfile->vms), GFP_KERNEL); ++ if (!pfile->vms) ++ return -ENOMEM; ++ ++ xa_init_flags(&pfile->vms->xa, XA_FLAGS_ALLOC1); ++ return 0; ++} ++ ++/* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */ ++static void mmu_tlb_flush_all(void *cookie) ++{ ++} ++ ++static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) ++{ ++} ++ ++static const struct iommu_flush_ops mmu_tlb_ops = { ++ .tlb_flush_all = mmu_tlb_flush_all, ++ .tlb_flush_walk = mmu_tlb_flush_walk, ++}; ++ ++static const char *access_type_name(struct panthor_device *ptdev, ++ u32 fault_status) ++{ ++ switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { ++ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: ++ return "ATOMIC"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_READ: ++ return "READ"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: ++ return "WRITE"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_EX: ++ return "EXECUTE"; ++ default: ++ drm_WARN_ON(&ptdev->base, 1); ++ return NULL; ++ } ++} ++ ++static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ bool has_unhandled_faults = false; ++ ++ status = panthor_mmu_fault_mask(ptdev, status); ++ while (status) { ++ u32 as = ffs(status | (status >> 16)) - 1; ++ u32 mask = panthor_mmu_as_fault_mask(ptdev, as); ++ u32 new_int_mask; ++ u64 addr; ++ u32 fault_status; ++ u32 exception_type; ++ u32 access_type; ++ u32 source_id; ++ ++ fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as)); ++ addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as)); ++ addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32; ++ ++ /* decode the fault status */ ++ exception_type = fault_status & 0xFF; ++ access_type = (fault_status >> 8) & 0x3; ++ source_id = (fault_status >> 16); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ ptdev->mmu->as.faulty_mask |= mask; ++ new_int_mask = ++ panthor_mmu_fault_mask(ptdev, ~ptdev->mmu->as.faulty_mask); ++ ++ /* terminal fault, print info about the fault */ ++ drm_err(&ptdev->base, ++ "Unhandled Page fault in AS%d at VA 0x%016llX\n" ++ "raw fault status: 0x%X\n" ++ "decoded fault status: %s\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n", ++ as, addr, ++ fault_status, ++ (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), ++ exception_type, panthor_exception_name(ptdev, exception_type), ++ access_type, access_type_name(ptdev, fault_status), ++ source_id); ++ ++ /* Ignore MMU interrupts on this AS until it's been ++ * re-enabled. 
++ */ ++ ptdev->mmu->irq.mask = new_int_mask; ++ gpu_write(ptdev, MMU_INT_MASK, new_int_mask); ++ ++ if (ptdev->mmu->as.slots[as].vm) ++ ptdev->mmu->as.slots[as].vm->unhandled_fault = true; ++ ++ /* Disable the MMU to kill jobs on this AS. */ ++ panthor_mmu_as_disable(ptdev, as); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ status &= ~mask; ++ has_unhandled_faults = true; ++ } ++ ++ if (has_unhandled_faults) ++ panthor_sched_report_mmu_fault(ptdev); ++} ++PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler); ++ ++/** ++ * panthor_mmu_suspend() - Suspend the MMU logic ++ * @ptdev: Device. ++ * ++ * All we do here is de-assign the AS slots on all active VMs, so things ++ * get flushed to the main memory, and no further access to these VMs are ++ * possible. ++ * ++ * We also suspend the MMU IRQ. ++ */ ++void panthor_mmu_suspend(struct panthor_device *ptdev) ++{ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) { ++ drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); ++ panthor_vm_release_as_locked(vm); ++ } ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++} ++ ++/** ++ * panthor_mmu_resume() - Resume the MMU logic ++ * @ptdev: Device. ++ * ++ * Resume the IRQ. ++ * ++ * We don't re-enable previously active VMs. We assume other parts of the ++ * driver will call panthor_vm_active() on the VMs they intend to use. ++ */ ++void panthor_mmu_resume(struct panthor_device *ptdev) ++{ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ptdev->mmu->as.alloc_mask = 0; ++ ptdev->mmu->as.faulty_mask = 0; ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); ++} ++ ++/** ++ * panthor_mmu_pre_reset() - Prepare for a reset ++ * @ptdev: Device. ++ * ++ * Suspend the IRQ, and make sure all VM_BIND queues are stopped, so we ++ * don't get asked to do a VM operation while the GPU is down. ++ * ++ * We don't cleanly shutdown the AS slots here, because the reset might ++ * come from an AS_ACTIVE_BIT stuck situation. ++ */ ++void panthor_mmu_pre_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_vm *vm; ++ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ ptdev->mmu->vm.reset_in_progress = true; ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) ++ panthor_vm_stop(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++} ++ ++/** ++ * panthor_mmu_post_reset() - Restore things after a reset ++ * @ptdev: Device. ++ * ++ * Put the MMU logic back in action after a reset. That implies resuming the ++ * IRQ and re-enabling the VM_BIND queues. ++ */ ++void panthor_mmu_post_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_vm *vm; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ /* Now that the reset is effective, we can assume that none of the ++ * AS slots are setup, and clear the faulty flags too. ++ */ ++ ptdev->mmu->as.alloc_mask = 0; ++ ptdev->mmu->as.faulty_mask = 0; ++ ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) ++ panthor_vm_release_as_locked(vm); ++ } ++ ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); ++ ++ /* Restart the VM_BIND queues. 
*/ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { ++ panthor_vm_start(vm); ++ } ++ ptdev->mmu->vm.reset_in_progress = false; ++ mutex_unlock(&ptdev->mmu->vm.lock); ++} ++ ++static void panthor_vm_free(struct drm_gpuvm *gpuvm) ++{ ++ struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base); ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ mutex_lock(&vm->heaps.lock); ++ if (drm_WARN_ON(&ptdev->base, vm->heaps.pool)) ++ panthor_heap_pool_destroy(vm->heaps.pool); ++ mutex_unlock(&vm->heaps.lock); ++ mutex_destroy(&vm->heaps.lock); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_del(&vm->node); ++ /* Restore the scheduler state so we can call drm_sched_entity_destroy() ++ * and drm_sched_fini(). If get there, that means we have no job left ++ * and no new jobs can be queued, so we can start the scheduler without ++ * risking interfering with the reset. ++ */ ++ if (ptdev->mmu->vm.reset_in_progress) ++ panthor_vm_start(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ drm_sched_entity_destroy(&vm->entity); ++ drm_sched_fini(&vm->sched); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ if (vm->as.id >= 0) { ++ int cookie; ++ ++ if (drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_mmu_as_disable(ptdev, vm->as.id); ++ drm_dev_exit(cookie); ++ } ++ ++ ptdev->mmu->as.slots[vm->as.id].vm = NULL; ++ clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); ++ list_del(&vm->as.lru_node); ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ free_io_pgtable_ops(vm->pgtbl_ops); ++ ++ drm_mm_takedown(&vm->mm); ++ kfree(vm); ++} ++ ++/** ++ * panthor_vm_put() - Release a reference on a VM ++ * @vm: VM to release the reference on. Can be NULL. ++ */ ++void panthor_vm_put(struct panthor_vm *vm) ++{ ++ drm_gpuvm_put(vm ? &vm->base : NULL); ++} ++ ++/** ++ * panthor_vm_get() - Get a VM reference ++ * @vm: VM to get the reference on. Can be NULL. ++ * ++ * Return: @vm value. ++ */ ++struct panthor_vm *panthor_vm_get(struct panthor_vm *vm) ++{ ++ if (vm) ++ drm_gpuvm_get(&vm->base); ++ ++ return vm; ++} ++ ++/** ++ * panthor_vm_get_heap_pool() - Get the heap pool attached to a VM ++ * @vm: VM to query the heap pool on. ++ * @create: True if the heap pool should be created when it doesn't exist. ++ * ++ * Heap pools are per-VM. This function allows one to retrieve the heap pool ++ * attached to a VM. ++ * ++ * If no heap pool exists yet, and @create is true, we create one. ++ * ++ * The returned panthor_heap_pool should be released with panthor_heap_pool_put(). ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create) ++{ ++ struct panthor_heap_pool *pool; ++ ++ mutex_lock(&vm->heaps.lock); ++ if (!vm->heaps.pool && create) { ++ if (vm->destroyed) ++ pool = ERR_PTR(-EINVAL); ++ else ++ pool = panthor_heap_pool_create(vm->ptdev, vm); ++ ++ if (!IS_ERR(pool)) ++ vm->heaps.pool = panthor_heap_pool_get(pool); ++ } else { ++ pool = panthor_heap_pool_get(vm->heaps.pool); ++ } ++ mutex_unlock(&vm->heaps.lock); ++ ++ return pool; ++} ++ ++static u64 mair_to_memattr(u64 mair) ++{ ++ u64 memattr = 0; ++ u32 i; ++ ++ for (i = 0; i < 8; i++) { ++ u8 in_attr = mair >> (8 * i), out_attr; ++ u8 outer = in_attr >> 4, inner = in_attr & 0xf; ++ ++ /* For caching to be enabled, inner and outer caching policy ++ * have to be both write-back, if one of them is write-through ++ * or non-cacheable, we just choose non-cacheable. 
Device ++ * memory is also translated to non-cacheable. ++ */ ++ if (!(outer & 3) || !(outer & 4) || !(inner & 4)) { ++ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC | ++ AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | ++ AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); ++ } else { ++ /* Use SH_CPU_INNER mode so SH_IS, which is used when ++ * IOMMU_CACHE is set, actually maps to the standard ++ * definition of inner-shareable and not Mali's ++ * internal-shareable mode. ++ */ ++ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | ++ AS_MEMATTR_AARCH64_SH_CPU_INNER | ++ AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); ++ } ++ ++ memattr |= (u64)out_attr << (8 * i); ++ } ++ ++ return memattr; ++} ++ ++static void panthor_vma_link(struct panthor_vm *vm, ++ struct panthor_vma *vma, ++ struct drm_gpuvm_bo *vm_bo) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ drm_gpuva_link(&vma->base, vm_bo); ++ drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo)); ++ mutex_unlock(&bo->gpuva_list_lock); ++} ++ ++static void panthor_vma_unlink(struct panthor_vm *vm, ++ struct panthor_vma *vma) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); ++ struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ drm_gpuva_unlink(&vma->base); ++ mutex_unlock(&bo->gpuva_list_lock); ++ ++ /* drm_gpuva_unlink() release the vm_bo, but we manually retained it ++ * when entering this function, so we can implement deferred VMA ++ * destruction. Re-assign it here. ++ */ ++ vma->base.vm_bo = vm_bo; ++ list_add_tail(&vma->node, &vm->op_ctx->returned_vmas); ++} ++ ++static void panthor_vma_init(struct panthor_vma *vma, u32 flags) ++{ ++ INIT_LIST_HEAD(&vma->node); ++ vma->flags = flags; ++} ++ ++#define PANTHOR_VM_MAP_FLAGS \ ++ (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED) ++ ++static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) ++{ ++ struct panthor_vm *vm = priv; ++ struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; ++ struct panthor_vma *vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ int ret; ++ ++ if (!vma) ++ return -EINVAL; ++ ++ panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS); ++ ++ ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags), ++ op_ctx->map.sgt, op->map.gem.offset, ++ op->map.va.range); ++ if (ret) ++ return ret; ++ ++ /* Ref owned by the mapping now, clear the obj field so we don't release the ++ * pinning/obj ref behind GPUVA's back. 
++ */ ++ drm_gpuva_map(&vm->base, &vma->base, &op->map); ++ panthor_vma_link(vm, vma, op_ctx->map.vm_bo); ++ op_ctx->map.vm_bo = NULL; ++ return 0; ++} ++ ++static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, ++ void *priv) ++{ ++ struct panthor_vma *unmap_vma = container_of(op->remap.unmap->va, struct panthor_vma, base); ++ struct panthor_vm *vm = priv; ++ struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; ++ struct panthor_vma *prev_vma = NULL, *next_vma = NULL; ++ u64 unmap_start, unmap_range; ++ int ret; ++ ++ drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range); ++ ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range); ++ if (ret) ++ return ret; ++ ++ if (op->remap.prev) { ++ prev_vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ panthor_vma_init(prev_vma, unmap_vma->flags); ++ } ++ ++ if (op->remap.next) { ++ next_vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ panthor_vma_init(next_vma, unmap_vma->flags); ++ } ++ ++ drm_gpuva_remap(prev_vma ? &prev_vma->base : NULL, ++ next_vma ? &next_vma->base : NULL, ++ &op->remap); ++ ++ if (prev_vma) { ++ /* panthor_vma_link() transfers the vm_bo ownership to ++ * the VMA object. Since the vm_bo we're passing is still ++ * owned by the old mapping which will be released when this ++ * mapping is destroyed, we need to grab a ref here. ++ */ ++ panthor_vma_link(vm, prev_vma, ++ drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); ++ } ++ ++ if (next_vma) { ++ panthor_vma_link(vm, next_vma, ++ drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); ++ } ++ ++ panthor_vma_unlink(vm, unmap_vma); ++ return 0; ++} ++ ++static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, ++ void *priv) ++{ ++ struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base); ++ struct panthor_vm *vm = priv; ++ int ret; ++ ++ ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr, ++ unmap_vma->base.va.range); ++ if (drm_WARN_ON(&vm->ptdev->base, ret)) ++ return ret; ++ ++ drm_gpuva_unmap(&op->unmap); ++ panthor_vma_unlink(vm, unmap_vma); ++ return 0; ++} ++ ++static const struct drm_gpuvm_ops panthor_gpuvm_ops = { ++ .vm_free = panthor_vm_free, ++ .sm_step_map = panthor_gpuva_sm_step_map, ++ .sm_step_remap = panthor_gpuva_sm_step_remap, ++ .sm_step_unmap = panthor_gpuva_sm_step_unmap, ++}; ++ ++/** ++ * panthor_vm_resv() - Get the dma_resv object attached to a VM. ++ * @vm: VM to get the dma_resv of. ++ * ++ * Return: A dma_resv object. 
++ */ ++struct dma_resv *panthor_vm_resv(struct panthor_vm *vm) ++{ ++ return drm_gpuvm_resv(&vm->base); ++} ++ ++struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm) ++{ ++ if (!vm) ++ return NULL; ++ ++ return vm->base.r_obj; ++} ++ ++static int ++panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, ++ bool flag_vm_unusable_on_failure) ++{ ++ u32 op_type = op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK; ++ int ret; ++ ++ if (op_type == DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY) ++ return 0; ++ ++ mutex_lock(&vm->op_lock); ++ vm->op_ctx = op; ++ switch (op_type) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ if (vm->unusable) { ++ ret = -EINVAL; ++ break; ++ } ++ ++ ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range, ++ op->map.vm_bo->obj, op->map.bo_offset); ++ break; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ ++ if (ret && flag_vm_unusable_on_failure) ++ vm->unusable = true; ++ ++ vm->op_ctx = NULL; ++ mutex_unlock(&vm->op_lock); ++ ++ return ret; ++} ++ ++static struct dma_fence * ++panthor_vm_bind_run_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); ++ bool cookie; ++ int ret; ++ ++ /* Not only we report an error whose result is propagated to the ++ * drm_sched finished fence, but we also flag the VM as unusable, because ++ * a failure in the async VM_BIND results in an inconsistent state. VM needs ++ * to be destroyed and recreated. ++ */ ++ cookie = dma_fence_begin_signalling(); ++ ret = panthor_vm_exec_op(job->vm, &job->ctx, true); ++ dma_fence_end_signalling(cookie); ++ ++ return ret ? ERR_PTR(ret) : NULL; ++} ++ ++static void panthor_vm_bind_job_release(struct kref *kref) ++{ ++ struct panthor_vm_bind_job *job = container_of(kref, struct panthor_vm_bind_job, refcount); ++ ++ if (job->base.s_fence) ++ drm_sched_job_cleanup(&job->base); ++ ++ panthor_vm_cleanup_op_ctx(&job->ctx, job->vm); ++ panthor_vm_put(job->vm); ++ kfree(job); ++} ++ ++/** ++ * panthor_vm_bind_job_put() - Release a VM_BIND job reference ++ * @sched_job: Job to release the reference on. ++ */ ++void panthor_vm_bind_job_put(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(sched_job, struct panthor_vm_bind_job, base); ++ ++ if (sched_job) ++ kref_put(&job->refcount, panthor_vm_bind_job_release); ++} ++ ++static void ++panthor_vm_bind_free_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(sched_job, struct panthor_vm_bind_job, base); ++ ++ drm_sched_job_cleanup(sched_job); ++ ++ /* Do the heavy cleanups asynchronously, so we're out of the ++ * dma-signaling path and can acquire dma-resv locks safely. ++ */ ++ queue_work(panthor_cleanup_wq, &job->cleanup_op_ctx_work); ++} ++ ++static enum drm_gpu_sched_stat ++panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) ++{ ++ WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!"); ++ return DRM_GPU_SCHED_STAT_NOMINAL; ++} ++ ++static const struct drm_sched_backend_ops panthor_vm_bind_ops = { ++ .run_job = panthor_vm_bind_run_job, ++ .free_job = panthor_vm_bind_free_job, ++ .timedout_job = panthor_vm_bind_timedout_job, ++}; ++ ++/** ++ * panthor_vm_create() - Create a VM ++ * @ptdev: Device. ++ * @for_mcu: True if this is the FW MCU VM. 
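With panthor_vm_exec_op() in place, the life cycle of an operation context can be summed up in one hypothetical synchronous-map helper; example_sync_map() is illustrative only, skips the GEM handle lookup done by the ioctl path, and simply chains the three static helpers defined earlier in this file.

static int example_sync_map(struct panthor_vm *vm, struct panthor_gem_object *bo,
			    u64 offset, u64 size, u64 va, u32 flags)
{
	struct panthor_vm_op_ctx op_ctx;
	int ret;

	/* Everything that may allocate happens here, outside any
	 * dma-signalling section. @flags must include
	 * DRM_PANTHOR_VM_BIND_OP_TYPE_MAP.
	 */
	ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags);
	if (ret)
		return ret;

	/* Consumes the pre-allocated VMAs and page tables under vm->op_lock.
	 * Passing false here means a failure is reported to the caller
	 * instead of flagging the whole VM as unusable.
	 */
	ret = panthor_vm_exec_op(vm, &op_ctx, false);

	/* Returns unused page tables to pt_cache and releases replaced VMAs. */
	panthor_vm_cleanup_op_ctx(&op_ctx, vm);
	return ret;
}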
++ * @kernel_va_start: Start of the range reserved for kernel BO mapping. ++ * @kernel_va_size: Size of the range reserved for kernel BO mapping. ++ * @auto_kernel_va_start: Start of the auto-VA kernel range. ++ * @auto_kernel_va_size: Size of the auto-VA kernel range. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_vm * ++panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, ++ u64 kernel_va_start, u64 kernel_va_size, ++ u64 auto_kernel_va_start, u64 auto_kernel_va_size) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); ++ u64 full_va_range = 1ull << va_bits; ++ struct drm_gem_object *dummy_gem; ++ struct drm_gpu_scheduler *sched; ++ struct io_pgtable_cfg pgtbl_cfg; ++ u64 mair, min_va, va_range; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = kzalloc(sizeof(*vm), GFP_KERNEL); ++ if (!vm) ++ return ERR_PTR(-ENOMEM); ++ ++ /* We allocate a dummy GEM for the VM. */ ++ dummy_gem = drm_gpuvm_resv_object_alloc(&ptdev->base); ++ if (!dummy_gem) { ++ ret = -ENOMEM; ++ goto err_free_vm; ++ } ++ ++ mutex_init(&vm->heaps.lock); ++ vm->for_mcu = for_mcu; ++ vm->ptdev = ptdev; ++ mutex_init(&vm->op_lock); ++ ++ if (for_mcu) { ++ /* CSF MCU is a cortex M7, and can only address 4G */ ++ min_va = 0; ++ va_range = SZ_4G; ++ } else { ++ min_va = 0; ++ va_range = full_va_range; ++ } ++ ++ mutex_init(&vm->mm_lock); ++ drm_mm_init(&vm->mm, kernel_va_start, kernel_va_size); ++ vm->kernel_auto_va.start = auto_kernel_va_start; ++ vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1; ++ ++ INIT_LIST_HEAD(&vm->node); ++ INIT_LIST_HEAD(&vm->as.lru_node); ++ vm->as.id = -1; ++ refcount_set(&vm->as.active_cnt, 0); ++ ++ pgtbl_cfg = (struct io_pgtable_cfg) { ++ .pgsize_bitmap = SZ_4K | SZ_2M, ++ .ias = va_bits, ++ .oas = pa_bits, ++ .coherent_walk = ptdev->coherent, ++ .tlb = &mmu_tlb_ops, ++ .iommu_dev = ptdev->base.dev, ++ .alloc = alloc_pt, ++ .free = free_pt, ++ }; ++ ++ vm->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, vm); ++ if (!vm->pgtbl_ops) { ++ ret = -EINVAL; ++ goto err_mm_takedown; ++ } ++ ++ /* Bind operations are synchronous for now, no timeout needed. */ ++ ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq, ++ 1, 1, 0, ++ MAX_SCHEDULE_TIMEOUT, NULL, NULL, ++ "panthor-vm-bind", ptdev->base.dev); ++ if (ret) ++ goto err_free_io_pgtable; ++ ++ sched = &vm->sched; ++ ret = drm_sched_entity_init(&vm->entity, 0, &sched, 1, NULL); ++ if (ret) ++ goto err_sched_fini; ++ ++ mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair; ++ vm->memattr = mair_to_memattr(mair); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_add_tail(&vm->node, &ptdev->mmu->vm.list); ++ ++ /* If a reset is in progress, stop the scheduler. */ ++ if (ptdev->mmu->vm.reset_in_progress) ++ panthor_vm_stop(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ /* We intentionally leave the reserved range to zero, because we want kernel VMAs ++ * to be handled the same way user VMAs are. ++ */ ++ drm_gpuvm_init(&vm->base, for_mcu ? 
"panthor-MCU-VM" : "panthor-GPU-VM", ++ DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem, ++ min_va, va_range, 0, 0, &panthor_gpuvm_ops); ++ drm_gem_object_put(dummy_gem); ++ return vm; ++ ++err_sched_fini: ++ drm_sched_fini(&vm->sched); ++ ++err_free_io_pgtable: ++ free_io_pgtable_ops(vm->pgtbl_ops); ++ ++err_mm_takedown: ++ drm_mm_takedown(&vm->mm); ++ drm_gem_object_put(dummy_gem); ++ ++err_free_vm: ++ kfree(vm); ++ return ERR_PTR(ret); ++} ++ ++static int ++panthor_vm_bind_prepare_op_ctx(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op, ++ struct panthor_vm_op_ctx *op_ctx) ++{ ++ struct drm_gem_object *gem; ++ int ret; ++ ++ /* Aligned on page size. */ ++ if ((op->va | op->size) & ~PAGE_MASK) ++ return -EINVAL; ++ ++ switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ gem = drm_gem_object_lookup(file, op->bo_handle); ++ ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm, ++ gem ? to_panthor_bo(gem) : NULL, ++ op->bo_offset, ++ op->size, ++ op->va, ++ op->flags); ++ drm_gem_object_put(gem); ++ return ret; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ return -EINVAL; ++ ++ if (op->bo_handle || op->bo_offset) ++ return -EINVAL; ++ ++ return panthor_vm_prepare_unmap_op_ctx(op_ctx, vm, op->va, op->size); ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: ++ if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ return -EINVAL; ++ ++ if (op->bo_handle || op->bo_offset) ++ return -EINVAL; ++ ++ if (op->va || op->size) ++ return -EINVAL; ++ ++ if (!op->syncs.count) ++ return -EINVAL; ++ ++ panthor_vm_prepare_sync_only_op_ctx(op_ctx, vm); ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++static void panthor_vm_bind_job_cleanup_op_ctx_work(struct work_struct *work) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(work, struct panthor_vm_bind_job, cleanup_op_ctx_work); ++ ++ panthor_vm_bind_job_put(&job->base); ++} ++ ++/** ++ * panthor_vm_bind_job_create() - Create a VM_BIND job ++ * @file: File. ++ * @vm: VM targeted by the VM_BIND job. ++ * @op: VM operation data. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct drm_sched_job * ++panthor_vm_bind_job_create(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op) ++{ ++ struct panthor_vm_bind_job *job; ++ int ret; ++ ++ if (!vm) ++ return ERR_PTR(-EINVAL); ++ ++ if (vm->destroyed || vm->unusable) ++ return ERR_PTR(-EINVAL); ++ ++ job = kzalloc(sizeof(*job), GFP_KERNEL); ++ if (!job) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &job->ctx); ++ if (ret) { ++ kfree(job); ++ return ERR_PTR(ret); ++ } ++ ++ INIT_WORK(&job->cleanup_op_ctx_work, panthor_vm_bind_job_cleanup_op_ctx_work); ++ kref_init(&job->refcount); ++ job->vm = panthor_vm_get(vm); ++ ++ ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm); ++ if (ret) ++ goto err_put_job; ++ ++ return &job->base; ++ ++err_put_job: ++ panthor_vm_bind_job_put(&job->base); ++ return ERR_PTR(ret); ++} ++ ++/** ++ * panthor_vm_bind_job_prepare_resvs() - Prepare VM_BIND job dma_resvs ++ * @exec: The locking/preparation context. ++ * @sched_job: The job to prepare resvs on. ++ * ++ * Locks and prepare the VM resv. ++ * ++ * If this is a map operation, locks and prepares the GEM resv. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */
++int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec,
++ struct drm_sched_job *sched_job)
++{
++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
++ int ret;
++
++ /* Acquire the VM lock and reserve a slot for this VM bind job. */
++ ret = drm_gpuvm_prepare_vm(&job->vm->base, exec, 1);
++ if (ret)
++ return ret;
++
++ if (job->ctx.map.vm_bo) {
++ /* Lock/prepare the GEM being mapped. */
++ ret = drm_exec_prepare_obj(exec, job->ctx.map.vm_bo->obj, 1);
++ if (ret)
++ return ret;
++ }
++
++ return 0;
++}
++
++/**
++ * panthor_vm_bind_job_update_resvs() - Update the resv objects touched by a job
++ * @exec: drm_exec context.
++ * @sched_job: Job to update the resvs on.
++ */
++void panthor_vm_bind_job_update_resvs(struct drm_exec *exec,
++ struct drm_sched_job *sched_job)
++{
++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
++
++ /* Explicit sync => we just register our job finished fence as bookkeep. */
++ drm_gpuvm_resv_add_fence(&job->vm->base, exec,
++ &sched_job->s_fence->finished,
++ DMA_RESV_USAGE_BOOKKEEP,
++ DMA_RESV_USAGE_BOOKKEEP);
++}
++
++void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec,
++ struct dma_fence *fence,
++ enum dma_resv_usage private_usage,
++ enum dma_resv_usage extobj_usage)
++{
++ drm_gpuvm_resv_add_fence(&vm->base, exec, fence, private_usage, extobj_usage);
++}
++
++/**
++ * panthor_vm_bind_exec_sync_op() - Execute a VM_BIND operation synchronously.
++ * @file: File.
++ * @vm: VM targeted by the VM operation.
++ * @op: Data describing the VM operation.
++ *
++ * Return: 0 on success, a negative error code otherwise.
++ */
++int panthor_vm_bind_exec_sync_op(struct drm_file *file,
++ struct panthor_vm *vm,
++ struct drm_panthor_vm_bind_op *op)
++{
++ struct panthor_vm_op_ctx op_ctx;
++ int ret;
++
++ /* No sync objects allowed on synchronous operations. */
++ if (op->syncs.count)
++ return -EINVAL;
++
++ if (!op->size)
++ return 0;
++
++ ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &op_ctx);
++ if (ret)
++ return ret;
++
++ ret = panthor_vm_exec_op(vm, &op_ctx, false);
++ panthor_vm_cleanup_op_ctx(&op_ctx, vm);
++
++ return ret;
++}
++
++/**
++ * panthor_vm_map_bo_range() - Map a GEM object range to a VM
++ * @vm: VM to map the GEM to.
++ * @bo: GEM object to map.
++ * @offset: Offset in the GEM object.
++ * @size: Size to map.
++ * @va: Virtual address to map the object to.
++ * @flags: Combination of drm_panthor_vm_bind_op_flags flags.
++ * Only map-related flags are valid.
++ *
++ * Internal use only. For userspace requests, use
++ * panthor_vm_bind_exec_sync_op() instead.
++ *
++ * Return: 0 on success, a negative error code otherwise.
++ */
++int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo,
++ u64 offset, u64 size, u64 va, u32 flags)
++{
++ struct panthor_vm_op_ctx op_ctx;
++ int ret;
++
++ ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags);
++ if (ret)
++ return ret;
++
++ ret = panthor_vm_exec_op(vm, &op_ctx, false);
++ panthor_vm_cleanup_op_ctx(&op_ctx, vm);
++
++ return ret;
++}
++
++/**
++ * panthor_vm_unmap_range() - Unmap a portion of the VA space
++ * @vm: VM to unmap the region from.
++ * @va: Virtual address to unmap. Must be 4k aligned.
++ * @size: Size of the region to unmap. Must be 4k aligned.
++ *
++ * Internal use only. For userspace requests, use
++ * panthor_vm_bind_exec_sync_op() instead.
++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size) ++{ ++ struct panthor_vm_op_ctx op_ctx; ++ int ret; ++ ++ ret = panthor_vm_prepare_unmap_op_ctx(&op_ctx, vm, va, size); ++ if (ret) ++ return ret; ++ ++ ret = panthor_vm_exec_op(vm, &op_ctx, false); ++ panthor_vm_cleanup_op_ctx(&op_ctx, vm); ++ ++ return ret; ++} ++ ++/** ++ * panthor_vm_prepare_mapped_bos_resvs() - Prepare resvs on VM BOs. ++ * @exec: Locking/preparation context. ++ * @vm: VM targeted by the GPU job. ++ * @slot_count: Number of slots to reserve. ++ * ++ * GPU jobs assume all BOs bound to the VM at the time the job is submitted ++ * are available when the job is executed. In order to guarantee that, we ++ * need to reserve a slot on all BOs mapped to a VM and update this slot with ++ * the job fence after its submission. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm *vm, ++ u32 slot_count) ++{ ++ int ret; ++ ++ /* Acquire the VM lock and reserve a slot for this GPU job. */ ++ ret = drm_gpuvm_prepare_vm(&vm->base, exec, slot_count); ++ if (ret) ++ return ret; ++ ++ return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count); ++} ++ ++/** ++ * panthor_mmu_unplug() - Unplug the MMU logic ++ * @ptdev: Device. ++ * ++ * No access to the MMU regs should be done after this function is called. ++ * We suspend the IRQ and disable all VMs to guarantee that. ++ */ ++void panthor_mmu_unplug(struct panthor_device *ptdev) ++{ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) { ++ drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); ++ panthor_vm_release_as_locked(vm); ++ } ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++} ++ ++static void panthor_mmu_release_wq(struct drm_device *ddev, void *res) ++{ ++ destroy_workqueue(res); ++} ++ ++/** ++ * panthor_mmu_init() - Initialize the MMU logic. ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_mmu_init(struct panthor_device *ptdev) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ struct panthor_mmu *mmu; ++ int ret, irq; ++ ++ mmu = drmm_kzalloc(&ptdev->base, sizeof(*mmu), GFP_KERNEL); ++ if (!mmu) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&mmu->as.lru_list); ++ ++ ret = drmm_mutex_init(&ptdev->base, &mmu->as.slots_lock); ++ if (ret) ++ return ret; ++ ++ INIT_LIST_HEAD(&mmu->vm.list); ++ ret = drmm_mutex_init(&ptdev->base, &mmu->vm.lock); ++ if (ret) ++ return ret; ++ ++ ptdev->mmu = mmu; ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "mmu"); ++ if (irq <= 0) ++ return -ENODEV; ++ ++ ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq, ++ panthor_mmu_fault_mask(ptdev, ~0)); ++ if (ret) ++ return ret; ++ ++ mmu->vm.wq = alloc_workqueue("panthor-vm-bind", WQ_UNBOUND, 0); ++ if (!mmu->vm.wq) ++ return -ENOMEM; ++ ++ /* On 32-bit kernels, the VA space is limited by the io_pgtable_ops abstraction, ++ * which passes iova as an unsigned long. Patch the mmu_features to reflect this ++ * limitation. 
++ */ ++ if (sizeof(unsigned long) * 8 < va_bits) { ++ ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0); ++ ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8; ++ } ++ ++ return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq); ++} ++ ++#ifdef CONFIG_DEBUG_FS ++static int show_vm_gpuvas(struct panthor_vm *vm, struct seq_file *m) ++{ ++ int ret; ++ ++ mutex_lock(&vm->op_lock); ++ ret = drm_debugfs_gpuva_info(m, &vm->base); ++ mutex_unlock(&vm->op_lock); ++ ++ return ret; ++} ++ ++static int show_each_vm(struct seq_file *m, void *arg) ++{ ++ struct drm_info_node *node = (struct drm_info_node *)m->private; ++ struct drm_device *ddev = node->minor->dev; ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ int (*show)(struct panthor_vm *, struct seq_file *) = node->info_ent->data; ++ struct panthor_vm *vm; ++ int ret = 0; ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { ++ ret = show(vm, m); ++ if (ret < 0) ++ break; ++ ++ seq_puts(m, "\n"); ++ } ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ return ret; ++} ++ ++static struct drm_info_list panthor_mmu_debugfs_list[] = { ++ DRM_DEBUGFS_GPUVA_INFO(show_each_vm, show_vm_gpuvas), ++}; ++ ++/** ++ * panthor_mmu_debugfs_init() - Initialize MMU debugfs entries ++ * @minor: Minor. ++ */ ++void panthor_mmu_debugfs_init(struct drm_minor *minor) ++{ ++ drm_debugfs_create_files(panthor_mmu_debugfs_list, ++ ARRAY_SIZE(panthor_mmu_debugfs_list), ++ minor->debugfs_root, minor); ++} ++#endif /* CONFIG_DEBUG_FS */ ++ ++/** ++ * panthor_mmu_pt_cache_init() - Initialize the page table cache. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_mmu_pt_cache_init(void) ++{ ++ pt_cache = kmem_cache_create("panthor-mmu-pt", SZ_4K, SZ_4K, 0, NULL); ++ if (!pt_cache) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * panthor_mmu_pt_cache_fini() - Destroy the page table cache. ++ */ ++void panthor_mmu_pt_cache_fini(void) ++{ ++ kmem_cache_destroy(pt_cache); ++} +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_mmu.h +@@ -0,0 +1,102 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#ifndef __PANTHOR_MMU_H__ ++#define __PANTHOR_MMU_H__ ++ ++#include ++ ++struct drm_exec; ++struct drm_sched_job; ++struct panthor_gem_object; ++struct panthor_heap_pool; ++struct panthor_vm; ++struct panthor_vma; ++struct panthor_mmu; ++ ++int panthor_mmu_init(struct panthor_device *ptdev); ++void panthor_mmu_unplug(struct panthor_device *ptdev); ++void panthor_mmu_pre_reset(struct panthor_device *ptdev); ++void panthor_mmu_post_reset(struct panthor_device *ptdev); ++void panthor_mmu_suspend(struct panthor_device *ptdev); ++void panthor_mmu_resume(struct panthor_device *ptdev); ++ ++int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, ++ u64 offset, u64 size, u64 va, u32 flags); ++int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size); ++struct panthor_gem_object * ++panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset); ++ ++int panthor_vm_active(struct panthor_vm *vm); ++void panthor_vm_idle(struct panthor_vm *vm); ++int panthor_vm_as(struct panthor_vm *vm); ++ ++struct panthor_heap_pool * ++panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); ++ ++struct panthor_vm *panthor_vm_get(struct panthor_vm *vm); ++void panthor_vm_put(struct panthor_vm *vm); ++struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, ++ u64 kernel_va_start, u64 kernel_va_size, ++ u64 kernel_auto_va_start, ++ u64 kernel_auto_va_size); ++ ++int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, ++ struct panthor_vm *vm, ++ u32 slot_count); ++int panthor_vm_add_bos_resvs_deps_to_job(struct panthor_vm *vm, ++ struct drm_sched_job *job); ++void panthor_vm_add_job_fence_to_bos_resvs(struct panthor_vm *vm, ++ struct drm_sched_job *job); ++ ++struct dma_resv *panthor_vm_resv(struct panthor_vm *vm); ++struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm); ++ ++void panthor_vm_pool_destroy(struct panthor_file *pfile); ++int panthor_vm_pool_create(struct panthor_file *pfile); ++int panthor_vm_pool_create_vm(struct panthor_device *ptdev, ++ struct panthor_vm_pool *pool, ++ struct drm_panthor_vm_create *args); ++int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle); ++struct panthor_vm *panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle); ++ ++bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm); ++bool panthor_vm_is_unusable(struct panthor_vm *vm); ++ ++/* ++ * PANTHOR_VM_KERNEL_AUTO_VA: Use this magic address when you want the GEM ++ * logic to auto-allocate the virtual address in the reserved kernel VA range. 
++ */ ++#define PANTHOR_VM_KERNEL_AUTO_VA ~0ull ++ ++int panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, ++ struct drm_mm_node *va_node); ++void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node); ++ ++int panthor_vm_bind_exec_sync_op(struct drm_file *file, ++ struct panthor_vm *vm, ++ struct drm_panthor_vm_bind_op *op); ++ ++struct drm_sched_job * ++panthor_vm_bind_job_create(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op); ++void panthor_vm_bind_job_put(struct drm_sched_job *job); ++int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, ++ struct drm_sched_job *job); ++void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); ++ ++void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage); ++ ++int panthor_mmu_pt_cache_init(void); ++void panthor_mmu_pt_cache_fini(void); ++ ++#ifdef CONFIG_DEBUG_FS ++void panthor_mmu_debugfs_init(struct drm_minor *minor); ++#endif ++ ++#endif diff --git a/patches-6.6/034-37-v6.10-drm-panthor-Add-the-FW-logical-block.patch b/patches-6.6/034-37-v6.10-drm-panthor-Add-the-FW-logical-block.patch new file mode 100644 index 0000000..4b97490 --- /dev/null +++ b/patches-6.6/034-37-v6.10-drm-panthor-Add-the-FW-logical-block.patch @@ -0,0 +1,1929 @@ +From 2718d91816eeed03c09c8abe872e45f59078768c Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:22 +0100 +Subject: [PATCH] drm/panthor: Add the FW logical block + +Contains everything that's FW related, that includes the code dealing +with the microcontroller unit (MCU) that's running the FW, and anything +related to allocating memory shared between the FW and the CPU. + +A few global FW events are processed in the IRQ handler, the rest is +forwarded to the scheduler, since scheduling is the primary reason for +the FW existence, and also the main source of FW <-> kernel +interactions. + +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v5: +- Fix typo in GLB_PERFCNT_SAMPLE definition +- Fix unbalanced panthor_vm_idle/active() calls +- Fallback to a slow reset when the fast reset fails +- Add extra information when reporting a FW boot failure + +v4: +- Add a MODULE_FIRMWARE() entry for gen 10.8 +- Fix a wrong return ERR_PTR() in panthor_fw_load_section_entry() +- Fix typos +- Add Steve's R-b + +v3: +- Make the FW path more future-proof (Liviu) +- Use one waitqueue for all FW events +- Simplify propagation of FW events to the scheduler logic +- Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead +- Account for the panthor_vm changes +- Replace magic number with 0x7fffffff with ~0 to better signify that + it's the maximum permitted value. +- More accurate rounding when computing the firmware timeout. +- Add a 'sub iterator' helper function. This also adds a check that a + firmware entry doesn't overflow the firmware image. +- Drop __packed from FW structures, natural alignment is good enough. +- Other minor code improvements. 
+ +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-9-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_fw.c | 1362 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_fw.h | 503 ++++++++++ + 2 files changed, 1865 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c + create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_fw.c +@@ -0,0 +1,1362 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifdef CONFIG_ARM_ARCH_TIMER ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++#define CSF_FW_NAME "mali_csffw.bin" ++ ++#define PING_INTERVAL_MS 12000 ++#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024) ++#define PROGRESS_TIMEOUT_SCALE_SHIFT 10 ++#define IDLE_HYSTERESIS_US 800 ++#define PWROFF_HYSTERESIS_US 10000 ++ ++/** ++ * struct panthor_fw_binary_hdr - Firmware binary header. ++ */ ++struct panthor_fw_binary_hdr { ++ /** @magic: Magic value to check binary validity. */ ++ u32 magic; ++#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e ++ ++ /** @minor: Minor FW version. */ ++ u8 minor; ++ ++ /** @major: Major FW version. */ ++ u8 major; ++#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0 ++ ++ /** @padding1: MBZ. */ ++ u16 padding1; ++ ++ /** @version_hash: FW version hash. */ ++ u32 version_hash; ++ ++ /** @padding2: MBZ. */ ++ u32 padding2; ++ ++ /** @size: FW binary size. */ ++ u32 size; ++}; ++ ++/** ++ * enum panthor_fw_binary_entry_type - Firmware binary entry type ++ */ ++enum panthor_fw_binary_entry_type { ++ /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */ ++ CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */ ++ CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */ ++ CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */ ++ CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. 
*/
++ CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
++};
++
++#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff)
++#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff)
++#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30)
++#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31)
++
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30)
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31)
++
++#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \
++ (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \
++ CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
++
++/**
++ * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
++ */
++struct panthor_fw_binary_section_entry_hdr {
++ /** @flags: Section flags. */
++ u32 flags;
++
++ /** @va: MCU virtual range to map this binary section to. */
++ struct {
++ /** @start: Start address. */
++ u32 start;
++
++ /** @end: End address. */
++ u32 end;
++ } va;
++
++ /** @data: Data to initialize the FW section with. */
++ struct {
++ /** @start: Start offset in the FW binary. */
++ u32 start;
++
++ /** @end: End offset in the FW binary. */
++ u32 end;
++ } data;
++};
++
++/**
++ * struct panthor_fw_binary_iter - Firmware binary iterator
++ *
++ * Used to parse a firmware binary.
++ */
++struct panthor_fw_binary_iter {
++ /** @data: FW binary data. */
++ const void *data;
++
++ /** @size: FW binary size. */
++ size_t size;
++
++ /** @offset: Iterator offset. */
++ size_t offset;
++};
++
++/**
++ * struct panthor_fw_section - FW section
++ */
++struct panthor_fw_section {
++ /** @node: Used to keep track of FW sections. */
++ struct list_head node;
++
++ /** @flags: Section flags, as encoded in the FW binary. */
++ u32 flags;
++
++ /** @mem: Section memory. */
++ struct panthor_kernel_bo *mem;
++
++ /**
++ * @name: Name of the section, as specified in the binary.
++ *
++ * Can be NULL.
++ */
++ const char *name;
++
++ /**
++ * @data: Initial data copied to the FW memory.
++ *
++ * We keep data around so we can reload sections after a reset.
++ */
++ struct {
++ /** @buf: Buffer used to store init data. */
++ const void *buf;
++
++ /** @size: Size of @buf in bytes. */
++ size_t size;
++ } data;
++};
++
++#define CSF_MCU_SHARED_REGION_START 0x04000000ULL
++#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL
++
++#define MIN_CS_PER_CSG 8
++#define MIN_CSGS 3
++#define MAX_CSG_PRIO 0xf
++
++#define CSF_IFACE_VERSION(major, minor, patch) \
++ (((major) << 24) | ((minor) << 16) | (patch))
++#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24)
++#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff)
++#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff)
++
++#define CSF_GROUP_CONTROL_OFFSET 0x1000
++#define CSF_STREAM_CONTROL_OFFSET 0x40
++#define CSF_UNPRESERVED_REG_COUNT 4
++
++/**
++ * struct panthor_fw_iface - FW interfaces
++ */
++struct panthor_fw_iface {
++ /** @global: Global interface. */
++ struct panthor_fw_global_iface global;
++
++ /** @groups: Group slot interfaces. */
++ struct panthor_fw_csg_iface groups[MAX_CSGS];
++
++ /** @streams: Command stream slot interfaces. */
++ struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
++};
++
++/**
++ * struct panthor_fw - Firmware management
++ */
++struct panthor_fw {
++ /** @vm: MCU VM. */
++ struct panthor_vm *vm;
++
++ /** @sections: List of FW sections. */
++ struct list_head sections;
++
++ /** @shared_section: The section containing the FW interfaces. */
++ struct panthor_fw_section *shared_section;
++
++ /** @iface: FW interfaces. */
++ struct panthor_fw_iface iface;
++
++ /** @watchdog: Collection of fields relating to the FW watchdog. */
++ struct {
++ /** @ping_work: Delayed work used to ping the FW. */
++ struct delayed_work ping_work;
++ } watchdog;
++
++ /**
++ * @req_waitqueue: FW request waitqueue.
++ *
++ * Every time a request is sent to a command stream group or the global
++ * interface, the caller will first busy wait for the request to be
++ * acknowledged, and then fall back to a sleeping wait.
++ *
++ * This wait queue is here to support the sleeping wait flavor.
++ */
++ wait_queue_head_t req_waitqueue;
++
++ /** @booted: True if the FW is booted */
++ bool booted;
++
++ /**
++ * @fast_reset: True if the post_reset logic can proceed with a fast reset.
++ *
++ * A fast reset is just a reset where the driver doesn't reload the FW sections.
++ *
++ * Any time the firmware is properly suspended, a fast reset can take place.
++ * On the other hand, if the halt operation failed, the driver will reload
++ * all sections to make sure we start from a fresh state.
++ */
++ bool fast_reset;
++
++ /** @irq: Job irq data. */
++ struct panthor_irq irq;
++};
++
++struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
++{
++ return ptdev->fw->vm;
++}
++
++/**
++ * panthor_fw_get_glb_iface() - Get the global interface
++ * @ptdev: Device.
++ *
++ * Return: The global interface.
++ */
++struct panthor_fw_global_iface *
++panthor_fw_get_glb_iface(struct panthor_device *ptdev)
++{
++ return &ptdev->fw->iface.global;
++}
++
++/**
++ * panthor_fw_get_csg_iface() - Get a command stream group slot interface
++ * @ptdev: Device.
++ * @csg_slot: Index of the command stream group slot.
++ *
++ * Return: The command stream group slot interface.
++ */
++struct panthor_fw_csg_iface *
++panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
++{
++ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
++ return NULL;
++
++ return &ptdev->fw->iface.groups[csg_slot];
++}
++
++/**
++ * panthor_fw_get_cs_iface() - Get a command stream slot interface
++ * @ptdev: Device.
++ * @csg_slot: Index of the command stream group slot.
++ * @cs_slot: Index of the command stream slot.
++ *
++ * Return: The command stream slot interface.
++ */
++struct panthor_fw_cs_iface *
++panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
++{
++ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG))
++ return NULL;
++
++ return &ptdev->fw->iface.streams[csg_slot][cs_slot];
++}
++
++/**
++ * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
++ * @ptdev: Device.
++ * @timeout_us: Timeout expressed in micro-seconds.
++ *
++ * The FW has two timer sources: the GPU counter or arch-timer. We need
++ * to express timeouts in terms of number of cycles and specify which
++ * timer source should be used.
++ *
++ * Return: A value suitable for timeout fields in the global interface.
++ */
++static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
++{
++ bool use_cycle_counter = false;
++ u32 timer_rate = 0;
++ u64 mod_cycles;
++
++#ifdef CONFIG_ARM_ARCH_TIMER
++ timer_rate = arch_timer_get_cntfrq();
++#endif
++
++ if (!timer_rate) {
++ use_cycle_counter = true;
++ timer_rate = clk_get_rate(ptdev->clks.core);
++ }
++
++ if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
++ /* We couldn't get a valid clock rate, let's just pick the
++ * maximum value so the FW still handles the core
++ * power on/off requests.
++ */
++ return GLB_TIMER_VAL(~0) |
++ GLB_TIMER_SOURCE_GPU_COUNTER;
++ }
++
++ mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
++ 1000000ull << 10);
++ if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
++ mod_cycles = GLB_TIMER_VAL(~0);
++
++ return GLB_TIMER_VAL(mod_cycles) |
++ (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
++}
++
++static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
++ struct panthor_fw_binary_iter *iter,
++ void *out, size_t size)
++{
++ size_t new_offset = iter->offset + size;
++
++ if (new_offset > iter->size || new_offset < iter->offset) {
++ drm_err(&ptdev->base, "Firmware too small\n");
++ return -EINVAL;
++ }
++
++ memcpy(out, iter->data + iter->offset, size);
++ iter->offset = new_offset;
++ return 0;
++}
++
++static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
++ struct panthor_fw_binary_iter *iter,
++ struct panthor_fw_binary_iter *sub_iter,
++ size_t size)
++{
++ size_t new_offset = iter->offset + size;
++
++ if (new_offset > iter->size || new_offset < iter->offset) {
++ drm_err(&ptdev->base, "Firmware entry too long\n");
++ return -EINVAL;
++ }
++
++ sub_iter->offset = 0;
++ sub_iter->data = iter->data + iter->offset;
++ sub_iter->size = size;
++ iter->offset = new_offset;
++ return 0;
++}
++
++static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
++ struct panthor_fw_section *section)
++{
++ bool was_mapped = !!section->mem->kmap;
++ int ret;
++
++ if (!section->data.size &&
++ !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
++ return;
++
++ ret = panthor_kernel_bo_vmap(section->mem);
++ if (drm_WARN_ON(&ptdev->base, ret))
++ return;
++
++ memcpy(section->mem->kmap, section->data.buf, section->data.size);
++ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
++ memset(section->mem->kmap + section->data.size, 0,
++ panthor_kernel_bo_size(section->mem) - section->data.size);
++ }
++
++ if (!was_mapped)
++ panthor_kernel_bo_vunmap(section->mem);
++}
++
++/**
++ * panthor_fw_alloc_queue_iface_mem() - Allocate the ring-buffer interfaces.
++ * @ptdev: Device.
++ * @input: Pointer holding the input interface on success.
++ * Should be ignored on failure.
++ * @output: Pointer holding the output interface on success. ++ * Should be ignored on failure. ++ * @input_fw_va: Pointer holding the input interface FW VA on success. ++ * Should be ignored on failure. ++ * @output_fw_va: Pointer holding the output interface FW VA on success. ++ * Should be ignored on failure. ++ * ++ * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input ++ * interface is at offset 0, and the output interface at offset 4096. ++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, ++ struct panthor_fw_ringbuf_input_iface **input, ++ const struct panthor_fw_ringbuf_output_iface **output, ++ u32 *input_fw_va, u32 *output_fw_va) ++{ ++ struct panthor_kernel_bo *mem; ++ int ret; ++ ++ mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(mem)) ++ return mem; ++ ++ ret = panthor_kernel_bo_vmap(mem); ++ if (ret) { ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem); ++ return ERR_PTR(ret); ++ } ++ ++ memset(mem->kmap, 0, panthor_kernel_bo_size(mem)); ++ *input = mem->kmap; ++ *output = mem->kmap + SZ_4K; ++ *input_fw_va = panthor_kernel_bo_gpuva(mem); ++ *output_fw_va = *input_fw_va + SZ_4K; ++ ++ return mem; ++} ++ ++/** ++ * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group. ++ * @ptdev: Device. ++ * @size: Size of the suspend buffer. ++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size) ++{ ++ return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++} ++ ++static int panthor_fw_load_section_entry(struct panthor_device *ptdev, ++ const struct firmware *fw, ++ struct panthor_fw_binary_iter *iter, ++ u32 ehdr) ++{ ++ struct panthor_fw_binary_section_entry_hdr hdr; ++ struct panthor_fw_section *section; ++ u32 section_size; ++ u32 name_len; ++ int ret; ++ ++ ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); ++ if (ret) ++ return ret; ++ ++ if (hdr.data.end < hdr.data.start) { ++ drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n", ++ hdr.data.end, hdr.data.start); ++ return -EINVAL; ++ } ++ ++ if (hdr.va.end < hdr.va.start) { ++ drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n", ++ hdr.va.end, hdr.va.start); ++ return -EINVAL; ++ } ++ ++ if (hdr.data.end > fw->size) { ++ drm_err(&ptdev->base, "Firmware corrupted, file truncated? 
data_end=0x%x > fw size=0x%zx\n", ++ hdr.data.end, fw->size); ++ return -EINVAL; ++ } ++ ++ if ((hdr.va.start & ~PAGE_MASK) != 0 || ++ (hdr.va.end & ~PAGE_MASK) != 0) { ++ drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", ++ hdr.va.start, hdr.va.end); ++ return -EINVAL; ++ } ++ ++ if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) { ++ drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", ++ hdr.flags); ++ return -EINVAL; ++ } ++ ++ if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) { ++ drm_warn(&ptdev->base, ++ "Firmware protected mode entry not be supported, ignoring"); ++ return 0; ++ } ++ ++ if (hdr.va.start == CSF_MCU_SHARED_REGION_START && ++ !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) { ++ drm_err(&ptdev->base, ++ "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); ++ return -EINVAL; ++ } ++ ++ name_len = iter->size - iter->offset; ++ ++ section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL); ++ if (!section) ++ return -ENOMEM; ++ ++ list_add_tail(§ion->node, &ptdev->fw->sections); ++ section->flags = hdr.flags; ++ section->data.size = hdr.data.end - hdr.data.start; ++ ++ if (section->data.size > 0) { ++ void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL); ++ ++ if (!data) ++ return -ENOMEM; ++ ++ memcpy(data, fw->data + hdr.data.start, section->data.size); ++ section->data.buf = data; ++ } ++ ++ if (name_len > 0) { ++ char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL); ++ ++ if (!name) ++ return -ENOMEM; ++ ++ memcpy(name, iter->data + iter->offset, name_len); ++ name[name_len] = '\0'; ++ section->name = name; ++ } ++ ++ section_size = hdr.va.end - hdr.va.start; ++ if (section_size) { ++ u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK; ++ struct panthor_gem_object *bo; ++ u32 vm_map_flags = 0; ++ struct sg_table *sgt; ++ u64 va = hdr.va.start; ++ ++ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; ++ ++ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX)) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; ++ ++ /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to ++ * non-cacheable for now. We might want to introduce a new ++ * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device ++ * memory and is currently not used by our driver) for ++ * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit ++ * of IO-coherent systems. 
++ */ ++ if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; ++ ++ section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), ++ section_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ vm_map_flags, va); ++ if (IS_ERR(section->mem)) ++ return PTR_ERR(section->mem); ++ ++ if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) ++ return -EINVAL; ++ ++ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) { ++ ret = panthor_kernel_bo_vmap(section->mem); ++ if (ret) ++ return ret; ++ } ++ ++ panthor_fw_init_section_mem(ptdev, section); ++ ++ bo = to_panthor_bo(section->mem->obj); ++ sgt = drm_gem_shmem_get_pages_sgt(&bo->base); ++ if (IS_ERR(sgt)) ++ return PTR_ERR(sgt); ++ ++ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); ++ } ++ ++ if (hdr.va.start == CSF_MCU_SHARED_REGION_START) ++ ptdev->fw->shared_section = section; ++ ++ return 0; ++} ++ ++static void ++panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) ++{ ++ struct panthor_fw_section *section; ++ ++ list_for_each_entry(section, &ptdev->fw->sections, node) { ++ struct sg_table *sgt; ++ ++ if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) ++ continue; ++ ++ panthor_fw_init_section_mem(ptdev, section); ++ sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base); ++ if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt))) ++ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); ++ } ++} ++ ++static int panthor_fw_load_entry(struct panthor_device *ptdev, ++ const struct firmware *fw, ++ struct panthor_fw_binary_iter *iter) ++{ ++ struct panthor_fw_binary_iter eiter; ++ u32 ehdr; ++ int ret; ++ ++ ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr)); ++ if (ret) ++ return ret; ++ ++ if ((iter->offset % sizeof(u32)) || ++ (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) { ++ drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", ++ (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr)); ++ return -EINVAL; ++ } ++ ++ if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter, ++ CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr))) ++ return -EINVAL; ++ ++ switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { ++ case CSF_FW_BINARY_ENTRY_TYPE_IFACE: ++ return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); ++ ++ /* FIXME: handle those entry types? 
*/ ++ case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: ++ case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: ++ case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: ++ case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: ++ return 0; ++ default: ++ break; ++ } ++ ++ if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) ++ return 0; ++ ++ drm_err(&ptdev->base, ++ "Unsupported non-optional entry type %u in firmware\n", ++ CSF_FW_BINARY_ENTRY_TYPE(ehdr)); ++ return -EINVAL; ++} ++ ++static int panthor_fw_load(struct panthor_device *ptdev) ++{ ++ const struct firmware *fw = NULL; ++ struct panthor_fw_binary_iter iter = {}; ++ struct panthor_fw_binary_hdr hdr; ++ char fw_path[128]; ++ int ret; ++ ++ snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s", ++ (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id), ++ (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id), ++ CSF_FW_NAME); ++ ++ ret = request_firmware(&fw, fw_path, ptdev->base.dev); ++ if (ret) { ++ drm_err(&ptdev->base, "Failed to load firmware image '%s'\n", ++ CSF_FW_NAME); ++ return ret; ++ } ++ ++ iter.data = fw->data; ++ iter.size = fw->size; ++ ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr)); ++ if (ret) ++ goto out; ++ ++ if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) { ++ ret = -EINVAL; ++ drm_err(&ptdev->base, "Invalid firmware magic\n"); ++ goto out; ++ } ++ ++ if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) { ++ ret = -EINVAL; ++ drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n", ++ hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX); ++ goto out; ++ } ++ ++ if (hdr.size > iter.size) { ++ drm_err(&ptdev->base, "Firmware image is truncated\n"); ++ goto out; ++ } ++ ++ iter.size = hdr.size; ++ ++ while (iter.offset < hdr.size) { ++ ret = panthor_fw_load_entry(ptdev, fw, &iter); ++ if (ret) ++ goto out; ++ } ++ ++ if (!ptdev->fw->shared_section) { ++ drm_err(&ptdev->base, "Shared interface region not found\n"); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++out: ++ release_firmware(fw); ++ return ret; ++} ++ ++/** ++ * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address ++ * @ptdev: Device. ++ * @mcu_va: MCU address. ++ * ++ * Return: NULL if the address is not part of the shared section, non-NULL otherwise. 
++ */ ++static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va) ++{ ++ u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem); ++ u64 shared_mem_end = shared_mem_start + ++ panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end) ++ return NULL; ++ ++ return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start); ++} ++ ++static int panthor_init_cs_iface(struct panthor_device *ptdev, ++ unsigned int csg_idx, unsigned int cs_idx) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx); ++ struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx]; ++ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + ++ (csg_idx * glb_iface->control->group_stride) + ++ CSF_STREAM_CONTROL_OFFSET + ++ (cs_idx * csg_iface->control->stream_stride); ++ struct panthor_fw_cs_iface *first_cs_iface = ++ panthor_fw_get_cs_iface(ptdev, 0, 0); ++ ++ if (iface_offset + sizeof(*cs_iface) >= shared_section_sz) ++ return -EINVAL; ++ ++ spin_lock_init(&cs_iface->lock); ++ cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; ++ cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va); ++ cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va); ++ ++ if (!cs_iface->input || !cs_iface->output) { ++ drm_err(&ptdev->base, "Invalid stream control interface input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (cs_iface != first_cs_iface) { ++ if (cs_iface->control->features != first_cs_iface->control->features) { ++ drm_err(&ptdev->base, "Expecting identical CS slots"); ++ return -EINVAL; ++ } ++ } else { ++ u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features); ++ ++ ptdev->csif_info.cs_reg_count = reg_count; ++ ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT; ++ } ++ ++ return 0; ++} ++ ++static bool compare_csg(const struct panthor_fw_csg_control_iface *a, ++ const struct panthor_fw_csg_control_iface *b) ++{ ++ if (a->features != b->features) ++ return false; ++ if (a->suspend_size != b->suspend_size) ++ return false; ++ if (a->protm_suspend_size != b->protm_suspend_size) ++ return false; ++ if (a->stream_num != b->stream_num) ++ return false; ++ return true; ++} ++ ++static int panthor_init_csg_iface(struct panthor_device *ptdev, ++ unsigned int csg_idx) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx]; ++ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride); ++ unsigned int i; ++ ++ if (iface_offset + sizeof(*csg_iface) >= shared_section_sz) ++ return -EINVAL; ++ ++ spin_lock_init(&csg_iface->lock); ++ csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; ++ csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va); ++ csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va); ++ ++ if (csg_iface->control->stream_num < MIN_CS_PER_CSG || ++ csg_iface->control->stream_num > MAX_CS_PER_CSG) ++ return -EINVAL; ++ ++ if (!csg_iface->input || !csg_iface->output) { ++ drm_err(&ptdev->base, "Invalid group control interface 
input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (csg_idx > 0) { ++ struct panthor_fw_csg_iface *first_csg_iface = ++ panthor_fw_get_csg_iface(ptdev, 0); ++ ++ if (!compare_csg(first_csg_iface->control, csg_iface->control)) { ++ drm_err(&ptdev->base, "Expecting identical CSG slots"); ++ return -EINVAL; ++ } ++ } ++ ++ for (i = 0; i < csg_iface->control->stream_num; i++) { ++ int ret = panthor_init_cs_iface(ptdev, csg_idx, i); ++ ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static u32 panthor_get_instr_features(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0)) ++ return 0; ++ ++ return glb_iface->control->instr_features; ++} ++ ++static int panthor_fw_init_ifaces(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global; ++ unsigned int i; ++ ++ if (!ptdev->fw->shared_section->mem->kmap) ++ return -EINVAL; ++ ++ spin_lock_init(&glb_iface->lock); ++ glb_iface->control = ptdev->fw->shared_section->mem->kmap; ++ ++ if (!glb_iface->control->version) { ++ drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot"); ++ return -EINVAL; ++ } ++ ++ glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va); ++ glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va); ++ if (!glb_iface->input || !glb_iface->output) { ++ drm_err(&ptdev->base, "Invalid global control interface input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (glb_iface->control->group_num > MAX_CSGS || ++ glb_iface->control->group_num < MIN_CSGS) { ++ drm_err(&ptdev->base, "Invalid number of control groups"); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < glb_iface->control->group_num; i++) { ++ int ret = panthor_init_csg_iface(ptdev, i); ++ ++ if (ret) ++ return ret; ++ } ++ ++ drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x", ++ CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), ++ CSF_IFACE_VERSION_MINOR(glb_iface->control->version), ++ CSF_IFACE_VERSION_PATCH(glb_iface->control->version), ++ glb_iface->control->features, ++ panthor_get_instr_features(ptdev)); ++ return 0; ++} ++ ++static void panthor_fw_init_global_iface(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ /* Enable all cores. */ ++ glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present; ++ ++ /* Setup timers. */ ++ glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US); ++ glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; ++ glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US); ++ ++ /* Enable interrupts we care about. */ ++ glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN | ++ GLB_PING | ++ GLB_CFG_PROGRESS_TIMER | ++ GLB_CFG_POWEROFF_TIMER | ++ GLB_IDLE_EN | ++ GLB_IDLE; ++ ++ panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); ++ panthor_fw_toggle_reqs(glb_iface, req, ack, ++ GLB_CFG_ALLOC_EN | ++ GLB_CFG_POWEROFF_TIMER | ++ GLB_CFG_PROGRESS_TIMER); ++ ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ ++ /* Kick the watchdog. 
*/ ++ mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work, ++ msecs_to_jiffies(PING_INTERVAL_MS)); ++} ++ ++static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) ++ ptdev->fw->booted = true; ++ ++ wake_up_all(&ptdev->fw->req_waitqueue); ++ ++ /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */ ++ if (!ptdev->fw->booted) ++ return; ++ ++ panthor_sched_report_fw_events(ptdev, status); ++} ++PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler); ++ ++static int panthor_fw_start(struct panthor_device *ptdev) ++{ ++ bool timedout = false; ++ ++ ptdev->fw->booted = false; ++ panthor_job_irq_resume(&ptdev->fw->irq, ~0); ++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO); ++ ++ if (!wait_event_timeout(ptdev->fw->req_waitqueue, ++ ptdev->fw->booted, ++ msecs_to_jiffies(1000))) { ++ if (!ptdev->fw->booted && ++ !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF)) ++ timedout = true; ++ } ++ ++ if (timedout) { ++ static const char * const status_str[] = { ++ [MCU_STATUS_DISABLED] = "disabled", ++ [MCU_STATUS_ENABLED] = "enabled", ++ [MCU_STATUS_HALT] = "halt", ++ [MCU_STATUS_FATAL] = "fatal", ++ }; ++ u32 status = gpu_read(ptdev, MCU_STATUS); ++ ++ drm_err(&ptdev->base, "Failed to boot MCU (status=%s)", ++ status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++static void panthor_fw_stop(struct panthor_device *ptdev) ++{ ++ u32 status; ++ ++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); ++ if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, ++ status == MCU_STATUS_DISABLED, 10, 100000)) ++ drm_err(&ptdev->base, "Failed to stop MCU"); ++} ++ ++/** ++ * panthor_fw_pre_reset() - Call before a reset. ++ * @ptdev: Device. ++ * @on_hang: true if the reset was triggered on a GPU hang. ++ * ++ * If the reset is not triggered on a hang, we try to gracefully halt the ++ * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called. ++ */ ++void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) ++{ ++ /* Make sure we won't be woken up by a ping. */ ++ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); ++ ++ ptdev->fw->fast_reset = false; ++ ++ if (!on_hang) { ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 status; ++ ++ panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, ++ status == MCU_STATUS_HALT, 10, 100000) && ++ glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) { ++ ptdev->fw->fast_reset = true; ++ } else { ++ drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); ++ } ++ ++ /* The FW detects 0 -> 1 transitions. Make sure we reset ++ * the HALT bit before the FW is rebooted. ++ */ ++ panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); ++ } ++ ++ panthor_job_irq_suspend(&ptdev->fw->irq); ++} ++ ++/** ++ * panthor_fw_post_reset() - Call after a reset. ++ * @ptdev: Device. ++ * ++ * Start the FW. If this is not a fast reset, all FW sections are reloaded to ++ * make sure we can recover from a memory corruption. ++ */ ++int panthor_fw_post_reset(struct panthor_device *ptdev) ++{ ++ int ret; ++ ++ /* Make the MCU VM active. */ ++ ret = panthor_vm_active(ptdev->fw->vm); ++ if (ret) ++ return ret; ++ ++ /* If this is a fast reset, try to start the MCU without reloading ++ * the FW sections. 
If it fails, go for a full reset.
++ */
++ if (ptdev->fw->fast_reset) {
++ ret = panthor_fw_start(ptdev);
++ if (!ret)
++ goto out;
++
++ /* Force a disable, so we get a fresh boot on the next
++ * panthor_fw_start() call.
++ */
++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
++ drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
++ }
++
++ /* Reload all sections, including RO ones. We're not supposed
++ * to end up here anyway, let's just assume the overhead of
++ * reloading everything is acceptable.
++ */
++ panthor_reload_fw_sections(ptdev, true);
++
++ ret = panthor_fw_start(ptdev);
++ if (ret) {
++ drm_err(&ptdev->base, "FW slow reset failed");
++ return ret;
++ }
++
++out:
++ /* We must re-initialize the global interface even on fast-reset. */
++ panthor_fw_init_global_iface(ptdev);
++ return 0;
++}
++
++/**
++ * panthor_fw_unplug() - Called when the device is unplugged.
++ * @ptdev: Device.
++ *
++ * This function must make sure all pending operations are flushed before
++ * it releases device resources, thus preventing any interaction with
++ * the HW.
++ *
++ * If there is still FW-related work running after this function returns,
++ * it must use drm_dev_{enter,exit}() and skip any HW access when
++ * drm_dev_enter() returns false.
++ */
++void panthor_fw_unplug(struct panthor_device *ptdev)
++{
++ struct panthor_fw_section *section;
++
++ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
++
++ /* Make sure the IRQ handler can no longer be called after that point. */
++ if (ptdev->fw->irq.irq)
++ panthor_job_irq_suspend(&ptdev->fw->irq);
++
++ panthor_fw_stop(ptdev);
++
++ list_for_each_entry(section, &ptdev->fw->sections, node)
++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);
++
++ /* We intentionally don't call panthor_vm_idle() and let
++ * panthor_mmu_unplug() release the AS we acquired with
++ * panthor_vm_active() so we don't have to track the VM active/idle
++ * state to keep the active_refcnt balanced.
++ */
++ panthor_vm_put(ptdev->fw->vm);
++
++ panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
++}
++
++/**
++ * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
++ * @req_ptr: Pointer to the req register.
++ * @ack_ptr: Pointer to the ack register.
++ * @wq: Wait queue to use for the sleeping wait.
++ * @req_mask: Mask of requests to wait for.
++ * @acked: Pointer to field that's updated with the acked requests.
++ * If the function returns 0, *acked == req_mask.
++ * @timeout_ms: Timeout expressed in milliseconds.
++ *
++ * Return: 0 on success, -ETIMEDOUT otherwise.
++ */
++static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
++ wait_queue_head_t *wq,
++ u32 req_mask, u32 *acked,
++ u32 timeout_ms)
++{
++ u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
++ int ret;
++
++ /* Busy wait for a few µsecs before falling back to a sleeping wait. */
++ *acked = req_mask;
++ ret = read_poll_timeout_atomic(READ_ONCE, ack,
++ (ack & req_mask) == req,
++ 0, 10, 0,
++ *ack_ptr);
++ if (!ret)
++ return 0;
++
++ if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
++ msecs_to_jiffies(timeout_ms)))
++ return 0;
++
++ /* Check one last time, in case we were not woken up for some reason. */
++ ack = READ_ONCE(*ack_ptr);
++ if ((ack & req_mask) == req)
++ return 0;
++
++ *acked = ~(req ^ ack) & req_mask;
++ return -ETIMEDOUT;
++}
++
++/**
++ * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
++ * @ptdev: Device.
++ * @req_mask: Mask of requests to wait for. ++ * @acked: Pointer to field that's updated with the acked requests. ++ * If the function returns 0, *acked == req_mask. ++ * @timeout_ms: Timeout expressed in milliseconds. ++ * ++ * Return: 0 on success, -ETIMEDOUT otherwise. ++ */ ++int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, ++ u32 req_mask, u32 *acked, ++ u32 timeout_ms) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ /* GLB_HALT doesn't get acked through the FW interface. */ ++ if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT))) ++ return -EINVAL; ++ ++ return panthor_fw_wait_acks(&glb_iface->input->req, ++ &glb_iface->output->ack, ++ &ptdev->fw->req_waitqueue, ++ req_mask, acked, timeout_ms); ++} ++ ++/** ++ * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged. ++ * @ptdev: Device. ++ * @csg_slot: CSG slot ID. ++ * @req_mask: Mask of requests to wait for. ++ * @acked: Pointer to field that's updated with the acked requests. ++ * If the function returns 0, *acked == req_mask. ++ * @timeout_ms: Timeout expressed in milliseconds. ++ * ++ * Return: 0 on success, -ETIMEDOUT otherwise. ++ */ ++int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot, ++ u32 req_mask, u32 *acked, u32 timeout_ms) ++{ ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot); ++ int ret; ++ ++ if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK)) ++ return -EINVAL; ++ ++ ret = panthor_fw_wait_acks(&csg_iface->input->req, ++ &csg_iface->output->ack, ++ &ptdev->fw->req_waitqueue, ++ req_mask, acked, timeout_ms); ++ ++ /* ++ * Check that all bits in the state field were updated, if any mismatch ++ * then clear all bits in the state field. This allows code to do ++ * (acked & CSG_STATE_MASK) and get the right value. ++ */ ++ ++ if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK) ++ *acked &= ~CSG_STATE_MASK; ++ ++ return ret; ++} ++ ++/** ++ * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells. ++ * @ptdev: Device. ++ * @csg_mask: Bitmask encoding the command stream group doorbells to ring. ++ * ++ * This function is toggling bits in the doorbell_req and ringing the ++ * global doorbell. It doesn't require a user doorbell to be attached to ++ * the group. ++ */ ++void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++} ++ ++static void panthor_fw_ping_work(struct work_struct *work) ++{ ++ struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work); ++ struct panthor_device *ptdev = fw->irq.ptdev; ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 acked; ++ int ret; ++ ++ if (panthor_device_reset_is_pending(ptdev)) ++ return; ++ ++ panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ ++ ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ drm_err(&ptdev->base, "FW ping timeout, scheduling a reset"); ++ } else { ++ mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work, ++ msecs_to_jiffies(PING_INTERVAL_MS)); ++ } ++} ++ ++/** ++ * panthor_fw_init() - Initialize FW related data. ++ * @ptdev: Device. 
++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_fw_init(struct panthor_device *ptdev) ++{ ++ struct panthor_fw *fw; ++ int ret, irq; ++ ++ fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL); ++ if (!fw) ++ return -ENOMEM; ++ ++ ptdev->fw = fw; ++ init_waitqueue_head(&fw->req_waitqueue); ++ INIT_LIST_HEAD(&fw->sections); ++ INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work); ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job"); ++ if (irq <= 0) ++ return -ENODEV; ++ ++ ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0); ++ if (ret) { ++ drm_err(&ptdev->base, "failed to request job irq"); ++ return ret; ++ } ++ ++ ret = panthor_gpu_l2_power_on(ptdev); ++ if (ret) ++ return ret; ++ ++ fw->vm = panthor_vm_create(ptdev, true, ++ 0, SZ_4G, ++ CSF_MCU_SHARED_REGION_START, ++ CSF_MCU_SHARED_REGION_SIZE); ++ if (IS_ERR(fw->vm)) { ++ ret = PTR_ERR(fw->vm); ++ fw->vm = NULL; ++ goto err_unplug_fw; ++ } ++ ++ ret = panthor_fw_load(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_vm_active(fw->vm); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_fw_start(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_fw_init_ifaces(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ panthor_fw_init_global_iface(ptdev); ++ return 0; ++ ++err_unplug_fw: ++ panthor_fw_unplug(ptdev); ++ return ret; ++} ++ ++MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin"); +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_fw.h +@@ -0,0 +1,503 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_MCU_H__ ++#define __PANTHOR_MCU_H__ ++ ++#include ++ ++struct panthor_device; ++struct panthor_kernel_bo; ++ ++#define MAX_CSGS 31 ++#define MAX_CS_PER_CSG 32 ++ ++struct panthor_fw_ringbuf_input_iface { ++ u64 insert; ++ u64 extract; ++}; ++ ++struct panthor_fw_ringbuf_output_iface { ++ u64 extract; ++ u32 active; ++}; ++ ++struct panthor_fw_cs_control_iface { ++#define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1) ++#define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8) ++#define CS_FEATURES_COMPUTE BIT(16) ++#define CS_FEATURES_FRAGMENT BIT(17) ++#define CS_FEATURES_TILER BIT(18) ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++}; ++ ++struct panthor_fw_cs_input_iface { ++#define CS_STATE_MASK GENMASK(2, 0) ++#define CS_STATE_STOP 0 ++#define CS_STATE_START 1 ++#define CS_EXTRACT_EVENT BIT(4) ++#define CS_IDLE_SYNC_WAIT BIT(8) ++#define CS_IDLE_PROTM_PENDING BIT(9) ++#define CS_IDLE_EMPTY BIT(10) ++#define CS_IDLE_RESOURCE_REQ BIT(11) ++#define CS_TILER_OOM BIT(26) ++#define CS_PROTM_PENDING BIT(27) ++#define CS_FATAL BIT(30) ++#define CS_FAULT BIT(31) ++#define CS_REQ_MASK (CS_STATE_MASK | \ ++ CS_EXTRACT_EVENT | \ ++ CS_IDLE_SYNC_WAIT | \ ++ CS_IDLE_PROTM_PENDING | \ ++ CS_IDLE_EMPTY | \ ++ CS_IDLE_RESOURCE_REQ) ++#define CS_EVT_MASK (CS_TILER_OOM | \ ++ CS_PROTM_PENDING | \ ++ CS_FATAL | \ ++ CS_FAULT) ++ u32 req; ++ ++#define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0)) ++#define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8)) ++ u32 config; ++ u32 reserved1; ++ u32 ack_irq_mask; ++ u64 ringbuf_base; ++ u32 ringbuf_size; ++ u32 reserved2; ++ u64 heap_start; ++ u64 heap_end; ++ u64 ringbuf_input; ++ u64 ringbuf_output; ++ u32 instr_config; ++ u32 instrbuf_size; ++ u64 instrbuf_base; ++ u64 instrbuf_offset_ptr; ++}; ++ ++struct panthor_fw_cs_output_iface { ++ u32 ack; ++ u32 reserved1[15]; ++ u64 status_cmd_ptr; ++ ++#define 
CS_STATUS_WAIT_SB_MASK GENMASK(15, 0) ++#define CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16) ++#define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16) ++#define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16) ++#define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24) ++#define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24) ++#define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24) ++#define CS_STATUS_WAIT_PROGRESS BIT(28) ++#define CS_STATUS_WAIT_PROTM BIT(29) ++#define CS_STATUS_WAIT_SYNC_64B BIT(30) ++#define CS_STATUS_WAIT_SYNC BIT(31) ++ u32 status_wait; ++ u32 status_req_resource; ++ u64 status_wait_sync_ptr; ++ u32 status_wait_sync_value; ++ u32 status_scoreboards; ++ ++#define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0 ++#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 ++#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 ++#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 ++#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 ++#define CS_STATUS_BLOCKED_REASON_RES 6 ++#define CS_STATUS_BLOCKED_REASON_FLUSH 7 ++#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) ++ u32 status_blocked_reason; ++ u32 status_wait_sync_value_hi; ++ u32 reserved2[6]; ++ ++#define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0)) ++#define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0)) ++ u32 fault; ++ u32 fatal; ++ u64 fault_info; ++ u64 fatal_info; ++ u32 reserved3[10]; ++ u32 heap_vt_start; ++ u32 heap_vt_end; ++ u32 reserved4; ++ u32 heap_frag_end; ++ u64 heap_address; ++}; ++ ++struct panthor_fw_csg_control_iface { ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++ u32 suspend_size; ++ u32 protm_suspend_size; ++ u32 stream_num; ++ u32 stream_stride; ++}; ++ ++struct panthor_fw_csg_input_iface { ++#define CSG_STATE_MASK GENMASK(2, 0) ++#define CSG_STATE_TERMINATE 0 ++#define CSG_STATE_START 1 ++#define CSG_STATE_SUSPEND 2 ++#define CSG_STATE_RESUME 3 ++#define CSG_ENDPOINT_CONFIG BIT(4) ++#define CSG_STATUS_UPDATE BIT(5) ++#define CSG_SYNC_UPDATE BIT(28) ++#define CSG_IDLE BIT(29) ++#define CSG_DOORBELL BIT(30) ++#define CSG_PROGRESS_TIMER_EVENT BIT(31) ++#define CSG_REQ_MASK (CSG_STATE_MASK | \ ++ CSG_ENDPOINT_CONFIG | \ ++ CSG_STATUS_UPDATE) ++#define CSG_EVT_MASK (CSG_SYNC_UPDATE | \ ++ CSG_IDLE | \ ++ CSG_PROGRESS_TIMER_EVENT) ++ u32 req; ++ u32 ack_irq_mask; ++ ++ u32 doorbell_req; ++ u32 cs_irq_ack; ++ u32 reserved1[4]; ++ u64 allow_compute; ++ u64 allow_fragment; ++ u32 allow_other; ++ ++#define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0)) ++#define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8)) ++#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) ++#define CSG_EP_REQ_EXCL_COMPUTE BIT(20) ++#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) ++#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) ++#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) ++ u32 endpoint_req; ++ u32 reserved2[2]; ++ u64 suspend_buf; ++ u64 protm_suspend_buf; ++ u32 config; ++ u32 iter_trace_config; ++}; ++ ++struct panthor_fw_csg_output_iface { ++ u32 ack; ++ u32 reserved1; ++ u32 doorbell_ack; ++ u32 cs_irq_req; ++ u32 status_endpoint_current; ++ u32 status_endpoint_req; ++ ++#define CSG_STATUS_STATE_IS_IDLE BIT(0) ++ u32 status_state; ++ u32 resource_dep; ++}; ++ ++struct panthor_fw_global_control_iface { ++ u32 version; ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++ u32 group_num; ++ u32 group_stride; ++ u32 perfcnt_size; ++ u32 instr_features; ++}; ++ ++struct panthor_fw_global_input_iface { ++#define GLB_HALT BIT(0) ++#define GLB_CFG_PROGRESS_TIMER BIT(1) ++#define GLB_CFG_ALLOC_EN BIT(2) ++#define GLB_CFG_POWEROFF_TIMER BIT(3) ++#define GLB_PROTM_ENTER BIT(4) 
++#define GLB_PERFCNT_EN BIT(5) ++#define GLB_PERFCNT_SAMPLE BIT(6) ++#define GLB_COUNTER_EN BIT(7) ++#define GLB_PING BIT(8) ++#define GLB_FWCFG_UPDATE BIT(9) ++#define GLB_IDLE_EN BIT(10) ++#define GLB_SLEEP BIT(12) ++#define GLB_INACTIVE_COMPUTE BIT(20) ++#define GLB_INACTIVE_FRAGMENT BIT(21) ++#define GLB_INACTIVE_TILER BIT(22) ++#define GLB_PROTM_EXIT BIT(23) ++#define GLB_PERFCNT_THRESHOLD BIT(24) ++#define GLB_PERFCNT_OVERFLOW BIT(25) ++#define GLB_IDLE BIT(26) ++#define GLB_DBG_CSF BIT(30) ++#define GLB_DBG_HOST BIT(31) ++#define GLB_REQ_MASK GENMASK(10, 0) ++#define GLB_EVT_MASK GENMASK(26, 20) ++ u32 req; ++ u32 ack_irq_mask; ++ u32 doorbell_req; ++ u32 reserved1; ++ u32 progress_timer; ++ ++#define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0)) ++#define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31) ++ u32 poweroff_timer; ++ u64 core_en_mask; ++ u32 reserved2; ++ u32 perfcnt_as; ++ u64 perfcnt_base; ++ u32 perfcnt_extract; ++ u32 reserved3[3]; ++ u32 perfcnt_config; ++ u32 perfcnt_csg_select; ++ u32 perfcnt_fw_enable; ++ u32 perfcnt_csg_enable; ++ u32 perfcnt_csf_enable; ++ u32 perfcnt_shader_enable; ++ u32 perfcnt_tiler_enable; ++ u32 perfcnt_mmu_l2_enable; ++ u32 reserved4[8]; ++ u32 idle_timer; ++}; ++ ++enum panthor_fw_halt_status { ++ PANTHOR_FW_HALT_OK = 0, ++ PANTHOR_FW_HALT_ON_PANIC = 0x4e, ++ PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f, ++}; ++ ++struct panthor_fw_global_output_iface { ++ u32 ack; ++ u32 reserved1; ++ u32 doorbell_ack; ++ u32 reserved2; ++ u32 halt_status; ++ u32 perfcnt_status; ++ u32 perfcnt_insert; ++}; ++ ++/** ++ * struct panthor_fw_cs_iface - Firmware command stream slot interface ++ */ ++struct panthor_fw_cs_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. ++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream slot control interface. ++ * ++ * Used to expose command stream slot properties. ++ * ++ * This interface is read-only. ++ */ ++ struct panthor_fw_cs_control_iface *control; ++ ++ /** ++ * @input: Command stream slot input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_cs_input_iface *input; ++ ++ /** ++ * @output: Command stream slot output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_cs_output_iface *output; ++}; ++ ++/** ++ * struct panthor_fw_csg_iface - Firmware command stream group slot interface ++ */ ++struct panthor_fw_csg_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. ++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream group slot control interface. ++ * ++ * Used to expose command stream group slot properties. 
++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_csg_control_iface *control; ++ ++ /** ++ * @input: Command stream slot input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_csg_input_iface *input; ++ ++ /** ++ * @output: Command stream group slot output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_csg_output_iface *output; ++}; ++ ++/** ++ * struct panthor_fw_global_iface - Firmware global interface ++ */ ++struct panthor_fw_global_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_global_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler/FW management logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. ++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream group slot control interface. ++ * ++ * Used to expose global FW properties. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_global_control_iface *control; ++ ++ /** ++ * @input: Global input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_global_input_iface *input; ++ ++ /** ++ * @output: Global output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_global_output_iface *output; ++}; ++ ++/** ++ * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW ++ * @__iface: The interface to operate on. ++ * @__in_reg: Name of the register to update in the input section of the interface. ++ * @__out_reg: Name of the register to take as a reference in the output section of the ++ * interface. ++ * @__mask: Mask to apply to the update. ++ * ++ * The Host -> FW event/message passing was designed to be lockless, with each side of ++ * the channel having its writeable section. Events are signaled as a difference between ++ * the host and FW side in the req/ack registers (when a bit differs, there's an event ++ * pending, when they are the same, nothing needs attention). ++ * ++ * This helper allows one to update the req register based on the current value of the ++ * ack register managed by the FW. Toggling a specific bit will flag an event. In order ++ * for events to be re-evaluated, the interface doorbell needs to be rung. ++ * ++ * Concurrent accesses to the same req register is covered. ++ * ++ * Anything requiring atomic updates to multiple registers requires a dedicated lock. ++ */ ++#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \ ++ do { \ ++ u32 __cur_val, __new_val, __out_val; \ ++ spin_lock(&(__iface)->lock); \ ++ __cur_val = READ_ONCE((__iface)->input->__in_reg); \ ++ __out_val = READ_ONCE((__iface)->output->__out_reg); \ ++ __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \ ++ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ ++ spin_unlock(&(__iface)->lock); \ ++ } while (0) ++ ++/** ++ * panthor_fw_update_reqs() - Update bits to reflect a configuration change ++ * @__iface: The interface to operate on. ++ * @__in_reg: Name of the register to update in the input section of the interface. ++ * @__val: Value to set. ++ * @__mask: Mask to apply to the update. 
++ * ++ * Some configuration get passed through req registers that are also used to ++ * send events to the FW. Those req registers being updated from the interrupt ++ * handler, they require special helpers to update the configuration part as well. ++ * ++ * Concurrent accesses to the same req register is covered. ++ * ++ * Anything requiring atomic updates to multiple registers requires a dedicated lock. ++ */ ++#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \ ++ do { \ ++ u32 __cur_val, __new_val; \ ++ spin_lock(&(__iface)->lock); \ ++ __cur_val = READ_ONCE((__iface)->input->__in_reg); \ ++ __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ ++ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ ++ spin_unlock(&(__iface)->lock); \ ++ } while (0) ++ ++struct panthor_fw_global_iface * ++panthor_fw_get_glb_iface(struct panthor_device *ptdev); ++ ++struct panthor_fw_csg_iface * ++panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); ++ ++struct panthor_fw_cs_iface * ++panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); ++ ++int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, ++ u32 *acked, u32 timeout_ms); ++ ++int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked, ++ u32 timeout_ms); ++ ++void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot); ++ ++struct panthor_kernel_bo * ++panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, ++ struct panthor_fw_ringbuf_input_iface **input, ++ const struct panthor_fw_ringbuf_output_iface **output, ++ u32 *input_fw_va, u32 *output_fw_va); ++struct panthor_kernel_bo * ++panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size); ++ ++struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev); ++ ++void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang); ++int panthor_fw_post_reset(struct panthor_device *ptdev); ++ ++static inline void panthor_fw_suspend(struct panthor_device *ptdev) ++{ ++ panthor_fw_pre_reset(ptdev, false); ++} ++ ++static inline int panthor_fw_resume(struct panthor_device *ptdev) ++{ ++ return panthor_fw_post_reset(ptdev); ++} ++ ++int panthor_fw_init(struct panthor_device *ptdev); ++void panthor_fw_unplug(struct panthor_device *ptdev); ++ ++#endif diff --git a/patches-6.6/034-38-v6.10-drm-panthor-Add-the-heap-logical-block.patch b/patches-6.6/034-38-v6.10-drm-panthor-Add-the-heap-logical-block.patch new file mode 100644 index 0000000..a509424 --- /dev/null +++ b/patches-6.6/034-38-v6.10-drm-panthor-Add-the-heap-logical-block.patch @@ -0,0 +1,696 @@ +From 9cca48fa4f8933a2dadf2f011d461329ca0a8337 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:23 +0100 +Subject: [PATCH] drm/panthor: Add the heap logical block + +Tiler heap growing requires some kernel driver involvement: when the +tiler runs out of heap memory, it will raise an exception which is +either directly handled by the firmware if some free heap chunks are +available in the heap context, or passed back to the kernel otherwise. +The heap helpers will be used by the scheduler logic to allocate more +heap chunks to a heap context, when such a situation happens. + +Heap context creation is explicitly requested by userspace (using +the TILER_HEAP_CREATE ioctl), and the returned context is attached to a +queue through some command stream instruction. 
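+
+As an illustration of how these helpers fit together, here is a rough usage
+sketch built only from the functions this patch adds (the calling function
+and the numbers are made up; the real callers are the ioctl frontend and the
+scheduler, and they handle errors and locking more carefully):
+
+static int example_tiler_heap_flow(struct panthor_device *ptdev,
+				   struct panthor_vm *vm)
+{
+	u64 heap_ctx_gpu_va, first_chunk_gpu_va, new_chunk_gpu_va;
+	struct panthor_heap_pool *pool;
+	int handle, ret;
+
+	/* One heap pool per VM, holding up to 128 heap contexts. */
+	pool = panthor_heap_pool_create(ptdev, vm);
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
+
+	/* Roughly what TILER_HEAP_CREATE does: one initial 2M chunk. */
+	handle = panthor_heap_create(pool, 1, SZ_2M, 64, 32,
+				     &heap_ctx_gpu_va, &first_chunk_gpu_va);
+	if (handle < 0) {
+		panthor_heap_pool_destroy(pool);
+		return handle;
+	}
+
+	/* Roughly what the tiler OOM path does when the FW needs memory. */
+	ret = panthor_heap_grow(pool, heap_ctx_gpu_va, 16, 1,
+				&new_chunk_gpu_va);
+	if (ret == -EBUSY)
+		ret = 0;	/* Let the FW wait for fragment jobs instead. */
+
+	/* Roughly what TILER_HEAP_DESTROY does, then drop the pool. */
+	panthor_heap_destroy(pool, handle);
+	panthor_heap_pool_destroy(pool);
+	return ret;
+}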
+ +All the kernel does is keep the list of heap chunks allocated to a +context, so they can be freed when TILER_HEAP_DESTROY is called, or +extended when the FW requests a new chunk. + +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix FIXME comment +- Add Steve's R-b + +v4: +- Rework locking to allow concurrent calls to panthor_heap_grow() +- Add a helper to return a heap chunk if we couldn't pass it to the + FW because the group was scheduled out + +v3: +- Add a FIXME for the heap OOM deadlock +- Use the panthor_kernel_bo abstraction for the heap context and heap + chunks +- Drop the panthor_heap_gpu_ctx struct as it is opaque to the driver +- Ensure that the heap context is aligned to the GPU cache line size +- Minor code tidy ups + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-10-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_heap.c | 597 +++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_heap.h | 39 ++ + 2 files changed, 636 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_heap.c + create mode 100644 drivers/gpu/drm/panthor/panthor_heap.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_heap.c +@@ -0,0 +1,597 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. */ ++ ++#include ++#include ++ ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++ ++/* ++ * The GPU heap context is an opaque structure used by the GPU to track the ++ * heap allocations. The driver should only touch it to initialize it (zero all ++ * fields). Because the CPU and GPU can both access this structure it is ++ * required to be GPU cache line aligned. ++ */ ++#define HEAP_CONTEXT_SIZE 32 ++ ++/** ++ * struct panthor_heap_chunk_header - Heap chunk header ++ */ ++struct panthor_heap_chunk_header { ++ /** ++ * @next: Next heap chunk in the list. ++ * ++ * This is a GPU VA. ++ */ ++ u64 next; ++ ++ /** @unknown: MBZ. */ ++ u32 unknown[14]; ++}; ++ ++/** ++ * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks. ++ */ ++struct panthor_heap_chunk { ++ /** @node: Used to insert the heap chunk in panthor_heap::chunks. */ ++ struct list_head node; ++ ++ /** @bo: Buffer object backing the heap chunk. */ ++ struct panthor_kernel_bo *bo; ++}; ++ ++/** ++ * struct panthor_heap - Structure used to manage tiler heap contexts. ++ */ ++struct panthor_heap { ++ /** @chunks: List containing all heap chunks allocated so far. */ ++ struct list_head chunks; ++ ++ /** @lock: Lock protecting insertion in the chunks list. */ ++ struct mutex lock; ++ ++ /** @chunk_size: Size of each chunk. */ ++ u32 chunk_size; ++ ++ /** @max_chunks: Maximum number of chunks. */ ++ u32 max_chunks; ++ ++ /** ++ * @target_in_flight: Number of in-flight render passes after which ++ * we'd let the FW wait for fragment job to finish instead of allocating new chunks. ++ */ ++ u32 target_in_flight; ++ ++ /** @chunk_count: Number of heap chunks currently allocated. */ ++ u32 chunk_count; ++}; ++ ++#define MAX_HEAPS_PER_POOL 128 ++ ++/** ++ * struct panthor_heap_pool - Pool of heap contexts ++ * ++ * The pool is attached to a panthor_file and can't be shared across processes. ++ */ ++struct panthor_heap_pool { ++ /** @refcount: Reference count. 
*/ ++ struct kref refcount; ++ ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** @vm: VM this pool is bound to. */ ++ struct panthor_vm *vm; ++ ++ /** @lock: Lock protecting access to @xa. */ ++ struct rw_semaphore lock; ++ ++ /** @xa: Array storing panthor_heap objects. */ ++ struct xarray xa; ++ ++ /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ ++ struct panthor_kernel_bo *gpu_contexts; ++}; ++ ++static int panthor_heap_ctx_stride(struct panthor_device *ptdev) ++{ ++ u32 l2_features = ptdev->gpu_info.l2_features; ++ u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features); ++ ++ return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size); ++} ++ ++static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id) ++{ ++ return panthor_heap_ctx_stride(pool->ptdev) * id; ++} ++ ++static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) ++{ ++ return pool->gpu_contexts->kmap + ++ panthor_get_heap_ctx_offset(pool, id); ++} ++ ++static void panthor_free_heap_chunk(struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ struct panthor_heap_chunk *chunk) ++{ ++ mutex_lock(&heap->lock); ++ list_del(&chunk->node); ++ heap->chunk_count--; ++ mutex_unlock(&heap->lock); ++ ++ panthor_kernel_bo_destroy(vm, chunk->bo); ++ kfree(chunk); ++} ++ ++static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, ++ struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ bool initial_chunk) ++{ ++ struct panthor_heap_chunk *chunk; ++ struct panthor_heap_chunk_header *hdr; ++ int ret; ++ ++ chunk = kmalloc(sizeof(*chunk), GFP_KERNEL); ++ if (!chunk) ++ return -ENOMEM; ++ ++ chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(chunk->bo)) { ++ ret = PTR_ERR(chunk->bo); ++ goto err_free_chunk; ++ } ++ ++ ret = panthor_kernel_bo_vmap(chunk->bo); ++ if (ret) ++ goto err_destroy_bo; ++ ++ hdr = chunk->bo->kmap; ++ memset(hdr, 0, sizeof(*hdr)); ++ ++ if (initial_chunk && !list_empty(&heap->chunks)) { ++ struct panthor_heap_chunk *prev_chunk; ++ u64 prev_gpuva; ++ ++ prev_chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ ++ prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo); ++ hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) | ++ (heap->chunk_size >> 12); ++ } ++ ++ panthor_kernel_bo_vunmap(chunk->bo); ++ ++ mutex_lock(&heap->lock); ++ list_add(&chunk->node, &heap->chunks); ++ heap->chunk_count++; ++ mutex_unlock(&heap->lock); ++ ++ return 0; ++ ++err_destroy_bo: ++ panthor_kernel_bo_destroy(vm, chunk->bo); ++ ++err_free_chunk: ++ kfree(chunk); ++ ++ return ret; ++} ++ ++static void panthor_free_heap_chunks(struct panthor_vm *vm, ++ struct panthor_heap *heap) ++{ ++ struct panthor_heap_chunk *chunk, *tmp; ++ ++ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) ++ panthor_free_heap_chunk(vm, heap, chunk); ++} ++ ++static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, ++ struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ u32 chunk_count) ++{ ++ int ret; ++ u32 i; ++ ++ for (i = 0; i < chunk_count; i++) { ++ ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int ++panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) ++{ ++ struct panthor_heap *heap; ++ ++ heap = xa_erase(&pool->xa, handle); ++ if (!heap) ++ return -EINVAL; ++ ++ panthor_free_heap_chunks(pool->vm, heap); ++ 
mutex_destroy(&heap->lock); ++ kfree(heap); ++ return 0; ++} ++ ++/** ++ * panthor_heap_destroy() - Destroy a heap context ++ * @pool: Pool this context belongs to. ++ * @handle: Handle returned by panthor_heap_create(). ++ */ ++int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) ++{ ++ int ret; ++ ++ down_write(&pool->lock); ++ ret = panthor_heap_destroy_locked(pool, handle); ++ up_write(&pool->lock); ++ ++ return ret; ++} ++ ++/** ++ * panthor_heap_create() - Create a heap context ++ * @pool: Pool to instantiate the heap context from. ++ * @initial_chunk_count: Number of chunk allocated at initialization time. ++ * Must be at least 1. ++ * @chunk_size: The size of each chunk. Must be a power of two between 256k ++ * and 2M. ++ * @max_chunks: Maximum number of chunks that can be allocated. ++ * @target_in_flight: Maximum number of in-flight render passes. ++ * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap ++ * context. ++ * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk ++ * assigned to the heap context. ++ * ++ * Return: a positive handle on success, a negative error otherwise. ++ */ ++int panthor_heap_create(struct panthor_heap_pool *pool, ++ u32 initial_chunk_count, ++ u32 chunk_size, ++ u32 max_chunks, ++ u32 target_in_flight, ++ u64 *heap_ctx_gpu_va, ++ u64 *first_chunk_gpu_va) ++{ ++ struct panthor_heap *heap; ++ struct panthor_heap_chunk *first_chunk; ++ struct panthor_vm *vm; ++ int ret = 0; ++ u32 id; ++ ++ if (initial_chunk_count == 0) ++ return -EINVAL; ++ ++ if (hweight32(chunk_size) != 1 || ++ chunk_size < SZ_256K || chunk_size > SZ_2M) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ vm = panthor_vm_get(pool->vm); ++ up_read(&pool->lock); ++ ++ /* The pool has been destroyed, we can't create a new heap. */ ++ if (!vm) ++ return -EINVAL; ++ ++ heap = kzalloc(sizeof(*heap), GFP_KERNEL); ++ if (!heap) { ++ ret = -ENOMEM; ++ goto err_put_vm; ++ } ++ ++ mutex_init(&heap->lock); ++ INIT_LIST_HEAD(&heap->chunks); ++ heap->chunk_size = chunk_size; ++ heap->max_chunks = max_chunks; ++ heap->target_in_flight = target_in_flight; ++ ++ ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, ++ initial_chunk_count); ++ if (ret) ++ goto err_free_heap; ++ ++ first_chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo); ++ ++ down_write(&pool->lock); ++ /* The pool has been destroyed, we can't create a new heap. */ ++ if (!pool->vm) { ++ ret = -EINVAL; ++ } else { ++ ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL); ++ if (!ret) { ++ void *gpu_ctx = panthor_get_heap_ctx(pool, id); ++ ++ memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev)); ++ *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) + ++ panthor_get_heap_ctx_offset(pool, id); ++ } ++ } ++ up_write(&pool->lock); ++ ++ if (ret) ++ goto err_free_heap; ++ ++ panthor_vm_put(vm); ++ return id; ++ ++err_free_heap: ++ panthor_free_heap_chunks(pool->vm, heap); ++ mutex_destroy(&heap->lock); ++ kfree(heap); ++ ++err_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++/** ++ * panthor_heap_return_chunk() - Return an unused heap chunk ++ * @pool: The pool this heap belongs to. ++ * @heap_gpu_va: The GPU address of the heap context. ++ * @chunk_gpu_va: The chunk VA to return. 
++ * ++ * This function is used when a chunk allocated with panthor_heap_grow() ++ * couldn't be linked to the heap context through the FW interface because ++ * the group requesting the allocation was scheduled out in the meantime. ++ */ ++int panthor_heap_return_chunk(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u64 chunk_gpu_va) ++{ ++ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); ++ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); ++ struct panthor_heap_chunk *chunk, *tmp, *removed = NULL; ++ struct panthor_heap *heap; ++ int ret; ++ ++ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ heap = xa_load(&pool->xa, heap_id); ++ if (!heap) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ chunk_gpu_va &= GENMASK_ULL(63, 12); ++ ++ mutex_lock(&heap->lock); ++ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) { ++ if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) { ++ removed = chunk; ++ list_del(&chunk->node); ++ heap->chunk_count--; ++ break; ++ } ++ } ++ mutex_unlock(&heap->lock); ++ ++ if (removed) { ++ panthor_kernel_bo_destroy(pool->vm, chunk->bo); ++ kfree(chunk); ++ ret = 0; ++ } else { ++ ret = -EINVAL; ++ } ++ ++out_unlock: ++ up_read(&pool->lock); ++ return ret; ++} ++ ++/** ++ * panthor_heap_grow() - Make a heap context grow. ++ * @pool: The pool this heap belongs to. ++ * @heap_gpu_va: The GPU address of the heap context. ++ * @renderpasses_in_flight: Number of render passes currently in-flight. ++ * @pending_frag_count: Number of fragment jobs waiting for execution/completion. ++ * @new_chunk_gpu_va: Pointer used to return the chunk VA. ++ */ ++int panthor_heap_grow(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u32 renderpasses_in_flight, ++ u32 pending_frag_count, ++ u64 *new_chunk_gpu_va) ++{ ++ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); ++ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); ++ struct panthor_heap_chunk *chunk; ++ struct panthor_heap *heap; ++ int ret; ++ ++ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ heap = xa_load(&pool->xa, heap_id); ++ if (!heap) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ /* If we reached the target in-flight render passes, or if we ++ * reached the maximum number of chunks, let the FW figure another way to ++ * find some memory (wait for render passes to finish, or call the exception ++ * handler provided by the userspace driver, if any). ++ */ ++ if (renderpasses_in_flight > heap->target_in_flight || ++ (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } else if (heap->chunk_count >= heap->max_chunks) { ++ ret = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation, ++ * which goes through the blocking allocation path. Ultimately, we ++ * want a non-blocking allocation, so we can immediately report to the ++ * FW when the system is running out of memory. In that case, the FW ++ * can call a user-provided exception handler, which might try to free ++ * some tiler memory by issuing an intermediate fragment job. If the ++ * exception handler can't do anything, it will flag the queue as ++ * faulty so the job that triggered this tiler chunk allocation and all ++ * further jobs in this queue fail immediately instead of having to ++ * wait for the job timeout. 
++ */ ++ ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); ++ if (ret) ++ goto out_unlock; ++ ++ chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) | ++ (heap->chunk_size >> 12); ++ ret = 0; ++ ++out_unlock: ++ up_read(&pool->lock); ++ return ret; ++} ++ ++static void panthor_heap_pool_release(struct kref *refcount) ++{ ++ struct panthor_heap_pool *pool = ++ container_of(refcount, struct panthor_heap_pool, refcount); ++ ++ xa_destroy(&pool->xa); ++ kfree(pool); ++} ++ ++/** ++ * panthor_heap_pool_put() - Release a heap pool reference ++ * @pool: Pool to release the reference on. Can be NULL. ++ */ ++void panthor_heap_pool_put(struct panthor_heap_pool *pool) ++{ ++ if (pool) ++ kref_put(&pool->refcount, panthor_heap_pool_release); ++} ++ ++/** ++ * panthor_heap_pool_get() - Get a heap pool reference ++ * @pool: Pool to get the reference on. Can be NULL. ++ * ++ * Return: @pool. ++ */ ++struct panthor_heap_pool * ++panthor_heap_pool_get(struct panthor_heap_pool *pool) ++{ ++ if (pool) ++ kref_get(&pool->refcount); ++ ++ return pool; ++} ++ ++/** ++ * panthor_heap_pool_create() - Create a heap pool ++ * @ptdev: Device. ++ * @vm: The VM this heap pool will be attached to. ++ * ++ * Heap pools might contain up to 128 heap contexts, and are per-VM. ++ * ++ * Return: A valid pointer on success, a negative error code otherwise. ++ */ ++struct panthor_heap_pool * ++panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) ++{ ++ size_t bosize = ALIGN(MAX_HEAPS_PER_POOL * ++ panthor_heap_ctx_stride(ptdev), ++ 4096); ++ struct panthor_heap_pool *pool; ++ int ret = 0; ++ ++ pool = kzalloc(sizeof(*pool), GFP_KERNEL); ++ if (!pool) ++ return ERR_PTR(-ENOMEM); ++ ++ /* We want a weak ref here: the heap pool belongs to the VM, so we're ++ * sure that, as long as the heap pool exists, the VM exists too. ++ */ ++ pool->vm = vm; ++ pool->ptdev = ptdev; ++ init_rwsem(&pool->lock); ++ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1); ++ kref_init(&pool->refcount); ++ ++ pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(pool->gpu_contexts)) { ++ ret = PTR_ERR(pool->gpu_contexts); ++ goto err_destroy_pool; ++ } ++ ++ ret = panthor_kernel_bo_vmap(pool->gpu_contexts); ++ if (ret) ++ goto err_destroy_pool; ++ ++ return pool; ++ ++err_destroy_pool: ++ panthor_heap_pool_destroy(pool); ++ return ERR_PTR(ret); ++} ++ ++/** ++ * panthor_heap_pool_destroy() - Destroy a heap pool. ++ * @pool: Pool to destroy. ++ * ++ * This function destroys all heap contexts and their resources. Thus ++ * preventing any use of the heap context or the chunk attached to them ++ * after that point. ++ * ++ * If the GPU still has access to some heap contexts, a fault should be ++ * triggered, which should flag the command stream groups using these ++ * context as faulty. ++ * ++ * The heap pool object is only released when all references to this pool ++ * are released. 
++ */ ++void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) ++{ ++ struct panthor_heap *heap; ++ unsigned long i; ++ ++ if (!pool) ++ return; ++ ++ down_write(&pool->lock); ++ xa_for_each(&pool->xa, i, heap) ++ drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); ++ ++ if (!IS_ERR_OR_NULL(pool->gpu_contexts)) ++ panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts); ++ ++ /* Reflects the fact the pool has been destroyed. */ ++ pool->vm = NULL; ++ up_write(&pool->lock); ++ ++ panthor_heap_pool_put(pool); ++} +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_heap.h +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_HEAP_H__ ++#define __PANTHOR_HEAP_H__ ++ ++#include ++ ++struct panthor_device; ++struct panthor_heap_pool; ++struct panthor_vm; ++ ++int panthor_heap_create(struct panthor_heap_pool *pool, ++ u32 initial_chunk_count, ++ u32 chunk_size, ++ u32 max_chunks, ++ u32 target_in_flight, ++ u64 *heap_ctx_gpu_va, ++ u64 *first_chunk_gpu_va); ++int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle); ++ ++struct panthor_heap_pool * ++panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm); ++void panthor_heap_pool_destroy(struct panthor_heap_pool *pool); ++ ++struct panthor_heap_pool * ++panthor_heap_pool_get(struct panthor_heap_pool *pool); ++void panthor_heap_pool_put(struct panthor_heap_pool *pool); ++ ++int panthor_heap_grow(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u32 renderpasses_in_flight, ++ u32 pending_frag_count, ++ u64 *new_chunk_gpu_va); ++int panthor_heap_return_chunk(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u64 chunk_gpu_va); ++ ++#endif diff --git a/patches-6.6/034-39-v6.10-drm-panthor-Add-the-scheduler-logical-block.patch b/patches-6.6/034-39-v6.10-drm-panthor-Add-the-scheduler-logical-block.patch new file mode 100644 index 0000000..a32107c --- /dev/null +++ b/patches-6.6/034-39-v6.10-drm-panthor-Add-the-scheduler-logical-block.patch @@ -0,0 +1,3647 @@ +From de85488138247d034eb3241840424a54d660926b Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:24 +0100 +Subject: [PATCH] drm/panthor: Add the scheduler logical block + +This is the piece of software interacting with the FW scheduler, and +taking care of some scheduling aspects when the FW comes short of slots +scheduling slots. Indeed, the FW only expose a few slots, and the kernel +has to give all submission contexts, a chance to execute their jobs. + +The kernel-side scheduler is timeslice-based, with a round-robin queue +per priority level. + +Job submission is handled with a 1:1 drm_sched_entity:drm_gpu_scheduler, +allowing us to delegate the dependency tracking to the core. + +All the gory details should be documented inline. 
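+
+The "round-robin queue per priority level" idea boils down to something like
+the sketch below, using the regular <linux/list.h> helpers (the types and the
+function are made up for illustration; the real logic is the tick work in
+panthor_sched.c, which also has to deal with idle/blocked groups and FW slot
+eviction):
+
+struct example_group {
+	struct list_head run_node;
+};
+
+/* One runnable list per priority level, highest priority scanned first. */
+static struct example_group *
+example_pick_next_group(struct list_head *runnable, int prio_count)
+{
+	int prio;
+
+	for (prio = prio_count - 1; prio >= 0; prio--) {
+		struct example_group *group;
+
+		if (list_empty(&runnable[prio]))
+			continue;
+
+		/* Take the head and send it to the tail: round-robin. */
+		group = list_first_entry(&runnable[prio],
+					 struct example_group, run_node);
+		list_move_tail(&group->run_node, &runnable[prio]);
+		return group;
+	}
+
+	return NULL;
+}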
+ +v6: +- Add Maxime's and Heiko's acks +- Make sure the scheduler is initialized before queueing the tick work + in the MMU fault handler +- Keep header inclusion alphabetically ordered + +v5: +- Fix typos +- Call panthor_kernel_bo_destroy(group->syncobjs) unconditionally +- Don't move the group to the waiting list tail when it was already + waiting for a different syncobj +- Fix fatal_queues flagging in the tiler OOM path +- Don't warn when more than one job timesout on a group +- Add a warning message when we fail to allocate a heap chunk +- Add Steve's R-b + +v4: +- Check drmm_mutex_init() return code +- s/drm_gem_vmap_unlocked/drm_gem_vunmap_unlocked/ in + panthor_queue_put_syncwait_obj() +- Drop unneeded WARN_ON() in cs_slot_sync_queue_state_locked() +- Use atomic_xchg() instead of atomic_fetch_and(0) +- Fix typos +- Let panthor_kernel_bo_destroy() check for IS_ERR_OR_NULL() BOs +- Defer TILER_OOM event handling to a separate workqueue to prevent + deadlocks when the heap chunk allocation is blocked on mem-reclaim. + This is just a temporary solution, until we add support for + non-blocking/failable allocations +- Pass the scheduler workqueue to drm_sched instead of instantiating + a separate one (no longer needed now that heap chunk allocation + happens on a dedicated wq) +- Set WQ_MEM_RECLAIM on the scheduler workqueue, so we can handle + job timeouts when the system is under mem pressure, and hopefully + free up some memory retained by these jobs + +v3: +- Rework the FW event handling logic to avoid races +- Make sure MMU faults kill the group immediately +- Use the panthor_kernel_bo abstraction for group/queue buffers +- Make in_progress an atomic_t, so we can check it without the reset lock + held +- Don't limit the number of groups per context to the FW scheduler + capacity. Fix the limit to 128 for now. +- Add a panthor_job_vm() helper +- Account for panthor_vm changes +- Add our job fence as DMA_RESV_USAGE_WRITE to all external objects + (was previously DMA_RESV_USAGE_BOOKKEEP). I don't get why, given + we're supposed to be fully-explicit, but other drivers do that, so + there must be a good reason +- Account for drm_sched changes +- Provide a panthor_queue_put_syncwait_obj() +- Unconditionally return groups to their idle list in + panthor_sched_suspend() +- Condition of sched_queue_{,delayed_}work fixed to be only when a reset + isn't pending or in progress. +- Several typos in comments fixed. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-11-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_sched.c | 3502 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_sched.h | 50 + + 2 files changed, 3552 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_sched.c + create mode 100644 drivers/gpu/drm/panthor/panthor_sched.h + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -0,0 +1,3502 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++/** ++ * DOC: Scheduler ++ * ++ * Mali CSF hardware adopts a firmware-assisted scheduling model, where ++ * the firmware takes care of scheduling aspects, to some extent. ++ * ++ * The scheduling happens at the scheduling group level, each group ++ * contains 1 to N queues (N is FW/hardware dependent, and exposed ++ * through the firmware interface). Each queue is assigned a command ++ * stream ring buffer, which serves as a way to get jobs submitted to ++ * the GPU, among other things. ++ * ++ * The firmware can schedule a maximum of M groups (M is FW/hardware ++ * dependent, and exposed through the firmware interface). Passed ++ * this maximum number of groups, the kernel must take care of ++ * rotating the groups passed to the firmware so every group gets ++ * a chance to have his queues scheduled for execution. ++ * ++ * The current implementation only supports with kernel-mode queues. ++ * In other terms, userspace doesn't have access to the ring-buffer. ++ * Instead, userspace passes indirect command stream buffers that are ++ * called from the queue ring-buffer by the kernel using a pre-defined ++ * sequence of command stream instructions to ensure the userspace driver ++ * always gets consistent results (cache maintenance, ++ * synchronization, ...). ++ * ++ * We rely on the drm_gpu_scheduler framework to deal with job ++ * dependencies and submission. As any other driver dealing with a ++ * FW-scheduler, we use the 1:1 entity:scheduler mode, such that each ++ * entity has its own job scheduler. When a job is ready to be executed ++ * (all its dependencies are met), it is pushed to the appropriate ++ * queue ring-buffer, and the group is scheduled for execution if it ++ * wasn't already active. ++ * ++ * Kernel-side group scheduling is timeslice-based. When we have less ++ * groups than there are slots, the periodic tick is disabled and we ++ * just let the FW schedule the active groups. When there are more ++ * groups than slots, we let each group a chance to execute stuff for ++ * a given amount of time, and then re-evaluate and pick new groups ++ * to schedule. The group selection algorithm is based on ++ * priority+round-robin. ++ * ++ * Even though user-mode queues is out of the scope right now, the ++ * current design takes them into account by avoiding any guess on the ++ * group/queue state that would be based on information we wouldn't have ++ * if userspace was in charge of the ring-buffer. That's also one of the ++ * reason we don't do 'cooperative' scheduling (encoding FW group slot ++ * reservation as dma_fence that would be returned from the ++ * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as ++ * a queue of waiters, ordered by job submission order). This approach ++ * would work for kernel-mode queues, but would make user-mode queues a ++ * lot more complicated to retrofit. 
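++ *
++ * As a concrete example of the rotation described above: if the FW exposes
++ * M group slots and M + 1 groups of the same priority are runnable, the
++ * periodic tick evicts one of the active groups at the end of its timeslice
++ * and schedules the group that has been waiting, in round-robin order, so
++ * that every group regularly gets access to a FW slot.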
++ */ ++ ++#define JOB_TIMEOUT_MS 5000 ++ ++#define MIN_CS_PER_CSG 8 ++ ++#define MIN_CSGS 3 ++#define MAX_CSG_PRIO 0xf ++ ++struct panthor_group; ++ ++/** ++ * struct panthor_csg_slot - Command stream group slot ++ * ++ * This represents a FW slot for a scheduling group. ++ */ ++struct panthor_csg_slot { ++ /** @group: Scheduling group bound to this slot. */ ++ struct panthor_group *group; ++ ++ /** @priority: Group priority. */ ++ u8 priority; ++ ++ /** ++ * @idle: True if the group bound to this slot is idle. ++ * ++ * A group is idle when it has nothing waiting for execution on ++ * all its queues, or when queues are blocked waiting for something ++ * to happen (synchronization object). ++ */ ++ bool idle; ++}; ++ ++/** ++ * enum panthor_csg_priority - Group priority ++ */ ++enum panthor_csg_priority { ++ /** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */ ++ PANTHOR_CSG_PRIORITY_LOW = 0, ++ ++ /** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */ ++ PANTHOR_CSG_PRIORITY_MEDIUM, ++ ++ /** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */ ++ PANTHOR_CSG_PRIORITY_HIGH, ++ ++ /** ++ * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group. ++ * ++ * Real-time priority allows one to preempt scheduling of other ++ * non-real-time groups. When such a group becomes executable, ++ * it will evict the group with the lowest non-rt priority if ++ * there's no free group slot available. ++ * ++ * Currently not exposed to userspace. ++ */ ++ PANTHOR_CSG_PRIORITY_RT, ++ ++ /** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */ ++ PANTHOR_CSG_PRIORITY_COUNT, ++}; ++ ++/** ++ * struct panthor_scheduler - Object used to manage the scheduler ++ */ ++struct panthor_scheduler { ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** ++ * @wq: Workqueue used by our internal scheduler logic and ++ * drm_gpu_scheduler. ++ * ++ * Used for the scheduler tick, group update or other kind of FW ++ * event processing that can't be handled in the threaded interrupt ++ * path. Also passed to the drm_gpu_scheduler instances embedded ++ * in panthor_queue. ++ */ ++ struct workqueue_struct *wq; ++ ++ /** ++ * @heap_alloc_wq: Workqueue used to schedule tiler_oom works. ++ * ++ * We have a queue dedicated to heap chunk allocation works to avoid ++ * blocking the rest of the scheduler if the allocation tries to ++ * reclaim memory. ++ */ ++ struct workqueue_struct *heap_alloc_wq; ++ ++ /** @tick_work: Work executed on a scheduling tick. */ ++ struct delayed_work tick_work; ++ ++ /** ++ * @sync_upd_work: Work used to process synchronization object updates. ++ * ++ * We use this work to unblock queues/groups that were waiting on a ++ * synchronization object. ++ */ ++ struct work_struct sync_upd_work; ++ ++ /** ++ * @fw_events_work: Work used to process FW events outside the interrupt path. ++ * ++ * Even if the interrupt is threaded, we need any event processing ++ * that require taking the panthor_scheduler::lock to be processed ++ * outside the interrupt path so we don't block the tick logic when ++ * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the ++ * event processing requires taking this lock, we just delegate all ++ * FW event processing to the scheduler workqueue. ++ */ ++ struct work_struct fw_events_work; ++ ++ /** ++ * @fw_events: Bitmask encoding pending FW events. ++ */ ++ atomic_t fw_events; ++ ++ /** ++ * @resched_target: When the next tick should occur. ++ * ++ * Expressed in jiffies. ++ */ ++ u64 resched_target; ++ ++ /** ++ * @last_tick: When the last tick occurred. 
++ * ++ * Expressed in jiffies. ++ */ ++ u64 last_tick; ++ ++ /** @tick_period: Tick period in jiffies. */ ++ u64 tick_period; ++ ++ /** ++ * @lock: Lock protecting access to all the scheduler fields. ++ * ++ * Should be taken in the tick work, the irq handler, and anywhere the @groups ++ * fields are touched. ++ */ ++ struct mutex lock; ++ ++ /** @groups: Various lists used to classify groups. */ ++ struct { ++ /** ++ * @runnable: Runnable group lists. ++ * ++ * When a group has queues that want to execute something, ++ * its panthor_group::run_node should be inserted here. ++ * ++ * One list per-priority. ++ */ ++ struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT]; ++ ++ /** ++ * @idle: Idle group lists. ++ * ++ * When all queues of a group are idle (either because they ++ * have nothing to execute, or because they are blocked), the ++ * panthor_group::run_node field should be inserted here. ++ * ++ * One list per-priority. ++ */ ++ struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT]; ++ ++ /** ++ * @waiting: List of groups whose queues are blocked on a ++ * synchronization object. ++ * ++ * Insert panthor_group::wait_node here when a group is waiting ++ * for synchronization objects to be signaled. ++ * ++ * This list is evaluated in the @sync_upd_work work. ++ */ ++ struct list_head waiting; ++ } groups; ++ ++ /** ++ * @csg_slots: FW command stream group slots. ++ */ ++ struct panthor_csg_slot csg_slots[MAX_CSGS]; ++ ++ /** @csg_slot_count: Number of command stream group slots exposed by the FW. */ ++ u32 csg_slot_count; ++ ++ /** @cs_slot_count: Number of command stream slot per group slot exposed by the FW. */ ++ u32 cs_slot_count; ++ ++ /** @as_slot_count: Number of address space slots supported by the MMU. */ ++ u32 as_slot_count; ++ ++ /** @used_csg_slot_count: Number of command stream group slot currently used. */ ++ u32 used_csg_slot_count; ++ ++ /** @sb_slot_count: Number of scoreboard slots. */ ++ u32 sb_slot_count; ++ ++ /** ++ * @might_have_idle_groups: True if an active group might have become idle. ++ * ++ * This will force a tick, so other runnable groups can be scheduled if one ++ * or more active groups became idle. ++ */ ++ bool might_have_idle_groups; ++ ++ /** @pm: Power management related fields. */ ++ struct { ++ /** @has_ref: True if the scheduler owns a runtime PM reference. */ ++ bool has_ref; ++ } pm; ++ ++ /** @reset: Reset related fields. */ ++ struct { ++ /** @lock: Lock protecting the other reset fields. */ ++ struct mutex lock; ++ ++ /** ++ * @in_progress: True if a reset is in progress. ++ * ++ * Set to true in panthor_sched_pre_reset() and back to false in ++ * panthor_sched_post_reset(). ++ */ ++ atomic_t in_progress; ++ ++ /** ++ * @stopped_groups: List containing all groups that were stopped ++ * before a reset. ++ * ++ * Insert panthor_group::run_node in the pre_reset path. ++ */ ++ struct list_head stopped_groups; ++ } reset; ++}; ++ ++/** ++ * struct panthor_syncobj_32b - 32-bit FW synchronization object ++ */ ++struct panthor_syncobj_32b { ++ /** @seqno: Sequence number. */ ++ u32 seqno; ++ ++ /** ++ * @status: Status. ++ * ++ * Not zero on failure. ++ */ ++ u32 status; ++}; ++ ++/** ++ * struct panthor_syncobj_64b - 64-bit FW synchronization object ++ */ ++struct panthor_syncobj_64b { ++ /** @seqno: Sequence number. */ ++ u64 seqno; ++ ++ /** ++ * @status: Status. ++ * ++ * Not zero on failure. ++ */ ++ u32 status; ++ ++ /** @pad: MBZ. 
*/ ++ u32 pad; ++}; ++ ++/** ++ * struct panthor_queue - Execution queue ++ */ ++struct panthor_queue { ++ /** @scheduler: DRM scheduler used for this queue. */ ++ struct drm_gpu_scheduler scheduler; ++ ++ /** @entity: DRM scheduling entity used for this queue. */ ++ struct drm_sched_entity entity; ++ ++ /** ++ * @remaining_time: Time remaining before the job timeout expires. ++ * ++ * The job timeout is suspended when the queue is not scheduled by the ++ * FW. Every time we suspend the timer, we need to save the remaining ++ * time so we can restore it later on. ++ */ ++ unsigned long remaining_time; ++ ++ /** @timeout_suspended: True if the job timeout was suspended. */ ++ bool timeout_suspended; ++ ++ /** ++ * @doorbell_id: Doorbell assigned to this queue. ++ * ++ * Right now, all groups share the same doorbell, and the doorbell ID ++ * is assigned to group_slot + 1 when the group is assigned a slot. But ++ * we might decide to provide fine grained doorbell assignment at some ++ * point, so don't have to wake up all queues in a group every time one ++ * of them is updated. ++ */ ++ u8 doorbell_id; ++ ++ /** ++ * @priority: Priority of the queue inside the group. ++ * ++ * Must be less than 16 (Only 4 bits available). ++ */ ++ u8 priority; ++#define CSF_MAX_QUEUE_PRIO GENMASK(3, 0) ++ ++ /** @ringbuf: Command stream ring-buffer. */ ++ struct panthor_kernel_bo *ringbuf; ++ ++ /** @iface: Firmware interface. */ ++ struct { ++ /** @mem: FW memory allocated for this interface. */ ++ struct panthor_kernel_bo *mem; ++ ++ /** @input: Input interface. */ ++ struct panthor_fw_ringbuf_input_iface *input; ++ ++ /** @output: Output interface. */ ++ const struct panthor_fw_ringbuf_output_iface *output; ++ ++ /** @input_fw_va: FW virtual address of the input interface buffer. */ ++ u32 input_fw_va; ++ ++ /** @output_fw_va: FW virtual address of the output interface buffer. */ ++ u32 output_fw_va; ++ } iface; ++ ++ /** ++ * @syncwait: Stores information about the synchronization object this ++ * queue is waiting on. ++ */ ++ struct { ++ /** @gpu_va: GPU address of the synchronization object. */ ++ u64 gpu_va; ++ ++ /** @ref: Reference value to compare against. */ ++ u64 ref; ++ ++ /** @gt: True if this is a greater-than test. */ ++ bool gt; ++ ++ /** @sync64: True if this is a 64-bit sync object. */ ++ bool sync64; ++ ++ /** @bo: Buffer object holding the synchronization object. */ ++ struct drm_gem_object *obj; ++ ++ /** @offset: Offset of the synchronization object inside @bo. */ ++ u64 offset; ++ ++ /** ++ * @kmap: Kernel mapping of the buffer object holding the ++ * synchronization object. ++ */ ++ void *kmap; ++ } syncwait; ++ ++ /** @fence_ctx: Fence context fields. */ ++ struct { ++ /** @lock: Used to protect access to all fences allocated by this context. */ ++ spinlock_t lock; ++ ++ /** ++ * @id: Fence context ID. ++ * ++ * Allocated with dma_fence_context_alloc(). ++ */ ++ u64 id; ++ ++ /** @seqno: Sequence number of the last initialized fence. */ ++ atomic64_t seqno; ++ ++ /** ++ * @in_flight_jobs: List containing all in-flight jobs. ++ * ++ * Used to keep track and signal panthor_job::done_fence when the ++ * synchronization object attached to the queue is signaled. ++ */ ++ struct list_head in_flight_jobs; ++ } fence_ctx; ++}; ++ ++/** ++ * enum panthor_group_state - Scheduling group state. ++ */ ++enum panthor_group_state { ++ /** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. 
*/
++ PANTHOR_CS_GROUP_CREATED,
++
++ /** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
++ PANTHOR_CS_GROUP_ACTIVE,
++
++ /**
++ * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is
++ * inactive/suspended right now.
++ */
++ PANTHOR_CS_GROUP_SUSPENDED,
++
++ /**
++ * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
++ *
++ * Can no longer be scheduled. The only allowed action is destruction.
++ */
++ PANTHOR_CS_GROUP_TERMINATED,
++};
++
++/**
++ * struct panthor_group - Scheduling group object
++ */
++struct panthor_group {
++ /** @refcount: Reference count */
++ struct kref refcount;
++
++ /** @ptdev: Device. */
++ struct panthor_device *ptdev;
++
++ /** @vm: VM bound to the group. */
++ struct panthor_vm *vm;
++
++ /** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
++ u64 compute_core_mask;
++
++ /** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
++ u64 fragment_core_mask;
++
++ /** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
++ u64 tiler_core_mask;
++
++ /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
++ u8 max_compute_cores;
++
++ /** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */
++ u8 max_fragment_cores;
++
++ /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
++ u8 max_tiler_cores;
++
++ /** @priority: Group priority (check panthor_csg_priority). */
++ u8 priority;
++
++ /** @blocked_queues: Bitmask reflecting the blocked queues. */
++ u32 blocked_queues;
++
++ /** @idle_queues: Bitmask reflecting the idle queues. */
++ u32 idle_queues;
++
++ /** @fatal_lock: Lock used to protect access to fatal fields. */
++ spinlock_t fatal_lock;
++
++ /** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
++ u32 fatal_queues;
++
++ /** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
++ atomic_t tiler_oom;
++
++ /** @queue_count: Number of queues in this group. */
++ u32 queue_count;
++
++ /** @queues: Queues owned by this group. */
++ struct panthor_queue *queues[MAX_CS_PER_CSG];
++
++ /**
++ * @csg_id: ID of the FW group slot.
++ *
++ * -1 when the group is not scheduled/active.
++ */
++ int csg_id;
++
++ /**
++ * @destroyed: True when the group has been destroyed.
++ *
++ * If a group is destroyed it becomes useless: no further jobs can be submitted
++ * to its queues. We simply wait for all references to be dropped so we can
++ * release the group object.
++ */
++ bool destroyed;
++
++ /**
++ * @timedout: True when a timeout occurred on any of the queues owned by
++ * this group.
++ *
++ * Timeouts can be reported by drm_sched or by the FW. In any case, any
++ * timeout situation is unrecoverable, and the group becomes useless.
++ * We simply wait for all references to be dropped so we can release the
++ * group object.
++ */
++ bool timedout;
++
++ /**
++ * @syncobjs: Pool of per-queue synchronization objects.
++ *
++ * One sync object per queue. The position of the sync object is
++ * determined by the queue index.
++ */
++ struct panthor_kernel_bo *syncobjs;
++
++ /** @state: Group state. */
++ enum panthor_group_state state;
++
++ /**
++ * @suspend_buf: Suspend buffer.
++ *
++ * Stores the state of the group and its queues when a group is suspended.
++ * Used at resume time to restore the group in its previous state.
++ *
++ * The size of the suspend buffer is exposed through the FW interface.
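++ *
++ * For illustration, csg_slot_prog_locked() below hands this buffer to
++ * the FW with:
++ *
++ *   csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf);
++ *
++ * so the FW has somewhere to save the group state when it gets suspended.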
++ */ ++ struct panthor_kernel_bo *suspend_buf; ++ ++ /** ++ * @protm_suspend_buf: Protection mode suspend buffer. ++ * ++ * Stores the state of the group and its queues when a group that's in ++ * protection mode is suspended. ++ * ++ * Used at resume time to restore the group in its previous state. ++ * ++ * The size of the protection mode suspend buffer is exposed through the ++ * FW interface. ++ */ ++ struct panthor_kernel_bo *protm_suspend_buf; ++ ++ /** @sync_upd_work: Work used to check/signal job fences. */ ++ struct work_struct sync_upd_work; ++ ++ /** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */ ++ struct work_struct tiler_oom_work; ++ ++ /** @term_work: Work used to finish the group termination procedure. */ ++ struct work_struct term_work; ++ ++ /** ++ * @release_work: Work used to release group resources. ++ * ++ * We need to postpone the group release to avoid a deadlock when ++ * the last ref is released in the tick work. ++ */ ++ struct work_struct release_work; ++ ++ /** ++ * @run_node: Node used to insert the group in the ++ * panthor_group::groups::{runnable,idle} and ++ * panthor_group::reset.stopped_groups lists. ++ */ ++ struct list_head run_node; ++ ++ /** ++ * @wait_node: Node used to insert the group in the ++ * panthor_group::groups::waiting list. ++ */ ++ struct list_head wait_node; ++}; ++ ++/** ++ * group_queue_work() - Queue a group work ++ * @group: Group to queue the work for. ++ * @wname: Work name. ++ * ++ * Grabs a ref and queue a work item to the scheduler workqueue. If ++ * the work was already queued, we release the reference we grabbed. ++ * ++ * Work callbacks must release the reference we grabbed here. ++ */ ++#define group_queue_work(group, wname) \ ++ do { \ ++ group_get(group); \ ++ if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \ ++ group_put(group); \ ++ } while (0) ++ ++/** ++ * sched_queue_work() - Queue a scheduler work. ++ * @sched: Scheduler object. ++ * @wname: Work name. ++ * ++ * Conditionally queues a scheduler work if no reset is pending/in-progress. ++ */ ++#define sched_queue_work(sched, wname) \ ++ do { \ ++ if (!atomic_read(&(sched)->reset.in_progress) && \ ++ !panthor_device_reset_is_pending((sched)->ptdev)) \ ++ queue_work((sched)->wq, &(sched)->wname ## _work); \ ++ } while (0) ++ ++/** ++ * sched_queue_delayed_work() - Queue a scheduler delayed work. ++ * @sched: Scheduler object. ++ * @wname: Work name. ++ * @delay: Work delay in jiffies. ++ * ++ * Conditionally queues a scheduler delayed work if no reset is ++ * pending/in-progress. ++ */ ++#define sched_queue_delayed_work(sched, wname, delay) \ ++ do { \ ++ if (!atomic_read(&sched->reset.in_progress) && \ ++ !panthor_device_reset_is_pending((sched)->ptdev)) \ ++ mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \ ++ } while (0) ++ ++/* ++ * We currently set the maximum of groups per file to an arbitrary low value. ++ * But this can be updated if we need more. ++ */ ++#define MAX_GROUPS_PER_POOL 128 ++ ++/** ++ * struct panthor_group_pool - Group pool ++ * ++ * Each file get assigned a group pool. ++ */ ++struct panthor_group_pool { ++ /** @xa: Xarray used to manage group handles. */ ++ struct xarray xa; ++}; ++ ++/** ++ * struct panthor_job - Used to manage GPU job ++ */ ++struct panthor_job { ++ /** @base: Inherit from drm_sched_job. */ ++ struct drm_sched_job base; ++ ++ /** @refcount: Reference count. */ ++ struct kref refcount; ++ ++ /** @group: Group of the queue this job will be pushed to. 
*/ ++ struct panthor_group *group; ++ ++ /** @queue_idx: Index of the queue inside @group. */ ++ u32 queue_idx; ++ ++ /** @call_info: Information about the userspace command stream call. */ ++ struct { ++ /** @start: GPU address of the userspace command stream. */ ++ u64 start; ++ ++ /** @size: Size of the userspace command stream. */ ++ u32 size; ++ ++ /** ++ * @latest_flush: Flush ID at the time the userspace command ++ * stream was built. ++ * ++ * Needed for the flush reduction mechanism. ++ */ ++ u32 latest_flush; ++ } call_info; ++ ++ /** @ringbuf: Position of this job is in the ring buffer. */ ++ struct { ++ /** @start: Start offset. */ ++ u64 start; ++ ++ /** @end: End offset. */ ++ u64 end; ++ } ringbuf; ++ ++ /** ++ * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs ++ * list. ++ */ ++ struct list_head node; ++ ++ /** @done_fence: Fence signaled when the job is finished or cancelled. */ ++ struct dma_fence *done_fence; ++}; ++ ++static void ++panthor_queue_put_syncwait_obj(struct panthor_queue *queue) ++{ ++ if (queue->syncwait.kmap) { ++ struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap); ++ ++ drm_gem_vunmap_unlocked(queue->syncwait.obj, &map); ++ queue->syncwait.kmap = NULL; ++ } ++ ++ drm_gem_object_put(queue->syncwait.obj); ++ queue->syncwait.obj = NULL; ++} ++ ++static void * ++panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_gem_object *bo; ++ struct iosys_map map; ++ int ret; ++ ++ if (queue->syncwait.kmap) ++ return queue->syncwait.kmap + queue->syncwait.offset; ++ ++ bo = panthor_vm_get_bo_for_va(group->vm, ++ queue->syncwait.gpu_va, ++ &queue->syncwait.offset); ++ if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo))) ++ goto err_put_syncwait_obj; ++ ++ queue->syncwait.obj = &bo->base.base; ++ ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ goto err_put_syncwait_obj; ++ ++ queue->syncwait.kmap = map.vaddr; ++ if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) ++ goto err_put_syncwait_obj; ++ ++ return queue->syncwait.kmap + queue->syncwait.offset; ++ ++err_put_syncwait_obj: ++ panthor_queue_put_syncwait_obj(queue); ++ return NULL; ++} ++ ++static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue) ++{ ++ if (IS_ERR_OR_NULL(queue)) ++ return; ++ ++ if (queue->entity.fence_context) ++ drm_sched_entity_destroy(&queue->entity); ++ ++ if (queue->scheduler.ops) ++ drm_sched_fini(&queue->scheduler); ++ ++ panthor_queue_put_syncwait_obj(queue); ++ ++ panthor_kernel_bo_destroy(group->vm, queue->ringbuf); ++ panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem); ++ ++ kfree(queue); ++} ++ ++static void group_release_work(struct work_struct *work) ++{ ++ struct panthor_group *group = container_of(work, ++ struct panthor_group, ++ release_work); ++ struct panthor_device *ptdev = group->ptdev; ++ u32 i; ++ ++ for (i = 0; i < group->queue_count; i++) ++ group_free_queue(group, group->queues[i]); ++ ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf); ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf); ++ panthor_kernel_bo_destroy(group->vm, group->syncobjs); ++ ++ panthor_vm_put(group->vm); ++ kfree(group); ++} ++ ++static void group_release(struct kref *kref) ++{ ++ struct panthor_group *group = container_of(kref, ++ struct panthor_group, ++ refcount); ++ struct panthor_device *ptdev = 
group->ptdev; ++ ++ drm_WARN_ON(&ptdev->base, group->csg_id >= 0); ++ drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node)); ++ drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node)); ++ ++ queue_work(panthor_cleanup_wq, &group->release_work); ++} ++ ++static void group_put(struct panthor_group *group) ++{ ++ if (group) ++ kref_put(&group->refcount, group_release); ++} ++ ++static struct panthor_group * ++group_get(struct panthor_group *group) ++{ ++ if (group) ++ kref_get(&group->refcount); ++ ++ return group; ++} ++ ++/** ++ * group_bind_locked() - Bind a group to a group slot ++ * @group: Group. ++ * @csg_id: Slot. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++group_bind_locked(struct panthor_group *group, u32 csg_id) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_csg_slot *csg_slot; ++ int ret; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS || ++ ptdev->scheduler->csg_slots[csg_id].group)) ++ return -EINVAL; ++ ++ ret = panthor_vm_active(group->vm); ++ if (ret) ++ return ret; ++ ++ csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ group_get(group); ++ group->csg_id = csg_id; ++ ++ /* Dummy doorbell allocation: doorbell is assigned to the group and ++ * all queues use the same doorbell. ++ * ++ * TODO: Implement LRU-based doorbell assignment, so the most often ++ * updated queues get their own doorbell, thus avoiding useless checks ++ * on queues belonging to the same group that are rarely updated. ++ */ ++ for (u32 i = 0; i < group->queue_count; i++) ++ group->queues[i]->doorbell_id = csg_id + 1; ++ ++ csg_slot->group = group; ++ ++ return 0; ++} ++ ++/** ++ * group_unbind_locked() - Unbind a group from a slot. ++ * @group: Group to unbind. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++group_unbind_locked(struct panthor_group *group) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_csg_slot *slot; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS)) ++ return -EINVAL; ++ ++ if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE)) ++ return -EINVAL; ++ ++ slot = &ptdev->scheduler->csg_slots[group->csg_id]; ++ panthor_vm_idle(group->vm); ++ group->csg_id = -1; ++ ++ /* Tiler OOM events will be re-issued next time the group is scheduled. */ ++ atomic_set(&group->tiler_oom, 0); ++ cancel_work(&group->tiler_oom_work); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ group->queues[i]->doorbell_id = -1; ++ ++ slot->group = NULL; ++ ++ group_put(group); ++ return 0; ++} ++ ++/** ++ * cs_slot_prog_locked() - Program a queue slot ++ * @ptdev: Device. ++ * @csg_id: Group slot ID. ++ * @cs_id: Queue slot ID. ++ * ++ * Program a queue slot with the queue information so things can start being ++ * executed on this queue. ++ * ++ * The group slot must have a group bound to it already (group_bind_locked()). 
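++ *
++ * For illustration, the tick path programs a freshly bound group roughly
++ * like this (simplified, see tick_ctx_apply()):
++ *
++ *   group_bind_locked(group, csg_id);
++ *   csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
++ *
++ * where csg_slot_prog_locked() calls this helper for every queue that
++ * exists in the group before toggling the doorbell request bits.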
++ */
++static void
++cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
++{
++ struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];
++ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
++
++ lockdep_assert_held(&ptdev->scheduler->lock);
++
++ queue->iface.input->extract = queue->iface.output->extract;
++ drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
++
++ cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
++ cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
++ cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
++ cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
++ cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
++ CS_CONFIG_DOORBELL(queue->doorbell_id);
++ cs_iface->input->ack_irq_mask = ~0;
++ panthor_fw_update_reqs(cs_iface, req,
++ CS_IDLE_SYNC_WAIT |
++ CS_IDLE_EMPTY |
++ CS_STATE_START |
++ CS_EXTRACT_EVENT,
++ CS_IDLE_SYNC_WAIT |
++ CS_IDLE_EMPTY |
++ CS_STATE_MASK |
++ CS_EXTRACT_EVENT);
++ if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
++ drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
++ queue->timeout_suspended = false;
++ }
++}
++
++/**
++ * cs_slot_reset_locked() - Reset a queue slot
++ * @ptdev: Device.
++ * @csg_id: Group slot.
++ * @cs_id: Queue slot.
++ *
++ * Change the queue slot state to STOP and suspend the queue timeout if
++ * the queue is not blocked.
++ *
++ * The group slot must have a group bound to it (group_bind_locked()).
++ */
++static int
++cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
++{
++ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
++ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
++ struct panthor_queue *queue = group->queues[cs_id];
++
++ lockdep_assert_held(&ptdev->scheduler->lock);
++
++ panthor_fw_update_reqs(cs_iface, req,
++ CS_STATE_STOP,
++ CS_STATE_MASK);
++
++ /* If the queue is blocked, we want to keep the timeout running, so
++ * we can detect unbounded waits and kill the group when that happens.
++ */
++ if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
++ queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
++ queue->timeout_suspended = true;
++ WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
++ }
++
++ return 0;
++}
++
++/**
++ * csg_slot_sync_priority_locked() - Synchronize the group slot priority
++ * @ptdev: Device.
++ * @csg_id: Group slot ID.
++ *
++ * Group slot priority update happens asynchronously. When we receive a
++ * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can
++ * reflect it to our panthor_csg_slot object.
++ */
++static void
++csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
++{
++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
++ struct panthor_fw_csg_iface *csg_iface;
++
++ lockdep_assert_held(&ptdev->scheduler->lock);
++
++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
++ csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28;
++}
++
++/**
++ * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state
++ * @ptdev: Device.
++ * @csg_id: Group slot.
++ * @cs_id: Queue slot.
++ *
++ * Queue state is updated on group suspend or STATUS_UPDATE event.
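++ *
++ * In short: an unblocked queue whose ring buffer is empty
++ * (insert == extract) and which has no active scoreboard entries is
++ * flagged in group->idle_queues, while a queue blocked on a SYNC_WAIT
++ * records its wait condition in queue->syncwait and is flagged in
++ * group->blocked_queues, which sync_upd_work() re-evaluates later.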
++ */ ++static void ++cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) ++{ ++ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; ++ struct panthor_queue *queue = group->queues[cs_id]; ++ struct panthor_fw_cs_iface *cs_iface = ++ panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id); ++ ++ u32 status_wait_cond; ++ ++ switch (cs_iface->output->status_blocked_reason) { ++ case CS_STATUS_BLOCKED_REASON_UNBLOCKED: ++ if (queue->iface.input->insert == queue->iface.output->extract && ++ cs_iface->output->status_scoreboards == 0) ++ group->idle_queues |= BIT(cs_id); ++ break; ++ ++ case CS_STATUS_BLOCKED_REASON_SYNC_WAIT: ++ if (list_empty(&group->wait_node)) { ++ list_move_tail(&group->wait_node, ++ &group->ptdev->scheduler->groups.waiting); ++ } ++ group->blocked_queues |= BIT(cs_id); ++ queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr; ++ queue->syncwait.ref = cs_iface->output->status_wait_sync_value; ++ status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK; ++ queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT; ++ if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) { ++ u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi; ++ ++ queue->syncwait.sync64 = true; ++ queue->syncwait.ref |= sync_val_hi << 32; ++ } else { ++ queue->syncwait.sync64 = false; ++ } ++ break; ++ ++ default: ++ /* Other reasons are not blocking. Consider the queue as runnable ++ * in those cases. ++ */ ++ break; ++ } ++} ++ ++static void ++csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ u32 i; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ group->idle_queues = 0; ++ group->blocked_queues = 0; ++ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ cs_slot_sync_queue_state_locked(ptdev, csg_id, i); ++ } ++} ++ ++static void ++csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_group *group; ++ enum panthor_group_state new_state, old_state; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ group = csg_slot->group; ++ ++ if (!group) ++ return; ++ ++ old_state = group->state; ++ switch (csg_iface->output->ack & CSG_STATE_MASK) { ++ case CSG_STATE_START: ++ case CSG_STATE_RESUME: ++ new_state = PANTHOR_CS_GROUP_ACTIVE; ++ break; ++ case CSG_STATE_TERMINATE: ++ new_state = PANTHOR_CS_GROUP_TERMINATED; ++ break; ++ case CSG_STATE_SUSPEND: ++ new_state = PANTHOR_CS_GROUP_SUSPENDED; ++ break; ++ } ++ ++ if (old_state == new_state) ++ return; ++ ++ if (new_state == PANTHOR_CS_GROUP_SUSPENDED) ++ csg_slot_sync_queues_state_locked(ptdev, csg_id); ++ ++ if (old_state == PANTHOR_CS_GROUP_ACTIVE) { ++ u32 i; ++ ++ /* Reset the queue slots so we start from a clean ++ * state when starting/resuming a new group on this ++ * CSG slot. No wait needed here, and no ringbell ++ * either, since the CS slot will only be re-used ++ * on the next CSG start operation. 
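++ *
++ * cs_slot_reset_locked() also suspends the drm_sched job timeout for
++ * queues that are not blocked on a sync object, so the remaining time
++ * can be restored when the queue is programmed again.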
++ */ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ cs_slot_reset_locked(ptdev, csg_id, i); ++ } ++ } ++ ++ group->state = new_state; ++} ++ ++static int ++csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) ++{ ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_csg_slot *csg_slot; ++ struct panthor_group *group; ++ u32 queue_mask = 0, i; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (priority > MAX_CSG_PRIO) ++ return -EINVAL; ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS)) ++ return -EINVAL; ++ ++ csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ group = csg_slot->group; ++ if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE) ++ return 0; ++ ++ csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id); ++ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) { ++ cs_slot_prog_locked(ptdev, csg_id, i); ++ queue_mask |= BIT(i); ++ } ++ } ++ ++ csg_iface->input->allow_compute = group->compute_core_mask; ++ csg_iface->input->allow_fragment = group->fragment_core_mask; ++ csg_iface->input->allow_other = group->tiler_core_mask; ++ csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | ++ CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | ++ CSG_EP_REQ_TILER(group->max_tiler_cores) | ++ CSG_EP_REQ_PRIORITY(priority); ++ csg_iface->input->config = panthor_vm_as(group->vm); ++ ++ if (group->suspend_buf) ++ csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf); ++ else ++ csg_iface->input->suspend_buf = 0; ++ ++ if (group->protm_suspend_buf) { ++ csg_iface->input->protm_suspend_buf = ++ panthor_kernel_bo_gpuva(group->protm_suspend_buf); ++ } else { ++ csg_iface->input->protm_suspend_buf = 0; ++ } ++ ++ csg_iface->input->ack_irq_mask = ~0; ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask); ++ return 0; ++} ++ ++static void ++cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 fatal; ++ u64 info; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ fatal = cs_iface->output->fatal; ++ info = cs_iface->output->fatal_info; ++ ++ if (group) ++ group->fatal_queues |= BIT(cs_id); ++ ++ sched_queue_delayed_work(sched, tick, 0); ++ drm_warn(&ptdev->base, ++ "CSG slot %d CS slot: %d\n" ++ "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FATAL.EXCEPTION_DATA: 0x%x\n" ++ "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", ++ csg_id, cs_id, ++ (unsigned int)CS_EXCEPTION_TYPE(fatal), ++ panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)), ++ (unsigned int)CS_EXCEPTION_DATA(fatal), ++ info); ++} ++ ++static void ++cs_slot_process_fault_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_queue *queue = group && cs_id < group->queue_count ? 
++ group->queues[cs_id] : NULL; ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 fault; ++ u64 info; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ fault = cs_iface->output->fault; ++ info = cs_iface->output->fault_info; ++ ++ if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) { ++ u64 cs_extract = queue->iface.output->extract; ++ struct panthor_job *job; ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) { ++ if (cs_extract >= job->ringbuf.end) ++ continue; ++ ++ if (cs_extract < job->ringbuf.start) ++ break; ++ ++ dma_fence_set_error(job->done_fence, -EINVAL); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ } ++ ++ drm_warn(&ptdev->base, ++ "CSG slot %d CS slot: %d\n" ++ "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FAULT.EXCEPTION_DATA: 0x%x\n" ++ "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", ++ csg_id, cs_id, ++ (unsigned int)CS_EXCEPTION_TYPE(fault), ++ panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)), ++ (unsigned int)CS_EXCEPTION_DATA(fault), ++ info); ++} ++ ++static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 renderpasses_in_flight, pending_frag_count; ++ struct panthor_heap_pool *heaps = NULL; ++ u64 heap_address, new_chunk_va = 0; ++ u32 vt_start, vt_end, frag_end; ++ int ret, csg_id; ++ ++ mutex_lock(&sched->lock); ++ csg_id = group->csg_id; ++ if (csg_id >= 0) { ++ struct panthor_fw_cs_iface *cs_iface; ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ heaps = panthor_vm_get_heap_pool(group->vm, false); ++ heap_address = cs_iface->output->heap_address; ++ vt_start = cs_iface->output->heap_vt_start; ++ vt_end = cs_iface->output->heap_vt_end; ++ frag_end = cs_iface->output->heap_frag_end; ++ renderpasses_in_flight = vt_start - frag_end; ++ pending_frag_count = vt_end - frag_end; ++ } ++ mutex_unlock(&sched->lock); ++ ++ /* The group got scheduled out, we stop here. We will get a new tiler OOM event ++ * when it's scheduled again. ++ */ ++ if (unlikely(csg_id < 0)) ++ return 0; ++ ++ if (!heaps || frag_end > vt_end || vt_end >= vt_start) { ++ ret = -EINVAL; ++ } else { ++ /* We do the allocation without holding the scheduler lock to avoid ++ * blocking the scheduling. ++ */ ++ ret = panthor_heap_grow(heaps, heap_address, ++ renderpasses_in_flight, ++ pending_frag_count, &new_chunk_va); ++ } ++ ++ if (ret && ret != -EBUSY) { ++ drm_warn(&ptdev->base, "Failed to extend the tiler heap\n"); ++ group->fatal_queues |= BIT(cs_id); ++ sched_queue_delayed_work(sched, tick, 0); ++ goto out_put_heap_pool; ++ } ++ ++ mutex_lock(&sched->lock); ++ csg_id = group->csg_id; ++ if (csg_id >= 0) { ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_fw_cs_iface *cs_iface; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ ++ cs_iface->input->heap_start = new_chunk_va; ++ cs_iface->input->heap_end = new_chunk_va; ++ panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM); ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id)); ++ panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); ++ } ++ mutex_unlock(&sched->lock); ++ ++ /* We allocated a chunck, but couldn't link it to the heap ++ * context because the group was scheduled out while we were ++ * allocating memory. 
We need to return this chunk to the heap. ++ */ ++ if (unlikely(csg_id < 0 && new_chunk_va)) ++ panthor_heap_return_chunk(heaps, heap_address, new_chunk_va); ++ ++ ret = 0; ++ ++out_put_heap_pool: ++ panthor_heap_pool_put(heaps); ++ return ret; ++} ++ ++static void group_tiler_oom_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, tiler_oom_work); ++ u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0); ++ ++ while (tiler_oom) { ++ u32 cs_id = ffs(tiler_oom) - 1; ++ ++ group_process_tiler_oom(group, cs_id); ++ tiler_oom &= ~BIT(cs_id); ++ } ++ ++ group_put(group); ++} ++ ++static void ++cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, !group)) ++ return; ++ ++ atomic_or(BIT(cs_id), &group->tiler_oom); ++ ++ /* We don't use group_queue_work() here because we want to queue the ++ * work item to the heap_alloc_wq. ++ */ ++ group_get(group); ++ if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work)) ++ group_put(group); ++} ++ ++static bool cs_slot_process_irq_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 req, ack, events; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ req = cs_iface->input->req; ++ ack = cs_iface->output->ack; ++ events = (req ^ ack) & CS_EVT_MASK; ++ ++ if (events & CS_FATAL) ++ cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id); ++ ++ if (events & CS_FAULT) ++ cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id); ++ ++ if (events & CS_TILER_OOM) ++ cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id); ++ ++ /* We don't acknowledge the TILER_OOM event since its handling is ++ * deferred to a separate work. ++ */ ++ panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT); ++ ++ return (events & (CS_FAULT | CS_TILER_OOM)) != 0; ++} ++ ++static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE; ++} ++ ++static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ sched->might_have_idle_groups = true; ++ ++ /* Schedule a tick so we can evict idle groups and schedule non-idle ++ * ones. This will also update runtime PM and devfreq busy/idle states, ++ * so the device can lower its frequency or get suspended. 
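++ * Concretely, tick_work() calls panthor_devfreq_record_idle() and drops
++ * the scheduler's runtime PM reference (pm.has_ref) once every scheduled
++ * group is idle.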
++ */ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void csg_slot_sync_update_locked(struct panthor_device *ptdev, ++ u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (group) ++ group_queue_work(group, sync_upd); ++ ++ sched_queue_work(ptdev->scheduler, sync_upd); ++} ++ ++static void ++csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id); ++ ++ group = csg_slot->group; ++ if (!drm_WARN_ON(&ptdev->base, !group)) ++ group->timedout = true; ++ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events; ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 ring_cs_db_mask = 0; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) ++ return; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ req = READ_ONCE(csg_iface->input->req); ++ ack = READ_ONCE(csg_iface->output->ack); ++ cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req); ++ cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack); ++ csg_events = (req ^ ack) & CSG_EVT_MASK; ++ ++ /* There may not be any pending CSG/CS interrupts to process */ ++ if (req == ack && cs_irq_req == cs_irq_ack) ++ return; ++ ++ /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before ++ * examining the CS_ACK & CS_REQ bits. This would ensure that Host ++ * doesn't miss an interrupt for the CS in the race scenario where ++ * whilst Host is servicing an interrupt for the CS, firmware sends ++ * another interrupt for that CS. ++ */ ++ csg_iface->input->cs_irq_ack = cs_irq_req; ++ ++ panthor_fw_update_reqs(csg_iface, req, ack, ++ CSG_SYNC_UPDATE | ++ CSG_IDLE | ++ CSG_PROGRESS_TIMER_EVENT); ++ ++ if (csg_events & CSG_IDLE) ++ csg_slot_process_idle_event_locked(ptdev, csg_id); ++ ++ if (csg_events & CSG_PROGRESS_TIMER_EVENT) ++ csg_slot_process_progress_timer_event_locked(ptdev, csg_id); ++ ++ cs_irqs = cs_irq_req ^ cs_irq_ack; ++ while (cs_irqs) { ++ u32 cs_id = ffs(cs_irqs) - 1; ++ ++ if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id)) ++ ring_cs_db_mask |= BIT(cs_id); ++ ++ cs_irqs &= ~BIT(cs_id); ++ } ++ ++ if (csg_events & CSG_SYNC_UPDATE) ++ csg_slot_sync_update_locked(ptdev, csg_id); ++ ++ if (ring_cs_db_mask) ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask); ++ ++ panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); ++} ++ ++static void sched_process_idle_event_locked(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ /* Acknowledge the idle event and schedule a tick. */ ++ panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE); ++ sched_queue_delayed_work(ptdev->scheduler, tick, 0); ++} ++ ++/** ++ * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ ++ * @ptdev: Device. 
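++ *
++ * Only the GLB_IDLE event is handled here: it is acknowledged in
++ * sched_process_idle_event_locked(), which then schedules an immediate
++ * tick.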
++ */ ++static void sched_process_global_irq_locked(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 req, ack, evts; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ req = READ_ONCE(glb_iface->input->req); ++ ack = READ_ONCE(glb_iface->output->ack); ++ evts = (req ^ ack) & GLB_EVT_MASK; ++ ++ if (evts & GLB_IDLE) ++ sched_process_idle_event_locked(ptdev); ++} ++ ++static void process_fw_events_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, ++ fw_events_work); ++ u32 events = atomic_xchg(&sched->fw_events, 0); ++ struct panthor_device *ptdev = sched->ptdev; ++ ++ mutex_lock(&sched->lock); ++ ++ if (events & JOB_INT_GLOBAL_IF) { ++ sched_process_global_irq_locked(ptdev); ++ events &= ~JOB_INT_GLOBAL_IF; ++ } ++ ++ while (events) { ++ u32 csg_id = ffs(events) - 1; ++ ++ sched_process_csg_irq_locked(ptdev, csg_id); ++ events &= ~BIT(csg_id); ++ } ++ ++ mutex_unlock(&sched->lock); ++} ++ ++/** ++ * panthor_sched_report_fw_events() - Report FW events to the scheduler. ++ */ ++void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events) ++{ ++ if (!ptdev->scheduler) ++ return; ++ ++ atomic_or(events, &ptdev->scheduler->fw_events); ++ sched_queue_work(ptdev->scheduler, fw_events); ++} ++ ++static const char *fence_get_driver_name(struct dma_fence *fence) ++{ ++ return "panthor"; ++} ++ ++static const char *queue_fence_get_timeline_name(struct dma_fence *fence) ++{ ++ return "queue-fence"; ++} ++ ++static const struct dma_fence_ops panthor_queue_fence_ops = { ++ .get_driver_name = fence_get_driver_name, ++ .get_timeline_name = queue_fence_get_timeline_name, ++}; ++ ++/** ++ */ ++struct panthor_csg_slots_upd_ctx { ++ u32 update_mask; ++ u32 timedout_mask; ++ struct { ++ u32 value; ++ u32 mask; ++ } requests[MAX_CSGS]; ++}; ++ ++static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx) ++{ ++ memset(ctx, 0, sizeof(*ctx)); ++} ++ ++static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev, ++ struct panthor_csg_slots_upd_ctx *ctx, ++ u32 csg_id, u32 value, u32 mask) ++{ ++ if (drm_WARN_ON(&ptdev->base, !mask) || ++ drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) ++ return; ++ ++ ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask); ++ ctx->requests[csg_id].mask |= mask; ++ ctx->update_mask |= BIT(csg_id); ++} ++ ++static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev, ++ struct panthor_csg_slots_upd_ctx *ctx) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 update_slots = ctx->update_mask; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ if (!ctx->update_mask) ++ return 0; ++ ++ while (update_slots) { ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 csg_id = ffs(update_slots) - 1; ++ ++ update_slots &= ~BIT(csg_id); ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ panthor_fw_update_reqs(csg_iface, req, ++ ctx->requests[csg_id].value, ++ ctx->requests[csg_id].mask); ++ } ++ ++ panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask); ++ ++ update_slots = ctx->update_mask; ++ while (update_slots) { ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 csg_id = ffs(update_slots) - 1; ++ u32 req_mask = ctx->requests[csg_id].mask, acked; ++ int ret; ++ ++ update_slots &= ~BIT(csg_id); ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ ++ ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100); ++ ++ if (acked & 
CSG_ENDPOINT_CONFIG) ++ csg_slot_sync_priority_locked(ptdev, csg_id); ++ ++ if (acked & CSG_STATE_MASK) ++ csg_slot_sync_state_locked(ptdev, csg_id); ++ ++ if (acked & CSG_STATUS_UPDATE) { ++ csg_slot_sync_queues_state_locked(ptdev, csg_id); ++ csg_slot_sync_idle_state_locked(ptdev, csg_id); ++ } ++ ++ if (ret && acked != req_mask && ++ ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) { ++ drm_err(&ptdev->base, "CSG %d update request timedout", csg_id); ++ ctx->timedout_mask |= BIT(csg_id); ++ } ++ } ++ ++ if (ctx->timedout_mask) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++struct panthor_sched_tick_ctx { ++ struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT]; ++ struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT]; ++ u32 idle_group_count; ++ u32 group_count; ++ enum panthor_csg_priority min_priority; ++ struct panthor_vm *vms[MAX_CS_PER_CSG]; ++ u32 as_count; ++ bool immediate_tick; ++ u32 csg_upd_failed_mask; ++}; ++ ++static bool ++tick_ctx_is_full(const struct panthor_scheduler *sched, ++ const struct panthor_sched_tick_ctx *ctx) ++{ ++ return ctx->group_count == sched->csg_slot_count; ++} ++ ++static bool ++group_is_idle(struct panthor_group *group) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ u32 inactive_queues; ++ ++ if (group->csg_id >= 0) ++ return ptdev->scheduler->csg_slots[group->csg_id].idle; ++ ++ inactive_queues = group->idle_queues | group->blocked_queues; ++ return hweight32(inactive_queues) == group->queue_count; ++} ++ ++static bool ++group_can_run(struct panthor_group *group) ++{ ++ return group->state != PANTHOR_CS_GROUP_TERMINATED && ++ !group->destroyed && group->fatal_queues == 0 && ++ !group->timedout; ++} ++ ++static void ++tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ struct list_head *queue, ++ bool skip_idle_groups, ++ bool owned_by_tick_ctx) ++{ ++ struct panthor_group *group, *tmp; ++ ++ if (tick_ctx_is_full(sched, ctx)) ++ return; ++ ++ list_for_each_entry_safe(group, tmp, queue, run_node) { ++ u32 i; ++ ++ if (!group_can_run(group)) ++ continue; ++ ++ if (skip_idle_groups && group_is_idle(group)) ++ continue; ++ ++ for (i = 0; i < ctx->as_count; i++) { ++ if (ctx->vms[i] == group->vm) ++ break; ++ } ++ ++ if (i == ctx->as_count && ctx->as_count == sched->as_slot_count) ++ continue; ++ ++ if (!owned_by_tick_ctx) ++ group_get(group); ++ ++ list_move_tail(&group->run_node, &ctx->groups[group->priority]); ++ ctx->group_count++; ++ if (group_is_idle(group)) ++ ctx->idle_group_count++; ++ ++ if (i == ctx->as_count) ++ ctx->vms[ctx->as_count++] = group->vm; ++ ++ if (ctx->min_priority > group->priority) ++ ctx->min_priority = group->priority; ++ ++ if (tick_ctx_is_full(sched, ctx)) ++ return; ++ } ++} ++ ++static void ++tick_ctx_insert_old_group(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ struct panthor_group *group, ++ bool full_tick) ++{ ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id]; ++ struct panthor_group *other_group; ++ ++ if (!full_tick) { ++ list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); ++ return; ++ } ++ ++ /* Rotate to make sure groups with lower CSG slot ++ * priorities have a chance to get a higher CSG slot ++ * priority next time they get picked. This priority ++ * has an impact on resource request ordering, so it's ++ * important to make sure we don't let one group starve ++ * all other groups with the same group priority. 
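++ *
++ * For example, if groups A and B share the same group priority and A
++ * currently holds the higher-priority CSG slot, B ends up before A in
++ * the old_groups list, so the next tick_ctx_apply() pass will, all else
++ * being equal, hand the higher slot priority to B.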
++ */ ++ list_for_each_entry(other_group, ++ &ctx->old_groups[csg_slot->group->priority], ++ run_node) { ++ struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id]; ++ ++ if (other_csg_slot->priority > csg_slot->priority) { ++ list_add_tail(&csg_slot->group->run_node, &other_group->run_node); ++ return; ++ } ++ } ++ ++ list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); ++} ++ ++static void ++tick_ctx_init(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ bool full_tick) ++{ ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ int ret; ++ u32 i; ++ ++ memset(ctx, 0, sizeof(*ctx)); ++ csgs_upd_ctx_init(&upd_ctx); ++ ++ ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT; ++ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { ++ INIT_LIST_HEAD(&ctx->groups[i]); ++ INIT_LIST_HEAD(&ctx->old_groups[i]); ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ if (!group) ++ continue; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, i); ++ group_get(group); ++ ++ /* If there was unhandled faults on the VM, force processing of ++ * CSG IRQs, so we can flag the faulty queue. ++ */ ++ if (panthor_vm_has_unhandled_faults(group->vm)) { ++ sched_process_csg_irq_locked(ptdev, i); ++ ++ /* No fatal fault reported, flag all queues as faulty. */ ++ if (!group->fatal_queues) ++ group->fatal_queues |= GENMASK(group->queue_count - 1, 0); ++ } ++ ++ tick_ctx_insert_old_group(sched, ctx, group, full_tick); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, ++ csg_iface->output->ack ^ CSG_STATUS_UPDATE, ++ CSG_STATUS_UPDATE); ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ } ++} ++ ++#define NUM_INSTRS_PER_SLOT 16 ++ ++static void ++group_term_post_processing(struct panthor_group *group) ++{ ++ struct panthor_job *job, *tmp; ++ LIST_HEAD(faulty_jobs); ++ bool cookie; ++ u32 i = 0; ++ ++ if (drm_WARN_ON(&group->ptdev->base, group_can_run(group))) ++ return; ++ ++ cookie = dma_fence_begin_signalling(); ++ for (i = 0; i < group->queue_count; i++) { ++ struct panthor_queue *queue = group->queues[i]; ++ struct panthor_syncobj_64b *syncobj; ++ int err; ++ ++ if (group->fatal_queues & BIT(i)) ++ err = -EINVAL; ++ else if (group->timedout) ++ err = -ETIMEDOUT; ++ else ++ err = -ECANCELED; ++ ++ if (!queue) ++ continue; ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) { ++ list_move_tail(&job->node, &faulty_jobs); ++ dma_fence_set_error(job->done_fence, err); ++ dma_fence_signal_locked(job->done_fence); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ ++ /* Manually update the syncobj seqno to unblock waiters. 
*/ ++ syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj)); ++ syncobj->status = ~0; ++ syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno); ++ sched_queue_work(group->ptdev->scheduler, sync_upd); ++ } ++ dma_fence_end_signalling(cookie); ++ ++ list_for_each_entry_safe(job, tmp, &faulty_jobs, node) { ++ list_del_init(&job->node); ++ panthor_job_put(&job->base); ++ } ++} ++ ++static void group_term_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, term_work); ++ ++ group_term_post_processing(group); ++ group_put(group); ++} ++ ++static void ++tick_ctx_cleanup(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx) ++{ ++ struct panthor_group *group, *tmp; ++ u32 i; ++ ++ for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) { ++ list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) { ++ /* If everything went fine, we should only have groups ++ * to be terminated in the old_groups lists. ++ */ ++ drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask && ++ group_can_run(group)); ++ ++ if (!group_can_run(group)) { ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } else if (group->csg_id >= 0) { ++ list_del_init(&group->run_node); ++ } else { ++ list_move(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } ++ group_put(group); ++ } ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { ++ /* If everything went fine, the groups to schedule lists should ++ * be empty. ++ */ ++ drm_WARN_ON(&group->ptdev->base, ++ !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i])); ++ ++ list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) { ++ if (group->csg_id >= 0) { ++ list_del_init(&group->run_node); ++ } else { ++ list_move(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } ++ group_put(group); ++ } ++ } ++} ++ ++static void ++tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx) ++{ ++ struct panthor_group *group, *tmp; ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_csg_slot *csg_slot; ++ int prio, new_csg_prio = MAX_CSG_PRIO, i; ++ u32 csg_mod_mask = 0, free_csg_slots = 0; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ int ret; ++ ++ csgs_upd_ctx_init(&upd_ctx); ++ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ /* Suspend or terminate evicted groups. */ ++ list_for_each_entry(group, &ctx->old_groups[prio], run_node) { ++ bool term = !group_can_run(group); ++ int csg_id = group->csg_id; ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id < 0)) ++ continue; ++ ++ csg_slot = &sched->csg_slots[csg_id]; ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND, ++ CSG_STATE_MASK); ++ } ++ ++ /* Update priorities on already running groups. 
*/ ++ list_for_each_entry(group, &ctx->groups[prio], run_node) { ++ struct panthor_fw_csg_iface *csg_iface; ++ int csg_id = group->csg_id; ++ ++ if (csg_id < 0) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ csg_slot = &sched->csg_slots[csg_id]; ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ if (csg_slot->priority == new_csg_prio) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ panthor_fw_update_reqs(csg_iface, endpoint_req, ++ CSG_EP_REQ_PRIORITY(new_csg_prio), ++ CSG_EP_REQ_PRIORITY_MASK); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, ++ CSG_ENDPOINT_CONFIG); ++ new_csg_prio--; ++ } ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ return; ++ } ++ ++ /* Unbind evicted groups. */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry(group, &ctx->old_groups[prio], run_node) { ++ /* This group is gone. Process interrupts to clear ++ * any pending interrupts before we start the new ++ * group. ++ */ ++ if (group->csg_id >= 0) ++ sched_process_csg_irq_locked(ptdev, group->csg_id); ++ ++ group_unbind_locked(group); ++ } ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ if (!sched->csg_slots[i].group) ++ free_csg_slots |= BIT(i); ++ } ++ ++ csgs_upd_ctx_init(&upd_ctx); ++ new_csg_prio = MAX_CSG_PRIO; ++ ++ /* Start new groups. */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry(group, &ctx->groups[prio], run_node) { ++ int csg_id = group->csg_id; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ if (csg_id >= 0) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ csg_id = ffs(free_csg_slots) - 1; ++ if (drm_WARN_ON(&ptdev->base, csg_id < 0)) ++ break; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ csg_slot = &sched->csg_slots[csg_id]; ++ csg_mod_mask |= BIT(csg_id); ++ group_bind_locked(group, csg_id); ++ csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ group->state == PANTHOR_CS_GROUP_SUSPENDED ? ++ CSG_STATE_RESUME : CSG_STATE_START, ++ CSG_STATE_MASK); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, ++ CSG_ENDPOINT_CONFIG); ++ free_csg_slots &= ~BIT(csg_id); ++ } ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ return; ++ } ++ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) { ++ list_del_init(&group->run_node); ++ ++ /* If the group has been destroyed while we were ++ * scheduling, ask for an immediate tick to ++ * re-evaluate as soon as possible and get rid of ++ * this dangling group. ++ */ ++ if (group->destroyed) ++ ctx->immediate_tick = true; ++ group_put(group); ++ } ++ ++ /* Return evicted groups to the idle or run queues. Groups ++ * that can no longer be run (because they've been destroyed ++ * or experienced an unrecoverable error) will be scheduled ++ * for destruction in tick_ctx_cleanup(). 
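++ * (tick_ctx_cleanup() queues the term work for those groups, and
++ * group_term_post_processing() then signals their in-flight job fences
++ * with an error.)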
++ */ ++ list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) { ++ if (!group_can_run(group)) ++ continue; ++ ++ if (group_is_idle(group)) ++ list_move_tail(&group->run_node, &sched->groups.idle[prio]); ++ else ++ list_move_tail(&group->run_node, &sched->groups.runnable[prio]); ++ group_put(group); ++ } ++ } ++ ++ sched->used_csg_slot_count = ctx->group_count; ++ sched->might_have_idle_groups = ctx->idle_group_count > 0; ++} ++ ++static u64 ++tick_ctx_update_resched_target(struct panthor_scheduler *sched, ++ const struct panthor_sched_tick_ctx *ctx) ++{ ++ /* We had space left, no need to reschedule until some external event happens. */ ++ if (!tick_ctx_is_full(sched, ctx)) ++ goto no_tick; ++ ++ /* If idle groups were scheduled, no need to wake up until some external ++ * event happens (group unblocked, new job submitted, ...). ++ */ ++ if (ctx->idle_group_count) ++ goto no_tick; ++ ++ if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT)) ++ goto no_tick; ++ ++ /* If there are groups of the same priority waiting, we need to ++ * keep the scheduler ticking, otherwise, we'll just wait for ++ * new groups with higher priority to be queued. ++ */ ++ if (!list_empty(&sched->groups.runnable[ctx->min_priority])) { ++ u64 resched_target = sched->last_tick + sched->tick_period; ++ ++ if (time_before64(sched->resched_target, sched->last_tick) || ++ time_before64(resched_target, sched->resched_target)) ++ sched->resched_target = resched_target; ++ ++ return sched->resched_target - sched->last_tick; ++ } ++ ++no_tick: ++ sched->resched_target = U64_MAX; ++ return U64_MAX; ++} ++ ++static void tick_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, ++ tick_work.work); ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_sched_tick_ctx ctx; ++ u64 remaining_jiffies = 0, resched_delay; ++ u64 now = get_jiffies_64(); ++ int prio, ret, cookie; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ goto out_dev_exit; ++ ++ if (time_before64(now, sched->resched_target)) ++ remaining_jiffies = sched->resched_target - now; ++ ++ mutex_lock(&sched->lock); ++ if (panthor_device_reset_is_pending(sched->ptdev)) ++ goto out_unlock; ++ ++ tick_ctx_init(sched, &ctx, remaining_jiffies != 0); ++ if (ctx.csg_upd_failed_mask) ++ goto out_cleanup_ctx; ++ ++ if (remaining_jiffies) { ++ /* Scheduling forced in the middle of a tick. Only RT groups ++ * can preempt non-RT ones. Currently running RT groups can't be ++ * preempted. 
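++ *
++ * This is why group_schedule_locked() forces an immediate tick when an
++ * RT group gets new work: in this forced-tick path, only the RT
++ * runnable list is considered on top of the groups already picked from
++ * old_groups.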
++ */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], ++ true, true); ++ if (prio == PANTHOR_CSG_PRIORITY_RT) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, ++ &sched->groups.runnable[prio], ++ true, false); ++ } ++ } ++ } ++ ++ /* First pick non-idle groups */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio], ++ true, false); ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true); ++ } ++ ++ /* If we have free CSG slots left, pick idle groups */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ /* Check the old_group queue first to avoid reprogramming the slots */ ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true); ++ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio], ++ false, false); ++ } ++ ++ tick_ctx_apply(sched, &ctx); ++ if (ctx.csg_upd_failed_mask) ++ goto out_cleanup_ctx; ++ ++ if (ctx.idle_group_count == ctx.group_count) { ++ panthor_devfreq_record_idle(sched->ptdev); ++ if (sched->pm.has_ref) { ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ sched->pm.has_ref = false; ++ } ++ } else { ++ panthor_devfreq_record_busy(sched->ptdev); ++ if (!sched->pm.has_ref) { ++ pm_runtime_get(ptdev->base.dev); ++ sched->pm.has_ref = true; ++ } ++ } ++ ++ sched->last_tick = now; ++ resched_delay = tick_ctx_update_resched_target(sched, &ctx); ++ if (ctx.immediate_tick) ++ resched_delay = 0; ++ ++ if (resched_delay != U64_MAX) ++ sched_queue_delayed_work(sched, tick, resched_delay); ++ ++out_cleanup_ctx: ++ tick_ctx_cleanup(sched, &ctx); ++ ++out_unlock: ++ mutex_unlock(&sched->lock); ++ pm_runtime_mark_last_busy(ptdev->base.dev); ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++} ++ ++static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx) ++{ ++ struct panthor_queue *queue = group->queues[queue_idx]; ++ union { ++ struct panthor_syncobj_64b sync64; ++ struct panthor_syncobj_32b sync32; ++ } *syncobj; ++ bool result; ++ u64 value; ++ ++ syncobj = panthor_queue_get_syncwait_obj(group, queue); ++ if (!syncobj) ++ return -EINVAL; ++ ++ value = queue->syncwait.sync64 ? 
++ syncobj->sync64.seqno : ++ syncobj->sync32.seqno; ++ ++ if (queue->syncwait.gt) ++ result = value > queue->syncwait.ref; ++ else ++ result = value <= queue->syncwait.ref; ++ ++ if (result) ++ panthor_queue_put_syncwait_obj(queue); ++ ++ return result; ++} ++ ++static void sync_upd_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, ++ struct panthor_scheduler, ++ sync_upd_work); ++ struct panthor_group *group, *tmp; ++ bool immediate_tick = false; ++ ++ mutex_lock(&sched->lock); ++ list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) { ++ u32 tested_queues = group->blocked_queues; ++ u32 unblocked_queues = 0; ++ ++ while (tested_queues) { ++ u32 cs_id = ffs(tested_queues) - 1; ++ int ret; ++ ++ ret = panthor_queue_eval_syncwait(group, cs_id); ++ drm_WARN_ON(&group->ptdev->base, ret < 0); ++ if (ret) ++ unblocked_queues |= BIT(cs_id); ++ ++ tested_queues &= ~BIT(cs_id); ++ } ++ ++ if (unblocked_queues) { ++ group->blocked_queues &= ~unblocked_queues; ++ ++ if (group->csg_id < 0) { ++ list_move(&group->run_node, ++ &sched->groups.runnable[group->priority]); ++ if (group->priority == PANTHOR_CSG_PRIORITY_RT) ++ immediate_tick = true; ++ } ++ } ++ ++ if (!group->blocked_queues) ++ list_del_init(&group->wait_node); ++ } ++ mutex_unlock(&sched->lock); ++ ++ if (immediate_tick) ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct list_head *queue = &sched->groups.runnable[group->priority]; ++ u64 delay_jiffies = 0; ++ bool was_idle; ++ u64 now; ++ ++ if (!group_can_run(group)) ++ return; ++ ++ /* All updated queues are blocked, no need to wake up the scheduler. */ ++ if ((queue_mask & group->blocked_queues) == queue_mask) ++ return; ++ ++ was_idle = group_is_idle(group); ++ group->idle_queues &= ~queue_mask; ++ ++ /* Don't mess up with the lists if we're in a middle of a reset. */ ++ if (atomic_read(&sched->reset.in_progress)) ++ return; ++ ++ if (was_idle && !group_is_idle(group)) ++ list_move_tail(&group->run_node, queue); ++ ++ /* RT groups are preemptive. */ ++ if (group->priority == PANTHOR_CSG_PRIORITY_RT) { ++ sched_queue_delayed_work(sched, tick, 0); ++ return; ++ } ++ ++ /* Some groups might be idle, force an immediate tick to ++ * re-evaluate. ++ */ ++ if (sched->might_have_idle_groups) { ++ sched_queue_delayed_work(sched, tick, 0); ++ return; ++ } ++ ++ /* Scheduler is ticking, nothing to do. */ ++ if (sched->resched_target != U64_MAX) { ++ /* If there are free slots, force immediating ticking. */ ++ if (sched->used_csg_slot_count < sched->csg_slot_count) ++ sched_queue_delayed_work(sched, tick, 0); ++ ++ return; ++ } ++ ++ /* Scheduler tick was off, recalculate the resched_target based on the ++ * last tick event, and queue the scheduler work. ++ */ ++ now = get_jiffies_64(); ++ sched->resched_target = sched->last_tick + sched->tick_period; ++ if (sched->used_csg_slot_count == sched->csg_slot_count && ++ time_before64(now, sched->resched_target)) ++ delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX); ++ ++ sched_queue_delayed_work(sched, tick, delay_jiffies); ++} ++ ++static void queue_stop(struct panthor_queue *queue, ++ struct panthor_job *bad_job) ++{ ++ drm_sched_stop(&queue->scheduler, bad_job ? 
&bad_job->base : NULL); ++} ++ ++static void queue_start(struct panthor_queue *queue) ++{ ++ struct panthor_job *job; ++ ++ /* Re-assign the parent fences. */ ++ list_for_each_entry(job, &queue->scheduler.pending_list, base.list) ++ job->base.s_fence->parent = dma_fence_get(job->done_fence); ++ ++ drm_sched_start(&queue->scheduler, true); ++} ++ ++static void panthor_group_stop(struct panthor_group *group) ++{ ++ struct panthor_scheduler *sched = group->ptdev->scheduler; ++ ++ lockdep_assert_held(&sched->reset.lock); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ queue_stop(group->queues[i], NULL); ++ ++ group_get(group); ++ list_move_tail(&group->run_node, &sched->reset.stopped_groups); ++} ++ ++static void panthor_group_start(struct panthor_group *group) ++{ ++ struct panthor_scheduler *sched = group->ptdev->scheduler; ++ ++ lockdep_assert_held(&group->ptdev->scheduler->reset.lock); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ queue_start(group->queues[i]); ++ ++ if (group_can_run(group)) { ++ list_move_tail(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } else { ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ ++ group_put(group); ++} ++ ++static void panthor_sched_immediate_tick(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++/** ++ * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler. ++ */ ++void panthor_sched_report_mmu_fault(struct panthor_device *ptdev) ++{ ++ /* Force a tick to immediately kill faulty groups. */ ++ if (ptdev->scheduler) ++ panthor_sched_immediate_tick(ptdev); ++} ++ ++void panthor_sched_resume(struct panthor_device *ptdev) ++{ ++ /* Force a tick to re-evaluate after a resume. */ ++ panthor_sched_immediate_tick(ptdev); ++} ++ ++void panthor_sched_suspend(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ u64 suspended_slots, faulty_slots; ++ struct panthor_group *group; ++ u32 i; ++ ++ mutex_lock(&sched->lock); ++ csgs_upd_ctx_init(&upd_ctx); ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ ++ if (csg_slot->group) { ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, ++ CSG_STATE_SUSPEND, ++ CSG_STATE_MASK); ++ } ++ } ++ ++ suspended_slots = upd_ctx.update_mask; ++ ++ csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ suspended_slots &= ~upd_ctx.timedout_mask; ++ faulty_slots = upd_ctx.timedout_mask; ++ ++ if (faulty_slots) { ++ u32 slot_mask = faulty_slots; ++ ++ drm_err(&ptdev->base, "CSG suspend failed, escalating to termination"); ++ csgs_upd_ctx_init(&upd_ctx); ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ CSG_STATE_TERMINATE, ++ CSG_STATE_MASK); ++ slot_mask &= ~BIT(csg_id); ++ } ++ ++ csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ ++ slot_mask = upd_ctx.timedout_mask; ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ ++ /* Terminate command timedout, but the soft-reset will ++ * automatically terminate all active groups, so let's ++ * force the state to halted here. 
++ */ ++ if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED) ++ csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; ++ slot_mask &= ~BIT(csg_id); ++ } ++ } ++ ++ /* Flush L2 and LSC caches to make sure suspend state is up-to-date. ++ * If the flush fails, flag all queues for termination. ++ */ ++ if (suspended_slots) { ++ bool flush_caches_failed = false; ++ u32 slot_mask = suspended_slots; ++ ++ if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0)) ++ flush_caches_failed = true; ++ ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ ++ if (flush_caches_failed) ++ csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; ++ else ++ csg_slot_sync_update_locked(ptdev, csg_id); ++ ++ slot_mask &= ~BIT(csg_id); ++ } ++ ++ if (flush_caches_failed) ++ faulty_slots |= suspended_slots; ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ ++ group = csg_slot->group; ++ if (!group) ++ continue; ++ ++ group_get(group); ++ ++ if (group->csg_id >= 0) ++ sched_process_csg_irq_locked(ptdev, group->csg_id); ++ ++ group_unbind_locked(group); ++ ++ drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node)); ++ ++ if (group_can_run(group)) { ++ list_add(&group->run_node, ++ &sched->groups.idle[group->priority]); ++ } else { ++ /* We don't bother stopping the scheduler if the group is ++ * faulty, the group termination work will finish the job. ++ */ ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ group_put(group); ++ } ++ mutex_unlock(&sched->lock); ++} ++ ++void panthor_sched_pre_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group, *group_tmp; ++ u32 i; ++ ++ mutex_lock(&sched->reset.lock); ++ atomic_set(&sched->reset.in_progress, true); ++ ++ /* Cancel all scheduler works. Once this is done, these works can't be ++ * scheduled again until the reset operation is complete. ++ */ ++ cancel_work_sync(&sched->sync_upd_work); ++ cancel_delayed_work_sync(&sched->tick_work); ++ ++ panthor_sched_suspend(ptdev); ++ ++ /* Stop all groups that might still accept jobs, so we don't get passed ++ * new jobs while we're resetting. ++ */ ++ for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) { ++ /* All groups should be in the idle lists. */ ++ drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i])); ++ list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node) ++ panthor_group_stop(group); ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) { ++ list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node) ++ panthor_group_stop(group); ++ } ++ ++ mutex_unlock(&sched->reset.lock); ++} ++ ++void panthor_sched_post_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group, *group_tmp; ++ ++ mutex_lock(&sched->reset.lock); ++ ++ list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) ++ panthor_group_start(group); ++ ++ /* We're done resetting the GPU, clear the reset.in_progress bit so we can ++ * kick the scheduler. 
++ */ ++ atomic_set(&sched->reset.in_progress, false); ++ mutex_unlock(&sched->reset.lock); ++ ++ sched_queue_delayed_work(sched, tick, 0); ++ ++ sched_queue_work(sched, sync_upd); ++} ++ ++static void group_sync_upd_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, sync_upd_work); ++ struct panthor_job *job, *job_tmp; ++ LIST_HEAD(done_jobs); ++ u32 queue_idx; ++ bool cookie; ++ ++ cookie = dma_fence_begin_signalling(); ++ for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { ++ struct panthor_queue *queue = group->queues[queue_idx]; ++ struct panthor_syncobj_64b *syncobj; ++ ++ if (!queue) ++ continue; ++ ++ syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj)); ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { ++ if (!job->call_info.size) ++ continue; ++ ++ if (syncobj->seqno < job->done_fence->seqno) ++ break; ++ ++ list_move_tail(&job->node, &done_jobs); ++ dma_fence_signal_locked(job->done_fence); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ } ++ dma_fence_end_signalling(cookie); ++ ++ list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { ++ list_del_init(&job->node); ++ panthor_job_put(&job->base); ++ } ++ ++ group_put(group); ++} ++ ++static struct dma_fence * ++queue_run_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ struct panthor_group *group = job->group; ++ struct panthor_queue *queue = group->queues[job->queue_idx]; ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); ++ u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1); ++ u64 addr_reg = ptdev->csif_info.cs_reg_count - ++ ptdev->csif_info.unpreserved_cs_reg_count; ++ u64 val_reg = addr_reg + 2; ++ u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) + ++ job->queue_idx * sizeof(struct panthor_syncobj_64b); ++ u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); ++ struct dma_fence *done_fence; ++ int ret; ++ ++ u64 call_instrs[NUM_INSTRS_PER_SLOT] = { ++ /* MOV32 rX+2, cs.latest_flush */ ++ (2ull << 56) | (val_reg << 48) | job->call_info.latest_flush, ++ ++ /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */ ++ (36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233, ++ ++ /* MOV48 rX:rX+1, cs.start */ ++ (1ull << 56) | (addr_reg << 48) | job->call_info.start, ++ ++ /* MOV32 rX+2, cs.size */ ++ (2ull << 56) | (val_reg << 48) | job->call_info.size, ++ ++ /* WAIT(0) => waits for FLUSH_CACHE2 instruction */ ++ (3ull << 56) | (1 << 16), ++ ++ /* CALL rX:rX+1, rX+2 */ ++ (32ull << 56) | (addr_reg << 40) | (val_reg << 32), ++ ++ /* MOV48 rX:rX+1, sync_addr */ ++ (1ull << 56) | (addr_reg << 48) | sync_addr, ++ ++ /* MOV48 rX+2, #1 */ ++ (1ull << 56) | (val_reg << 48) | 1, ++ ++ /* WAIT(all) */ ++ (3ull << 56) | (waitall_mask << 16), ++ ++ /* SYNC_ADD64.system_scope.propage_err.nowait rX:rX+1, rX+2*/ ++ (51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1, ++ ++ /* ERROR_BARRIER, so we can recover from faults at job ++ * boundaries. ++ */ ++ (47ull << 56), ++ }; ++ ++ /* Need to be cacheline aligned to please the prefetcher. 
*/ ++ static_assert(sizeof(call_instrs) % 64 == 0, ++ "call_instrs is not aligned on a cacheline"); ++ ++ /* Stream size is zero, nothing to do => return a NULL fence and let ++ * drm_sched signal the parent. ++ */ ++ if (!job->call_info.size) ++ return NULL; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ return ERR_PTR(ret); ++ ++ mutex_lock(&sched->lock); ++ if (!group_can_run(group)) { ++ done_fence = ERR_PTR(-ECANCELED); ++ goto out_unlock; ++ } ++ ++ dma_fence_init(job->done_fence, ++ &panthor_queue_fence_ops, ++ &queue->fence_ctx.lock, ++ queue->fence_ctx.id, ++ atomic64_inc_return(&queue->fence_ctx.seqno)); ++ ++ memcpy(queue->ringbuf->kmap + ringbuf_insert, ++ call_instrs, sizeof(call_instrs)); ++ ++ panthor_job_get(&job->base); ++ spin_lock(&queue->fence_ctx.lock); ++ list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs); ++ spin_unlock(&queue->fence_ctx.lock); ++ ++ job->ringbuf.start = queue->iface.input->insert; ++ job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs); ++ ++ /* Make sure the ring buffer is updated before the INSERT ++ * register. ++ */ ++ wmb(); ++ ++ queue->iface.input->extract = queue->iface.output->extract; ++ queue->iface.input->insert = job->ringbuf.end; ++ ++ if (group->csg_id < 0) { ++ /* If the queue is blocked, we want to keep the timeout running, so we ++ * can detect unbounded waits and kill the group when that happens. ++ * Otherwise, we suspend the timeout so the time we spend waiting for ++ * a CSG slot is not counted. ++ */ ++ if (!(group->blocked_queues & BIT(job->queue_idx)) && ++ !queue->timeout_suspended) { ++ queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); ++ queue->timeout_suspended = true; ++ } ++ ++ group_schedule_locked(group, BIT(job->queue_idx)); ++ } else { ++ gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1); ++ if (!sched->pm.has_ref && ++ !(group->blocked_queues & BIT(job->queue_idx))) { ++ pm_runtime_get(ptdev->base.dev); ++ sched->pm.has_ref = true; ++ } ++ } ++ ++ done_fence = dma_fence_get(job->done_fence); ++ ++out_unlock: ++ mutex_unlock(&sched->lock); ++ pm_runtime_mark_last_busy(ptdev->base.dev); ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ ++ return done_fence; ++} ++ ++static enum drm_gpu_sched_stat ++queue_timedout_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ struct panthor_group *group = job->group; ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_queue *queue = group->queues[job->queue_idx]; ++ ++ drm_warn(&ptdev->base, "job timeout\n"); ++ ++ drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress)); ++ ++ queue_stop(queue, job); ++ ++ mutex_lock(&sched->lock); ++ group->timedout = true; ++ if (group->csg_id >= 0) { ++ sched_queue_delayed_work(ptdev->scheduler, tick, 0); ++ } else { ++ /* Remove from the run queues, so the scheduler can't ++ * pick the group on the next tick. 
++ */ ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ ++ group_queue_work(group, term); ++ } ++ mutex_unlock(&sched->lock); ++ ++ queue_start(queue); ++ ++ return DRM_GPU_SCHED_STAT_NOMINAL; ++} ++ ++static void queue_free_job(struct drm_sched_job *sched_job) ++{ ++ drm_sched_job_cleanup(sched_job); ++ panthor_job_put(sched_job); ++} ++ ++static const struct drm_sched_backend_ops panthor_queue_sched_ops = { ++ .run_job = queue_run_job, ++ .timedout_job = queue_timedout_job, ++ .free_job = queue_free_job, ++}; ++ ++static struct panthor_queue * ++group_create_queue(struct panthor_group *group, ++ const struct drm_panthor_queue_create *args) ++{ ++ struct drm_gpu_scheduler *drm_sched; ++ struct panthor_queue *queue; ++ int ret; ++ ++ if (args->pad[0] || args->pad[1] || args->pad[2]) ++ return ERR_PTR(-EINVAL); ++ ++ if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K || ++ !is_power_of_2(args->ringbuf_size)) ++ return ERR_PTR(-EINVAL); ++ ++ if (args->priority > CSF_MAX_QUEUE_PRIO) ++ return ERR_PTR(-EINVAL); ++ ++ queue = kzalloc(sizeof(*queue), GFP_KERNEL); ++ if (!queue) ++ return ERR_PTR(-ENOMEM); ++ ++ queue->fence_ctx.id = dma_fence_context_alloc(1); ++ spin_lock_init(&queue->fence_ctx.lock); ++ INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs); ++ ++ queue->priority = args->priority; ++ ++ queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm, ++ args->ringbuf_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(queue->ringbuf)) { ++ ret = PTR_ERR(queue->ringbuf); ++ goto err_free_queue; ++ } ++ ++ ret = panthor_kernel_bo_vmap(queue->ringbuf); ++ if (ret) ++ goto err_free_queue; ++ ++ queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev, ++ &queue->iface.input, ++ &queue->iface.output, ++ &queue->iface.input_fw_va, ++ &queue->iface.output_fw_va); ++ if (IS_ERR(queue->iface.mem)) { ++ ret = PTR_ERR(queue->iface.mem); ++ goto err_free_queue; ++ } ++ ++ ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops, ++ group->ptdev->scheduler->wq, 1, ++ args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)), ++ 0, msecs_to_jiffies(JOB_TIMEOUT_MS), ++ group->ptdev->reset.wq, ++ NULL, "panthor-queue", group->ptdev->base.dev); ++ if (ret) ++ goto err_free_queue; ++ ++ drm_sched = &queue->scheduler; ++ ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL); ++ ++ return queue; ++ ++err_free_queue: ++ group_free_queue(group, queue); ++ return ERR_PTR(ret); ++} ++ ++#define MAX_GROUPS_PER_POOL 128 ++ ++int panthor_group_create(struct panthor_file *pfile, ++ const struct drm_panthor_group_create *group_args, ++ const struct drm_panthor_queue_create *queue_args) ++{ ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0); ++ struct panthor_group *group = NULL; ++ u32 gid, i, suspend_size; ++ int ret; ++ ++ if (group_args->pad) ++ return -EINVAL; ++ ++ if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH) ++ return -EINVAL; ++ ++ if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) || ++ (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) || ++ (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present)) ++ return -EINVAL; ++ ++ if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores || 
++ hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores || ++ hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores) ++ return -EINVAL; ++ ++ group = kzalloc(sizeof(*group), GFP_KERNEL); ++ if (!group) ++ return -ENOMEM; ++ ++ spin_lock_init(&group->fatal_lock); ++ kref_init(&group->refcount); ++ group->state = PANTHOR_CS_GROUP_CREATED; ++ group->csg_id = -1; ++ ++ group->ptdev = ptdev; ++ group->max_compute_cores = group_args->max_compute_cores; ++ group->compute_core_mask = group_args->compute_core_mask; ++ group->max_fragment_cores = group_args->max_fragment_cores; ++ group->fragment_core_mask = group_args->fragment_core_mask; ++ group->max_tiler_cores = group_args->max_tiler_cores; ++ group->tiler_core_mask = group_args->tiler_core_mask; ++ group->priority = group_args->priority; ++ ++ INIT_LIST_HEAD(&group->wait_node); ++ INIT_LIST_HEAD(&group->run_node); ++ INIT_WORK(&group->term_work, group_term_work); ++ INIT_WORK(&group->sync_upd_work, group_sync_upd_work); ++ INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work); ++ INIT_WORK(&group->release_work, group_release_work); ++ ++ group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id); ++ if (!group->vm) { ++ ret = -EINVAL; ++ goto err_put_group; ++ } ++ ++ suspend_size = csg_iface->control->suspend_size; ++ group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); ++ if (IS_ERR(group->suspend_buf)) { ++ ret = PTR_ERR(group->suspend_buf); ++ group->suspend_buf = NULL; ++ goto err_put_group; ++ } ++ ++ suspend_size = csg_iface->control->protm_suspend_size; ++ group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); ++ if (IS_ERR(group->protm_suspend_buf)) { ++ ret = PTR_ERR(group->protm_suspend_buf); ++ group->protm_suspend_buf = NULL; ++ goto err_put_group; ++ } ++ ++ group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm, ++ group_args->queues.count * ++ sizeof(struct panthor_syncobj_64b), ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(group->syncobjs)) { ++ ret = PTR_ERR(group->syncobjs); ++ goto err_put_group; ++ } ++ ++ ret = panthor_kernel_bo_vmap(group->syncobjs); ++ if (ret) ++ goto err_put_group; ++ ++ memset(group->syncobjs->kmap, 0, ++ group_args->queues.count * sizeof(struct panthor_syncobj_64b)); ++ ++ for (i = 0; i < group_args->queues.count; i++) { ++ group->queues[i] = group_create_queue(group, &queue_args[i]); ++ if (IS_ERR(group->queues[i])) { ++ ret = PTR_ERR(group->queues[i]); ++ group->queues[i] = NULL; ++ goto err_put_group; ++ } ++ ++ group->queue_count++; ++ } ++ ++ group->idle_queues = GENMASK(group->queue_count - 1, 0); ++ ++ ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL); ++ if (ret) ++ goto err_put_group; ++ ++ mutex_lock(&sched->reset.lock); ++ if (atomic_read(&sched->reset.in_progress)) { ++ panthor_group_stop(group); ++ } else { ++ mutex_lock(&sched->lock); ++ list_add_tail(&group->run_node, ++ &sched->groups.idle[group->priority]); ++ mutex_unlock(&sched->lock); ++ } ++ mutex_unlock(&sched->reset.lock); ++ ++ return gid; ++ ++err_put_group: ++ group_put(group); ++ return ret; ++} ++ ++int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group; ++ ++ group = 
xa_erase(&gpool->xa, group_handle); ++ if (!group) ++ return -EINVAL; ++ ++ for (u32 i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ drm_sched_entity_destroy(&group->queues[i]->entity); ++ } ++ ++ mutex_lock(&sched->reset.lock); ++ mutex_lock(&sched->lock); ++ group->destroyed = true; ++ if (group->csg_id >= 0) { ++ sched_queue_delayed_work(sched, tick, 0); ++ } else if (!atomic_read(&sched->reset.in_progress)) { ++ /* Remove from the run queues, so the scheduler can't ++ * pick the group on the next tick. ++ */ ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ mutex_unlock(&sched->lock); ++ mutex_unlock(&sched->reset.lock); ++ ++ group_put(group); ++ return 0; ++} ++ ++int panthor_group_get_state(struct panthor_file *pfile, ++ struct drm_panthor_group_get_state *get_state) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group; ++ ++ if (get_state->pad) ++ return -EINVAL; ++ ++ group = group_get(xa_load(&gpool->xa, get_state->group_handle)); ++ if (!group) ++ return -EINVAL; ++ ++ memset(get_state, 0, sizeof(*get_state)); ++ ++ mutex_lock(&sched->lock); ++ if (group->timedout) ++ get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT; ++ if (group->fatal_queues) { ++ get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT; ++ get_state->fatal_queues = group->fatal_queues; ++ } ++ mutex_unlock(&sched->lock); ++ ++ group_put(group); ++ return 0; ++} ++ ++int panthor_group_pool_create(struct panthor_file *pfile) ++{ ++ struct panthor_group_pool *gpool; ++ ++ gpool = kzalloc(sizeof(*gpool), GFP_KERNEL); ++ if (!gpool) ++ return -ENOMEM; ++ ++ xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1); ++ pfile->groups = gpool; ++ return 0; ++} ++ ++void panthor_group_pool_destroy(struct panthor_file *pfile) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_group *group; ++ unsigned long i; ++ ++ if (IS_ERR_OR_NULL(gpool)) ++ return; ++ ++ xa_for_each(&gpool->xa, i, group) ++ panthor_group_destroy(pfile, i); ++ ++ xa_destroy(&gpool->xa); ++ kfree(gpool); ++ pfile->groups = NULL; ++} ++ ++static void job_release(struct kref *ref) ++{ ++ struct panthor_job *job = container_of(ref, struct panthor_job, refcount); ++ ++ drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node)); ++ ++ if (job->base.s_fence) ++ drm_sched_job_cleanup(&job->base); ++ ++ if (job->done_fence && job->done_fence->ops) ++ dma_fence_put(job->done_fence); ++ else ++ dma_fence_free(job->done_fence); ++ ++ group_put(job->group); ++ ++ kfree(job); ++} ++ ++struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job) ++{ ++ if (sched_job) { ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ kref_get(&job->refcount); ++ } ++ ++ return sched_job; ++} ++ ++void panthor_job_put(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ if (sched_job) ++ kref_put(&job->refcount, job_release); ++} ++ ++struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ return job->group->vm; ++} ++ ++struct drm_sched_job * ++panthor_job_create(struct panthor_file *pfile, ++ u16 group_handle, ++ const struct drm_panthor_queue_submit *qsubmit) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct 
panthor_job *job; ++ int ret; ++ ++ if (qsubmit->pad) ++ return ERR_PTR(-EINVAL); ++ ++ /* If stream_addr is zero, so stream_size should be. */ ++ if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0)) ++ return ERR_PTR(-EINVAL); ++ ++ /* Make sure the address is aligned on 64-byte (cacheline) and the size is ++ * aligned on 8-byte (instruction size). ++ */ ++ if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7)) ++ return ERR_PTR(-EINVAL); ++ ++ /* bits 24:30 must be zero. */ ++ if (qsubmit->latest_flush & GENMASK(30, 24)) ++ return ERR_PTR(-EINVAL); ++ ++ job = kzalloc(sizeof(*job), GFP_KERNEL); ++ if (!job) ++ return ERR_PTR(-ENOMEM); ++ ++ kref_init(&job->refcount); ++ job->queue_idx = qsubmit->queue_index; ++ job->call_info.size = qsubmit->stream_size; ++ job->call_info.start = qsubmit->stream_addr; ++ job->call_info.latest_flush = qsubmit->latest_flush; ++ INIT_LIST_HEAD(&job->node); ++ ++ job->group = group_get(xa_load(&gpool->xa, group_handle)); ++ if (!job->group) { ++ ret = -EINVAL; ++ goto err_put_job; ++ } ++ ++ if (job->queue_idx >= job->group->queue_count || ++ !job->group->queues[job->queue_idx]) { ++ ret = -EINVAL; ++ goto err_put_job; ++ } ++ ++ job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); ++ if (!job->done_fence) { ++ ret = -ENOMEM; ++ goto err_put_job; ++ } ++ ++ ret = drm_sched_job_init(&job->base, ++ &job->group->queues[job->queue_idx]->entity, ++ 1, job->group); ++ if (ret) ++ goto err_put_job; ++ ++ return &job->base; ++ ++err_put_job: ++ panthor_job_put(&job->base); ++ return ERR_PTR(ret); ++} ++ ++void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ /* Still not sure why we want USAGE_WRITE for external objects, since I ++ * was assuming this would be handled through explicit syncs being imported ++ * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers ++ * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason. 
++	 */
++	panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
++				DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
++}
++
++void panthor_sched_unplug(struct panthor_device *ptdev)
++{
++	struct panthor_scheduler *sched = ptdev->scheduler;
++
++	cancel_delayed_work_sync(&sched->tick_work);
++
++	mutex_lock(&sched->lock);
++	if (sched->pm.has_ref) {
++		pm_runtime_put(ptdev->base.dev);
++		sched->pm.has_ref = false;
++	}
++	mutex_unlock(&sched->lock);
++}
++
++static void panthor_sched_fini(struct drm_device *ddev, void *res)
++{
++	struct panthor_scheduler *sched = res;
++	int prio;
++
++	if (!sched || !sched->csg_slot_count)
++		return;
++
++	cancel_delayed_work_sync(&sched->tick_work);
++
++	if (sched->wq)
++		destroy_workqueue(sched->wq);
++
++	if (sched->heap_alloc_wq)
++		destroy_workqueue(sched->heap_alloc_wq);
++
++	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
++		drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
++		drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
++	}
++
++	drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
++}
++
++int panthor_sched_init(struct panthor_device *ptdev)
++{
++	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
++	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
++	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
++	struct panthor_scheduler *sched;
++	u32 gpu_as_count, num_groups;
++	int prio, ret;
++
++	sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
++	if (!sched)
++		return -ENOMEM;
++
++	/* The highest bit in JOB_INT_* is reserved for global IRQs. That
++	 * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here.
++	 */
++	num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num);
++
++	/* The FW-side scheduler might deadlock if two groups with the same
++	 * priority try to access a set of resources that overlaps, with part
++	 * of the resources being allocated to one group and the other part to
++	 * the other group, both groups waiting for the remaining resources to
++	 * be allocated. To avoid that, it is recommended to assign each CSG a
++	 * different priority. In theory we could allow several groups to have
++	 * the same CSG priority if they don't request the same resources, but
++	 * that makes the scheduling logic more complicated, so let's clamp
++	 * the number of CSG slots to MAX_CSG_PRIO + 1 for now.
++	 */
++	num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups);
++
++	/* We need at least one AS for the MCU and one for the GPU contexts. */
++	gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1));
++	if (!gpu_as_count) {
++		drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
++			gpu_as_count + 1);
++		return -EINVAL;
++	}
++
++	sched->ptdev = ptdev;
++	sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features);
++	sched->csg_slot_count = num_groups;
++	sched->cs_slot_count = csg_iface->control->stream_num;
++	sched->as_slot_count = gpu_as_count;
++	ptdev->csif_info.csg_slot_count = sched->csg_slot_count;
++	ptdev->csif_info.cs_slot_count = sched->cs_slot_count;
++	ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count;
++
++	sched->last_tick = 0;
++	sched->resched_target = U64_MAX;
++	sched->tick_period = msecs_to_jiffies(10);
++	INIT_DELAYED_WORK(&sched->tick_work, tick_work);
++	INIT_WORK(&sched->sync_upd_work, sync_upd_work);
++	INIT_WORK(&sched->fw_events_work, process_fw_events_work);
++
++	ret = drmm_mutex_init(&ptdev->base, &sched->lock);
++	if (ret)
++		return ret;
++
++	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
++		INIT_LIST_HEAD(&sched->groups.runnable[prio]);
++		INIT_LIST_HEAD(&sched->groups.idle[prio]);
++	}
++	INIT_LIST_HEAD(&sched->groups.waiting);
++
++	ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock);
++	if (ret)
++		return ret;
++
++	INIT_LIST_HEAD(&sched->reset.stopped_groups);
++
++	/* sched->heap_alloc_wq will be used for heap chunk allocation on
++	 * tiler OOM events, which means we can't use the same workqueue for
++	 * the scheduler because works queued by the scheduler are in
++	 * the dma-signalling path. Allocate a dedicated heap_alloc_wq to
++	 * work around this limitation.
++	 *
++	 * FIXME: Ultimately, what we need is a failable/non-blocking GEM
++	 * allocation path that we can call when a heap OOM is reported. The
++	 * FW is smart enough to fall back on other methods if the kernel can't
++	 * allocate memory, and fail the tiling job if none of these
++	 * countermeasures worked.
++	 *
++	 * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the
++	 * system is running out of memory.
++	 */
++	sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
++	sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
++	if (!sched->wq || !sched->heap_alloc_wq) {
++		panthor_sched_fini(&ptdev->base, sched);
++		drm_err(&ptdev->base, "Failed to allocate the workqueues");
++		return -ENOMEM;
++	}
++
++	ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
++	if (ret)
++		return ret;
++
++	ptdev->scheduler = sched;
++	return 0;
++}
+--- /dev/null
++++ b/drivers/gpu/drm/panthor/panthor_sched.h
+@@ -0,0 +1,50 @@
++/* SPDX-License-Identifier: GPL-2.0 or MIT */
++/* Copyright 2023 Collabora ltd.
*/ ++ ++#ifndef __PANTHOR_SCHED_H__ ++#define __PANTHOR_SCHED_H__ ++ ++struct drm_exec; ++struct dma_fence; ++struct drm_file; ++struct drm_gem_object; ++struct drm_sched_job; ++struct drm_panthor_group_create; ++struct drm_panthor_queue_create; ++struct drm_panthor_group_get_state; ++struct drm_panthor_queue_submit; ++struct panthor_device; ++struct panthor_file; ++struct panthor_group_pool; ++struct panthor_job; ++ ++int panthor_group_create(struct panthor_file *pfile, ++ const struct drm_panthor_group_create *group_args, ++ const struct drm_panthor_queue_create *queue_args); ++int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle); ++int panthor_group_get_state(struct panthor_file *pfile, ++ struct drm_panthor_group_get_state *get_state); ++ ++struct drm_sched_job * ++panthor_job_create(struct panthor_file *pfile, ++ u16 group_handle, ++ const struct drm_panthor_queue_submit *qsubmit); ++struct drm_sched_job *panthor_job_get(struct drm_sched_job *job); ++struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job); ++void panthor_job_put(struct drm_sched_job *job); ++void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); ++ ++int panthor_group_pool_create(struct panthor_file *pfile); ++void panthor_group_pool_destroy(struct panthor_file *pfile); ++ ++int panthor_sched_init(struct panthor_device *ptdev); ++void panthor_sched_unplug(struct panthor_device *ptdev); ++void panthor_sched_pre_reset(struct panthor_device *ptdev); ++void panthor_sched_post_reset(struct panthor_device *ptdev); ++void panthor_sched_suspend(struct panthor_device *ptdev); ++void panthor_sched_resume(struct panthor_device *ptdev); ++ ++void panthor_sched_report_mmu_fault(struct panthor_device *ptdev); ++void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events); ++ ++#endif diff --git a/patches-6.6/034-40-v6.10-drm-panthor-Add-the-driver-frontend-block.patch b/patches-6.6/034-40-v6.10-drm-panthor-Add-the-driver-frontend-block.patch new file mode 100644 index 0000000..7a798a5 --- /dev/null +++ b/patches-6.6/034-40-v6.10-drm-panthor-Add-the-driver-frontend-block.patch @@ -0,0 +1,1534 @@ +From 4bdca11507928a4c9174e9b7240e9d058c12a71d Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:25 +0100 +Subject: [PATCH] drm/panthor: Add the driver frontend block + +This is the last piece missing to expose the driver to the outside +world. + +This is basically a wrapper between the ioctls and the other logical +blocks. + +v6: +- Add Maxime's and Heiko's acks +- Return a page-aligned BO size to userspace +- Keep header inclusion alphabetically ordered + +v5: +- Account for the drm_exec_init() prototype change +- Include platform_device.h + +v4: +- Add an ioctl to let the UMD query the VM state +- Fix kernel doc +- Let panthor_device_init() call panthor_device_init() +- Fix cleanup ordering in the panthor_init() error path +- Add Steve's and Liviu's R-b + +v3: +- Add acks for the MIT/GPL2 relicensing +- Fix 32-bit support +- Account for panthor_vm and panthor_sched changes +- Simplify the resv preparation/update logic +- Use a linked list rather than xarray for list of signals. +- Simplify panthor_get_uobj_array by returning the newly allocated + array. +- Drop the "DOC" for job submission helpers and move the relevant + comments to panthor_ioctl_group_submit(). +- Add helpers sync_op_is_signal()/sync_op_is_wait(). +- Simplify return type of panthor_submit_ctx_add_sync_signal() and + panthor_submit_ctx_get_sync_signal(). 
+- Drop WARN_ON from panthor_submit_ctx_add_job(). +- Fix typos in comments. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-12-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_drv.c | 1473 +++++++++++++++++++++++++ + 1 file changed, 1473 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_drv.c + +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -0,0 +1,1472 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd., Rob Herring */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++/** ++ * DOC: user <-> kernel object copy helpers. ++ */ ++ ++/** ++ * panthor_set_uobj() - Copy kernel object to user object. ++ * @usr_ptr: Users pointer. ++ * @usr_size: Size of the user object. ++ * @min_size: Minimum size for this object. ++ * @kern_size: Size of the kernel object. ++ * @in: Address of the kernel object to copy. ++ * ++ * Helper automating kernel -> user object copies. ++ * ++ * Don't use this function directly, use PANTHOR_UOBJ_SET() instead. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const void *in) ++{ ++ /* User size shouldn't be smaller than the minimal object size. */ ++ if (usr_size < min_size) ++ return -EINVAL; ++ ++ if (copy_to_user(u64_to_user_ptr(usr_ptr), in, min_t(u32, usr_size, kern_size))) ++ return -EFAULT; ++ ++ /* When the kernel object is smaller than the user object, we fill the gap with ++ * zeros. ++ */ ++ if (usr_size > kern_size && ++ clear_user(u64_to_user_ptr(usr_ptr + kern_size), usr_size - kern_size)) { ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array. ++ * @in: The object array to copy. ++ * @min_stride: Minimum array stride. ++ * @obj_size: Kernel object size. ++ * ++ * Helper automating user -> kernel object copies. ++ * ++ * Don't use this function directly, use PANTHOR_UOBJ_GET_ARRAY() instead. ++ * ++ * Return: newly allocated object array or an ERR_PTR on error. ++ */ ++static void * ++panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, ++ u32 obj_size) ++{ ++ int ret = 0; ++ void *out_alloc; ++ ++ /* User stride must be at least the minimum object size, otherwise it might ++ * lack useful information. ++ */ ++ if (in->stride < min_stride) ++ return ERR_PTR(-EINVAL); ++ ++ if (!in->count) ++ return NULL; ++ ++ out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); ++ if (!out_alloc) ++ return ERR_PTR(-ENOMEM); ++ ++ if (obj_size == in->stride) { ++ /* Fast path when user/kernel have the same uAPI header version. 
*/ ++ if (copy_from_user(out_alloc, u64_to_user_ptr(in->array), ++ (unsigned long)obj_size * in->count)) ++ ret = -EFAULT; ++ } else { ++ void __user *in_ptr = u64_to_user_ptr(in->array); ++ void *out_ptr = out_alloc; ++ ++ /* If the sizes differ, we need to copy elements one by one. */ ++ for (u32 i = 0; i < in->count; i++) { ++ ret = copy_struct_from_user(out_ptr, obj_size, in_ptr, in->stride); ++ if (ret) ++ break; ++ ++ out_ptr += obj_size; ++ in_ptr += in->stride; ++ } ++ } ++ ++ if (ret) { ++ kvfree(out_alloc); ++ return ERR_PTR(ret); ++ } ++ ++ return out_alloc; ++} ++ ++/** ++ * PANTHOR_UOBJ_MIN_SIZE_INTERNAL() - Get the minimum user object size ++ * @_typename: Object type. ++ * @_last_mandatory_field: Last mandatory field. ++ * ++ * Get the minimum user object size based on the last mandatory field name, ++ * A.K.A, the name of the last field of the structure at the time this ++ * structure was added to the uAPI. ++ * ++ * Don't use directly, use PANTHOR_UOBJ_DECL() instead. ++ */ ++#define PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) \ ++ (offsetof(_typename, _last_mandatory_field) + \ ++ sizeof(((_typename *)NULL)->_last_mandatory_field)) ++ ++/** ++ * PANTHOR_UOBJ_DECL() - Declare a new uAPI object whose subject to ++ * evolutions. ++ * @_typename: Object type. ++ * @_last_mandatory_field: Last mandatory field. ++ * ++ * Should be used to extend the PANTHOR_UOBJ_MIN_SIZE() list. ++ */ ++#define PANTHOR_UOBJ_DECL(_typename, _last_mandatory_field) \ ++ _typename : PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) ++ ++/** ++ * PANTHOR_UOBJ_MIN_SIZE() - Get the minimum size of a given uAPI object ++ * @_obj_name: Object to get the minimum size of. ++ * ++ * Don't use this macro directly, it's automatically called by ++ * PANTHOR_UOBJ_{SET,GET_ARRAY}(). ++ */ ++#define PANTHOR_UOBJ_MIN_SIZE(_obj_name) \ ++ _Generic(_obj_name, \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs)) ++ ++/** ++ * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object. ++ * @_dest_usr_ptr: User pointer to copy to. ++ * @_usr_size: Size of the user object. ++ * @_src_obj: Kernel object to copy (not a pointer). ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define PANTHOR_UOBJ_SET(_dest_usr_ptr, _usr_size, _src_obj) \ ++ panthor_set_uobj(_dest_usr_ptr, _usr_size, \ ++ PANTHOR_UOBJ_MIN_SIZE(_src_obj), \ ++ sizeof(_src_obj), &(_src_obj)) ++ ++/** ++ * PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible ++ * object array. ++ * @_dest_array: Local variable that will hold the newly allocated kernel ++ * object array. ++ * @_uobj_array: The drm_panthor_obj_array object describing the user object ++ * array. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++#define PANTHOR_UOBJ_GET_ARRAY(_dest_array, _uobj_array) \ ++ ({ \ ++ typeof(_dest_array) _tmp; \ ++ _tmp = panthor_get_uobj_array(_uobj_array, \ ++ PANTHOR_UOBJ_MIN_SIZE((_dest_array)[0]), \ ++ sizeof((_dest_array)[0])); \ ++ if (!IS_ERR(_tmp)) \ ++ _dest_array = _tmp; \ ++ PTR_ERR_OR_ZERO(_tmp); \ ++ }) ++ ++/** ++ * struct panthor_sync_signal - Represent a synchronization object point to attach ++ * our job fence to. ++ * ++ * This structure is here to keep track of fences that are currently bound to ++ * a specific syncobj point. ++ * ++ * At the beginning of a job submission, the fence ++ * is retrieved from the syncobj itself, and can be NULL if no fence was attached ++ * to this point. ++ * ++ * At the end, it points to the fence of the last job that had a ++ * %DRM_PANTHOR_SYNC_OP_SIGNAL on this syncobj. ++ * ++ * With jobs being submitted in batches, the fence might change several times during ++ * the process, allowing one job to wait on a job that's part of the same submission ++ * but appears earlier in the drm_panthor_group_submit::queue_submits array. ++ */ ++struct panthor_sync_signal { ++ /** @node: list_head to track signal ops within a submit operation */ ++ struct list_head node; ++ ++ /** @handle: The syncobj handle. */ ++ u32 handle; ++ ++ /** ++ * @point: The syncobj point. ++ * ++ * Zero for regular syncobjs, and non-zero for timeline syncobjs. ++ */ ++ u64 point; ++ ++ /** ++ * @syncobj: The sync object pointed by @handle. ++ */ ++ struct drm_syncobj *syncobj; ++ ++ /** ++ * @chain: Chain object used to link the new fence to an existing ++ * timeline syncobj. ++ * ++ * NULL for regular syncobj, non-NULL for timeline syncobjs. ++ */ ++ struct dma_fence_chain *chain; ++ ++ /** ++ * @fence: The fence to assign to the syncobj or syncobj-point. ++ */ ++ struct dma_fence *fence; ++}; ++ ++/** ++ * struct panthor_job_ctx - Job context ++ */ ++struct panthor_job_ctx { ++ /** @job: The job that is about to be submitted to drm_sched. */ ++ struct drm_sched_job *job; ++ ++ /** @syncops: Array of sync operations. */ ++ struct drm_panthor_sync_op *syncops; ++ ++ /** @syncop_count: Number of sync operations. */ ++ u32 syncop_count; ++}; ++ ++/** ++ * struct panthor_submit_ctx - Submission context ++ * ++ * Anything that's related to a submission (%DRM_IOCTL_PANTHOR_VM_BIND or ++ * %DRM_IOCTL_PANTHOR_GROUP_SUBMIT) is kept here, so we can automate the ++ * initialization and cleanup steps. ++ */ ++struct panthor_submit_ctx { ++ /** @file: DRM file this submission happens on. */ ++ struct drm_file *file; ++ ++ /** ++ * @signals: List of struct panthor_sync_signal. ++ * ++ * %DRM_PANTHOR_SYNC_OP_SIGNAL operations will be recorded here, ++ * and %DRM_PANTHOR_SYNC_OP_WAIT will first check if an entry ++ * matching the syncobj+point exists before calling ++ * drm_syncobj_find_fence(). This allows us to describe dependencies ++ * existing between jobs that are part of the same batch. ++ */ ++ struct list_head signals; ++ ++ /** @jobs: Array of jobs. */ ++ struct panthor_job_ctx *jobs; ++ ++ /** @job_count: Number of entries in the @jobs array. */ ++ u32 job_count; ++ ++ /** @exec: drm_exec context used to acquire and prepare resv objects. 
*/ ++ struct drm_exec exec; ++}; ++ ++#define PANTHOR_SYNC_OP_FLAGS_MASK \ ++ (DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK | DRM_PANTHOR_SYNC_OP_SIGNAL) ++ ++static bool sync_op_is_signal(const struct drm_panthor_sync_op *sync_op) ++{ ++ return !!(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); ++} ++ ++static bool sync_op_is_wait(const struct drm_panthor_sync_op *sync_op) ++{ ++ /* Note that DRM_PANTHOR_SYNC_OP_WAIT == 0 */ ++ return !(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); ++} ++ ++/** ++ * panthor_check_sync_op() - Check drm_panthor_sync_op fields ++ * @sync_op: The sync operation to check. ++ * ++ * Return: 0 on success, -EINVAL otherwise. ++ */ ++static int ++panthor_check_sync_op(const struct drm_panthor_sync_op *sync_op) ++{ ++ u8 handle_type; ++ ++ if (sync_op->flags & ~PANTHOR_SYNC_OP_FLAGS_MASK) ++ return -EINVAL; ++ ++ handle_type = sync_op->flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK; ++ if (handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && ++ handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ) ++ return -EINVAL; ++ ++ if (handle_type == DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && ++ sync_op->timeline_value != 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++/** ++ * panthor_sync_signal_free() - Release resources and free a panthor_sync_signal object ++ * @sig_sync: Signal object to free. ++ */ ++static void ++panthor_sync_signal_free(struct panthor_sync_signal *sig_sync) ++{ ++ if (!sig_sync) ++ return; ++ ++ drm_syncobj_put(sig_sync->syncobj); ++ dma_fence_chain_free(sig_sync->chain); ++ dma_fence_put(sig_sync->fence); ++ kfree(sig_sync); ++} ++ ++/** ++ * panthor_submit_ctx_add_sync_signal() - Add a signal operation to a submit context ++ * @ctx: Context to add the signal operation to. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: 0 on success, otherwise negative error value. ++ */ ++static int ++panthor_submit_ctx_add_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ struct dma_fence *cur_fence; ++ int ret; ++ ++ sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL); ++ if (!sig_sync) ++ return -ENOMEM; ++ ++ sig_sync->handle = handle; ++ sig_sync->point = point; ++ ++ if (point > 0) { ++ sig_sync->chain = dma_fence_chain_alloc(); ++ if (!sig_sync->chain) { ++ ret = -ENOMEM; ++ goto err_free_sig_sync; ++ } ++ } ++ ++ sig_sync->syncobj = drm_syncobj_find(ctx->file, handle); ++ if (!sig_sync->syncobj) { ++ ret = -EINVAL; ++ goto err_free_sig_sync; ++ } ++ ++ /* Retrieve the current fence attached to that point. It's ++ * perfectly fine to get a NULL fence here, it just means there's ++ * no fence attached to that point yet. ++ */ ++ if (!drm_syncobj_find_fence(ctx->file, handle, point, 0, &cur_fence)) ++ sig_sync->fence = cur_fence; ++ ++ list_add_tail(&sig_sync->node, &ctx->signals); ++ ++ return 0; ++ ++err_free_sig_sync: ++ panthor_sync_signal_free(sig_sync); ++ return ret; ++} ++ ++/** ++ * panthor_submit_ctx_search_sync_signal() - Search an existing signal operation in a ++ * submit context. ++ * @ctx: Context to search the signal operation in. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: A valid panthor_sync_signal object if found, NULL otherwise. 
++ */ ++static struct panthor_sync_signal * ++panthor_submit_ctx_search_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ list_for_each_entry(sig_sync, &ctx->signals, node) { ++ if (handle == sig_sync->handle && point == sig_sync->point) ++ return sig_sync; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * panthor_submit_ctx_add_job() - Add a job to a submit context ++ * @ctx: Context to search the signal operation in. ++ * @idx: Index of the job in the context. ++ * @job: Job to add. ++ * @syncs: Sync operations provided by userspace. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_add_job(struct panthor_submit_ctx *ctx, u32 idx, ++ struct drm_sched_job *job, ++ const struct drm_panthor_obj_array *syncs) ++{ ++ int ret; ++ ++ ctx->jobs[idx].job = job; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(ctx->jobs[idx].syncops, syncs); ++ if (ret) ++ return ret; ++ ++ ctx->jobs[idx].syncop_count = syncs->count; ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_get_sync_signal() - Search signal operation and add one if none was found. ++ * @ctx: Context to search the signal operation in. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_get_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, handle, point); ++ if (sig_sync) ++ return 0; ++ ++ return panthor_submit_ctx_add_sync_signal(ctx, handle, point); ++} ++ ++/** ++ * panthor_submit_ctx_update_job_sync_signal_fences() - Update fences ++ * on the signal operations specified by a job. ++ * @ctx: Context to search the signal operation in. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_update_job_sync_signal_fences(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ struct panthor_device *ptdev = container_of(ctx->file->minor->dev, ++ struct panthor_device, ++ base); ++ struct dma_fence *done_fence = &ctx->jobs[job_idx].job->s_fence->finished; ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ struct dma_fence *old_fence; ++ struct panthor_sync_signal *sig_sync; ++ ++ if (!sync_op_is_signal(&sync_ops[i])) ++ continue; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (drm_WARN_ON(&ptdev->base, !sig_sync)) ++ return -EINVAL; ++ ++ old_fence = sig_sync->fence; ++ sig_sync->fence = dma_fence_get(done_fence); ++ dma_fence_put(old_fence); ++ ++ if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_collect_job_signal_ops() - Iterate over all job signal operations ++ * and add them to the context. ++ * @ctx: Context to search the signal operation in. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++static int ++panthor_submit_ctx_collect_job_signal_ops(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ int ret; ++ ++ if (!sync_op_is_signal(&sync_ops[i])) ++ continue; ++ ++ ret = panthor_check_sync_op(&sync_ops[i]); ++ if (ret) ++ return ret; ++ ++ ret = panthor_submit_ctx_get_sync_signal(ctx, ++ sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_push_fences() - Iterate over the signal array, and for each entry, push ++ * the currently assigned fence to the associated syncobj. ++ * @ctx: Context to push fences on. ++ * ++ * This is the last step of a submission procedure, and is done once we know the submission ++ * is effective and job fences are guaranteed to be signaled in finite time. ++ */ ++static void ++panthor_submit_ctx_push_fences(struct panthor_submit_ctx *ctx) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ list_for_each_entry(sig_sync, &ctx->signals, node) { ++ if (sig_sync->chain) { ++ drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain, ++ sig_sync->fence, sig_sync->point); ++ sig_sync->chain = NULL; ++ } else { ++ drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence); ++ } ++ } ++} ++ ++/** ++ * panthor_submit_ctx_add_sync_deps_to_job() - Add sync wait operations as ++ * job dependencies. ++ * @ctx: Submit context. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_add_sync_deps_to_job(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ struct panthor_device *ptdev = container_of(ctx->file->minor->dev, ++ struct panthor_device, ++ base); ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ struct drm_sched_job *job = ctx->jobs[job_idx].job; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ int ret = 0; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ struct panthor_sync_signal *sig_sync; ++ struct dma_fence *fence; ++ ++ if (!sync_op_is_wait(&sync_ops[i])) ++ continue; ++ ++ ret = panthor_check_sync_op(&sync_ops[i]); ++ if (ret) ++ return ret; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (sig_sync) { ++ if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) ++ return -EINVAL; ++ ++ fence = dma_fence_get(sig_sync->fence); ++ } else { ++ ret = drm_syncobj_find_fence(ctx->file, sync_ops[i].handle, ++ sync_ops[i].timeline_value, ++ 0, &fence); ++ if (ret) ++ return ret; ++ } ++ ++ ret = drm_sched_job_add_dependency(job, fence); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_collect_jobs_signal_ops() - Collect all signal operations ++ * and add them to the submit context. ++ * @ctx: Submit context. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_collect_jobs_signal_ops(struct panthor_submit_ctx *ctx) ++{ ++ for (u32 i = 0; i < ctx->job_count; i++) { ++ int ret; ++ ++ ret = panthor_submit_ctx_collect_job_signal_ops(ctx, i); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_add_deps_and_arm_jobs() - Add jobs dependencies and arm jobs ++ * @ctx: Submit context. ++ * ++ * Must be called after the resv preparation has been taken care of. 
++ *
++ * Return: 0 on success, a negative error code otherwise.
++ */
++static int
++panthor_submit_ctx_add_deps_and_arm_jobs(struct panthor_submit_ctx *ctx)
++{
++	for (u32 i = 0; i < ctx->job_count; i++) {
++		int ret;
++
++		ret = panthor_submit_ctx_add_sync_deps_to_job(ctx, i);
++		if (ret)
++			return ret;
++
++		drm_sched_job_arm(ctx->jobs[i].job);
++
++		ret = panthor_submit_ctx_update_job_sync_signal_fences(ctx, i);
++		if (ret)
++			return ret;
++	}
++
++	return 0;
++}
++
++/**
++ * panthor_submit_ctx_push_jobs() - Push jobs to their scheduling entities.
++ * @ctx: Submit context.
++ * @upd_resvs: Callback used to update reservation objects that were previously
++ * prepared.
++ */
++static void
++panthor_submit_ctx_push_jobs(struct panthor_submit_ctx *ctx,
++			     void (*upd_resvs)(struct drm_exec *, struct drm_sched_job *))
++{
++	for (u32 i = 0; i < ctx->job_count; i++) {
++		upd_resvs(&ctx->exec, ctx->jobs[i].job);
++		drm_sched_entity_push_job(ctx->jobs[i].job);
++
++		/* Job is owned by the scheduler now. */
++		ctx->jobs[i].job = NULL;
++	}
++
++	panthor_submit_ctx_push_fences(ctx);
++}
++
++/**
++ * panthor_submit_ctx_init() - Initializes a submission context
++ * @ctx: Submit context to initialize.
++ * @file: drm_file this submission happens on.
++ * @job_count: Number of jobs that will be submitted.
++ *
++ * Return: 0 on success, a negative error code otherwise.
++ */
++static int panthor_submit_ctx_init(struct panthor_submit_ctx *ctx,
++				   struct drm_file *file, u32 job_count)
++{
++	ctx->jobs = kvmalloc_array(job_count, sizeof(*ctx->jobs),
++				   GFP_KERNEL | __GFP_ZERO);
++	if (!ctx->jobs)
++		return -ENOMEM;
++
++	ctx->file = file;
++	ctx->job_count = job_count;
++	INIT_LIST_HEAD(&ctx->signals);
++	drm_exec_init(&ctx->exec,
++		      DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES);
++	return 0;
++}
++
++/**
++ * panthor_submit_ctx_cleanup() - Cleanup a submission context
++ * @ctx: Submit context to cleanup.
++ * @job_put: Job put callback.
++ */ ++static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx, ++ void (*job_put)(struct drm_sched_job *)) ++{ ++ struct panthor_sync_signal *sig_sync, *tmp; ++ unsigned long i; ++ ++ drm_exec_fini(&ctx->exec); ++ ++ list_for_each_entry_safe(sig_sync, tmp, &ctx->signals, node) ++ panthor_sync_signal_free(sig_sync); ++ ++ for (i = 0; i < ctx->job_count; i++) { ++ job_put(ctx->jobs[i].job); ++ kvfree(ctx->jobs[i].syncops); ++ } ++ ++ kvfree(ctx->jobs); ++} ++ ++static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct drm_panthor_dev_query *args = data; ++ ++ if (!args->pointer) { ++ switch (args->type) { ++ case DRM_PANTHOR_DEV_QUERY_GPU_INFO: ++ args->size = sizeof(ptdev->gpu_info); ++ return 0; ++ ++ case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: ++ args->size = sizeof(ptdev->csif_info); ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++ } ++ ++ switch (args->type) { ++ case DRM_PANTHOR_DEV_QUERY_GPU_INFO: ++ return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->gpu_info); ++ ++ case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: ++ return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info); ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++#define PANTHOR_VM_CREATE_FLAGS 0 ++ ++static int panthor_ioctl_vm_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_create *args = data; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ ret = panthor_vm_pool_create_vm(ptdev, pfile->vms, args); ++ if (ret >= 0) { ++ args->id = ret; ++ ret = 0; ++ } ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_destroy *args = data; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ return panthor_vm_pool_destroy_vm(pfile->vms, args->id); ++} ++ ++#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP ++ ++static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_bo_create *args = data; ++ struct panthor_vm *vm = NULL; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ if (!args->size || args->pad || ++ (args->flags & ~PANTHOR_BO_FLAGS)) { ++ ret = -EINVAL; ++ goto out_dev_exit; ++ } ++ ++ if (args->exclusive_vm_id) { ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); ++ if (!vm) { ++ ret = -EINVAL; ++ goto out_dev_exit; ++ } ++ } ++ ++ ret = panthor_gem_create_with_handle(file, ddev, vm, &args->size, ++ args->flags, &args->handle); ++ ++ panthor_vm_put(vm); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct drm_panthor_bo_mmap_offset *args = data; ++ struct drm_gem_object *obj; ++ int ret; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ obj = drm_gem_object_lookup(file, args->handle); ++ if (!obj) ++ return -ENOENT; ++ ++ ret = drm_gem_create_mmap_offset(obj); ++ if (ret) ++ goto out; ++ ++ args->offset = drm_vma_node_offset_addr(&obj->vma_node); ++ ++out: ++ drm_gem_object_put(obj); ++ return ret; 
++} ++ ++static int panthor_ioctl_group_submit(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_submit *args = data; ++ struct drm_panthor_queue_submit *jobs_args; ++ struct panthor_submit_ctx ctx; ++ int ret = 0, cookie; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->queue_submits); ++ if (ret) ++ goto out_dev_exit; ++ ++ ret = panthor_submit_ctx_init(&ctx, file, args->queue_submits.count); ++ if (ret) ++ goto out_free_jobs_args; ++ ++ /* Create jobs and attach sync operations */ ++ for (u32 i = 0; i < args->queue_submits.count; i++) { ++ const struct drm_panthor_queue_submit *qsubmit = &jobs_args[i]; ++ struct drm_sched_job *job; ++ ++ job = panthor_job_create(pfile, args->group_handle, qsubmit); ++ if (IS_ERR(job)) { ++ ret = PTR_ERR(job); ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_add_job(&ctx, i, job, &qsubmit->syncs); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ /* ++ * Collect signal operations on all jobs, such that each job can pick ++ * from it for its dependencies and update the fence to signal when the ++ * job is submitted. ++ */ ++ ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* ++ * We acquire/prepare revs on all jobs before proceeding with the ++ * dependency registration. ++ * ++ * This is solving two problems: ++ * 1. drm_sched_job_arm() and drm_sched_entity_push_job() must be ++ * protected by a lock to make sure no concurrent access to the same ++ * entity get interleaved, which would mess up with the fence seqno ++ * ordering. Luckily, one of the resv being acquired is the VM resv, ++ * and a scheduling entity is only bound to a single VM. As soon as ++ * we acquire the VM resv, we should be safe. ++ * 2. Jobs might depend on fences that were issued by previous jobs in ++ * the same batch, so we can't add dependencies on all jobs before ++ * arming previous jobs and registering the fence to the signal ++ * array, otherwise we might miss dependencies, or point to an ++ * outdated fence. ++ */ ++ if (args->queue_submits.count > 0) { ++ /* All jobs target the same group, so they also point to the same VM. */ ++ struct panthor_vm *vm = panthor_job_vm(ctx.jobs[0].job); ++ ++ drm_exec_until_all_locked(&ctx.exec) { ++ ret = panthor_vm_prepare_mapped_bos_resvs(&ctx.exec, vm, ++ args->queue_submits.count); ++ } ++ ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ /* ++ * Now that resvs are locked/prepared, we can iterate over each job to ++ * add the dependencies, arm the job fence, register the job fence to ++ * the signal array. ++ */ ++ ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Nothing can fail after that point, so we can make our job fences ++ * visible to the outside world. Push jobs and set the job fences to ++ * the resv slots we reserved. This also pushes the fences to the ++ * syncobjs that are part of the signal array. 
++ */ ++ panthor_submit_ctx_push_jobs(&ctx, panthor_job_update_resvs); ++ ++out_cleanup_submit_ctx: ++ panthor_submit_ctx_cleanup(&ctx, panthor_job_put); ++ ++out_free_jobs_args: ++ kvfree(jobs_args); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_destroy *args = data; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ return panthor_group_destroy(pfile, args->group_handle); ++} ++ ++static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_create *args = data; ++ struct drm_panthor_queue_create *queue_args; ++ int ret; ++ ++ if (!args->queues.count) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues); ++ if (ret) ++ return ret; ++ ++ ret = panthor_group_create(pfile, args, queue_args); ++ if (ret >= 0) { ++ args->group_handle = ret; ++ ret = 0; ++ } ++ ++ kvfree(queue_args); ++ return ret; ++} ++ ++static int panthor_ioctl_group_get_state(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_get_state *args = data; ++ ++ return panthor_group_get_state(pfile, args); ++} ++ ++static int panthor_ioctl_tiler_heap_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_tiler_heap_create *args = data; ++ struct panthor_heap_pool *pool; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ pool = panthor_vm_get_heap_pool(vm, true); ++ if (IS_ERR(pool)) { ++ ret = PTR_ERR(pool); ++ goto out_put_vm; ++ } ++ ++ ret = panthor_heap_create(pool, ++ args->initial_chunk_count, ++ args->chunk_size, ++ args->max_chunks, ++ args->target_in_flight, ++ &args->tiler_heap_ctx_gpu_va, ++ &args->first_heap_chunk_gpu_va); ++ if (ret < 0) ++ goto out_put_heap_pool; ++ ++ /* Heap pools are per-VM. We combine the VM and HEAP id to make ++ * a unique heap handle. 
++ */ ++ args->handle = (args->vm_id << 16) | ret; ++ ret = 0; ++ ++out_put_heap_pool: ++ panthor_heap_pool_put(pool); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_tiler_heap_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_tiler_heap_destroy *args = data; ++ struct panthor_heap_pool *pool; ++ struct panthor_vm *vm; ++ int ret; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->handle >> 16); ++ if (!vm) ++ return -EINVAL; ++ ++ pool = panthor_vm_get_heap_pool(vm, false); ++ if (!pool) { ++ ret = -EINVAL; ++ goto out_put_vm; ++ } ++ ++ ret = panthor_heap_destroy(pool, args->handle & GENMASK(15, 0)); ++ panthor_heap_pool_put(pool); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_bind_async(struct drm_device *ddev, ++ struct drm_panthor_vm_bind *args, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_bind_op *jobs_args; ++ struct panthor_submit_ctx ctx; ++ struct panthor_vm *vm; ++ int ret = 0; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); ++ if (ret) ++ goto out_put_vm; ++ ++ ret = panthor_submit_ctx_init(&ctx, file, args->ops.count); ++ if (ret) ++ goto out_free_jobs_args; ++ ++ for (u32 i = 0; i < args->ops.count; i++) { ++ struct drm_panthor_vm_bind_op *op = &jobs_args[i]; ++ struct drm_sched_job *job; ++ ++ job = panthor_vm_bind_job_create(file, vm, op); ++ if (IS_ERR(job)) { ++ ret = PTR_ERR(job); ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_add_job(&ctx, i, job, &op->syncs); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Prepare reservation objects for each VM_BIND job. */ ++ drm_exec_until_all_locked(&ctx.exec) { ++ for (u32 i = 0; i < ctx.job_count; i++) { ++ ret = panthor_vm_bind_job_prepare_resvs(&ctx.exec, ctx.jobs[i].job); ++ drm_exec_retry_on_contention(&ctx.exec); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ } ++ ++ ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Nothing can fail after that point. */ ++ panthor_submit_ctx_push_jobs(&ctx, panthor_vm_bind_job_update_resvs); ++ ++out_cleanup_submit_ctx: ++ panthor_submit_ctx_cleanup(&ctx, panthor_vm_bind_job_put); ++ ++out_free_jobs_args: ++ kvfree(jobs_args); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_bind_sync(struct drm_device *ddev, ++ struct drm_panthor_vm_bind *args, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_bind_op *jobs_args; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); ++ if (ret) ++ goto out_put_vm; ++ ++ for (u32 i = 0; i < args->ops.count; i++) { ++ ret = panthor_vm_bind_exec_sync_op(file, vm, &jobs_args[i]); ++ if (ret) { ++ /* Update ops.count so the user knows where things failed. 
*/ ++ args->ops.count = i; ++ break; ++ } ++ } ++ ++ kvfree(jobs_args); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++#define PANTHOR_VM_BIND_FLAGS DRM_PANTHOR_VM_BIND_ASYNC ++ ++static int panthor_ioctl_vm_bind(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct drm_panthor_vm_bind *args = data; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ if (args->flags & DRM_PANTHOR_VM_BIND_ASYNC) ++ ret = panthor_ioctl_vm_bind_async(ddev, args, file); ++ else ++ ret = panthor_ioctl_vm_bind_sync(ddev, args, file); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_get_state *args = data; ++ struct panthor_vm *vm; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ if (panthor_vm_is_unusable(vm)) ++ args->state = DRM_PANTHOR_VM_STATE_UNUSABLE; ++ else ++ args->state = DRM_PANTHOR_VM_STATE_USABLE; ++ ++ panthor_vm_put(vm); ++ return 0; ++} ++ ++static int ++panthor_open(struct drm_device *ddev, struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_file *pfile; ++ int ret; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return -EINVAL; ++ ++ pfile = kzalloc(sizeof(*pfile), GFP_KERNEL); ++ if (!pfile) { ++ ret = -ENOMEM; ++ goto err_put_mod; ++ } ++ ++ pfile->ptdev = ptdev; ++ ++ ret = panthor_vm_pool_create(pfile); ++ if (ret) ++ goto err_free_file; ++ ++ ret = panthor_group_pool_create(pfile); ++ if (ret) ++ goto err_destroy_vm_pool; ++ ++ file->driver_priv = pfile; ++ return 0; ++ ++err_destroy_vm_pool: ++ panthor_vm_pool_destroy(pfile); ++ ++err_free_file: ++ kfree(pfile); ++ ++err_put_mod: ++ module_put(THIS_MODULE); ++ return ret; ++} ++ ++static void ++panthor_postclose(struct drm_device *ddev, struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ ++ panthor_group_pool_destroy(pfile); ++ panthor_vm_pool_destroy(pfile); ++ ++ kfree(pfile); ++ module_put(THIS_MODULE); ++} ++ ++static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { ++#define PANTHOR_IOCTL(n, func, flags) \ ++ DRM_IOCTL_DEF_DRV(PANTHOR_##n, panthor_ioctl_##func, flags) ++ ++ PANTHOR_IOCTL(DEV_QUERY, dev_query, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_CREATE, vm_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_DESTROY, vm_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_BIND, vm_bind, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_GET_STATE, vm_get_state, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(BO_CREATE, bo_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_CREATE, group_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_DESTROY, group_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_GET_STATE, group_get_state, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), ++}; ++ ++static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) ++{ ++ struct drm_file *file = filp->private_data; ++ struct panthor_file *pfile = file->driver_priv; ++ struct panthor_device *ptdev = pfile->ptdev; ++ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ int ret, cookie; ++ ++ if 
(!drm_dev_enter(file->minor->dev, &cookie)) ++ return -ENODEV; ++ ++ if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET) ++ ret = panthor_device_mmap_io(ptdev, vma); ++ else ++ ret = drm_gem_mmap(filp, vma); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static const struct file_operations panthor_drm_driver_fops = { ++ .open = drm_open, ++ .release = drm_release, ++ .unlocked_ioctl = drm_ioctl, ++ .compat_ioctl = drm_compat_ioctl, ++ .poll = drm_poll, ++ .read = drm_read, ++ .llseek = noop_llseek, ++ .mmap = panthor_mmap, ++}; ++ ++#ifdef CONFIG_DEBUG_FS ++static void panthor_debugfs_init(struct drm_minor *minor) ++{ ++ panthor_mmu_debugfs_init(minor); ++} ++#endif ++ ++/* ++ * PanCSF driver version: ++ * - 1.0 - initial interface ++ */ ++static const struct drm_driver panthor_drm_driver = { ++ .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | ++ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, ++ .open = panthor_open, ++ .postclose = panthor_postclose, ++ .ioctls = panthor_drm_driver_ioctls, ++ .num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls), ++ .fops = &panthor_drm_driver_fops, ++ .name = "panthor", ++ .desc = "Panthor DRM driver", ++ .date = "20230801", ++ .major = 1, ++ .minor = 0, ++ ++ .gem_create_object = panthor_gem_create_object, ++ .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, ++#ifdef CONFIG_DEBUG_FS ++ .debugfs_init = panthor_debugfs_init, ++#endif ++}; ++ ++static int panthor_probe(struct platform_device *pdev) ++{ ++ struct panthor_device *ptdev; ++ ++ ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver, ++ struct panthor_device, base); ++ if (!ptdev) ++ return -ENOMEM; ++ ++ platform_set_drvdata(pdev, ptdev); ++ ++ return panthor_device_init(ptdev); ++} ++ ++static void panthor_remove(struct platform_device *pdev) ++{ ++ struct panthor_device *ptdev = platform_get_drvdata(pdev); ++ ++ panthor_device_unplug(ptdev); ++} ++ ++static const struct of_device_id dt_match[] = { ++ { .compatible = "rockchip,rk3588-mali" }, ++ { .compatible = "arm,mali-valhall-csf" }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, dt_match); ++ ++static DEFINE_RUNTIME_DEV_PM_OPS(panthor_pm_ops, ++ panthor_device_suspend, ++ panthor_device_resume, ++ NULL); ++ ++static struct platform_driver panthor_driver = { ++ .probe = panthor_probe, ++ .remove_new = panthor_remove, ++ .driver = { ++ .name = "panthor", ++ .pm = &panthor_pm_ops, ++ .of_match_table = dt_match, ++ }, ++}; ++ ++/* ++ * Workqueue used to cleanup stuff. ++ * ++ * We create a dedicated workqueue so we can drain on unplug and ++ * make sure all resources are freed before the module is unloaded. 
++ */ ++struct workqueue_struct *panthor_cleanup_wq; ++ ++static int __init panthor_init(void) ++{ ++ int ret; ++ ++ ret = panthor_mmu_pt_cache_init(); ++ if (ret) ++ return ret; ++ ++ panthor_cleanup_wq = alloc_workqueue("panthor-cleanup", WQ_UNBOUND, 0); ++ if (!panthor_cleanup_wq) { ++ pr_err("panthor: Failed to allocate the workqueues"); ++ ret = -ENOMEM; ++ goto err_mmu_pt_cache_fini; ++ } ++ ++ ret = platform_driver_register(&panthor_driver); ++ if (ret) ++ goto err_destroy_cleanup_wq; ++ ++ return 0; ++ ++err_destroy_cleanup_wq: ++ destroy_workqueue(panthor_cleanup_wq); ++ ++err_mmu_pt_cache_fini: ++ panthor_mmu_pt_cache_fini(); ++ return ret; ++} ++module_init(panthor_init); ++ ++static void __exit panthor_exit(void) ++{ ++ platform_driver_unregister(&panthor_driver); ++ destroy_workqueue(panthor_cleanup_wq); ++ panthor_mmu_pt_cache_fini(); ++} ++module_exit(panthor_exit); ++ ++MODULE_AUTHOR("Panthor Project Developers"); ++MODULE_DESCRIPTION("Panthor DRM Driver"); ++MODULE_LICENSE("Dual MIT/GPL"); diff --git a/patches-6.6/034-41-v6.10-drm-panthor-Allow-driver-compilation.patch b/patches-6.6/034-41-v6.10-drm-panthor-Allow-driver-compilation.patch new file mode 100644 index 0000000..733733b --- /dev/null +++ b/patches-6.6/034-41-v6.10-drm-panthor-Allow-driver-compilation.patch @@ -0,0 +1,104 @@ +From d72f049087d4f973f6332b599c92177e718107de Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:26 +0100 +Subject: [PATCH] drm/panthor: Allow driver compilation + +Now that all blocks are available, we can add/update Kconfig/Makefile +files to allow compilation. + +v6: +- Add Maxime's and Heiko's acks +- Keep source files alphabetically ordered in the Makefile + +v4: +- Add Steve's R-b + +v3: +- Add a dep on DRM_GPUVM +- Fix dependencies in Kconfig +- Expand help text to (hopefully) describe which GPUs are to be + supported by this driver and which are for panfrost. 
+ +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-13-boris.brezillon@collabora.com +--- + drivers/gpu/drm/Kconfig | 2 ++ + drivers/gpu/drm/Makefile | 1 + + drivers/gpu/drm/panthor/Kconfig | 23 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/Makefile | 14 ++++++++++++++ + 4 files changed, 40 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/Kconfig + create mode 100644 drivers/gpu/drm/panthor/Makefile + +--- a/drivers/gpu/drm/Kconfig ++++ b/drivers/gpu/drm/Kconfig +@@ -381,6 +381,8 @@ source "drivers/gpu/drm/lima/Kconfig" + + source "drivers/gpu/drm/panfrost/Kconfig" + ++source "drivers/gpu/drm/panthor/Kconfig" ++ + source "drivers/gpu/drm/aspeed/Kconfig" + + source "drivers/gpu/drm/mcde/Kconfig" +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -189,6 +189,7 @@ obj-$(CONFIG_DRM_XEN) += xen/ + obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ + obj-$(CONFIG_DRM_LIMA) += lima/ + obj-$(CONFIG_DRM_PANFROST) += panfrost/ ++obj-$(CONFIG_DRM_PANTHOR) += panthor/ + obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ + obj-$(CONFIG_DRM_MCDE) += mcde/ + obj-$(CONFIG_DRM_TIDSS) += tidss/ +--- /dev/null ++++ b/drivers/gpu/drm/panthor/Kconfig +@@ -0,0 +1,23 @@ ++# SPDX-License-Identifier: GPL-2.0 or MIT ++ ++config DRM_PANTHOR ++ tristate "Panthor (DRM support for ARM Mali CSF-based GPUs)" ++ depends on DRM ++ depends on ARM || ARM64 || COMPILE_TEST ++ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE ++ depends on MMU ++ select DEVFREQ_GOV_SIMPLE_ONDEMAND ++ select DRM_EXEC ++ select DRM_GEM_SHMEM_HELPER ++ select DRM_GPUVM ++ select DRM_SCHED ++ select IOMMU_IO_PGTABLE_LPAE ++ select IOMMU_SUPPORT ++ select PM_DEVFREQ ++ help ++ DRM driver for ARM Mali CSF-based GPUs. ++ ++ This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. ++ ++ Note that the Mali-G68 and Mali-G78, while Valhall architecture, will ++ be supported with the panfrost driver as they are not CSF GPUs. +--- /dev/null ++++ b/drivers/gpu/drm/panthor/Makefile +@@ -0,0 +1,14 @@ ++# SPDX-License-Identifier: GPL-2.0 or MIT ++ ++panthor-y := \ ++ panthor_devfreq.o \ ++ panthor_device.o \ ++ panthor_drv.o \ ++ panthor_fw.o \ ++ panthor_gem.o \ ++ panthor_gpu.o \ ++ panthor_heap.o \ ++ panthor_mmu.o \ ++ panthor_sched.o ++ ++obj-$(CONFIG_DRM_PANTHOR) += panthor.o diff --git a/patches-6.6/034-42-v6.10-drm-panthor-Fix-panthor_devfreq-kerneldoc.patch b/patches-6.6/034-42-v6.10-drm-panthor-Fix-panthor_devfreq-kerneldoc.patch new file mode 100644 index 0000000..63605d3 --- /dev/null +++ b/patches-6.6/034-42-v6.10-drm-panthor-Fix-panthor_devfreq-kerneldoc.patch @@ -0,0 +1,27 @@ +From 9c86b03863844ce69f99aa66404c79492ec9e208 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Mon, 4 Mar 2024 10:08:10 +0100 +Subject: [PATCH] drm/panthor: Fix panthor_devfreq kerneldoc + +Missing '*' to have a valid kerneldoc prefix. 
+ +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202403031019.6jvrOqGT-lkp@intel.com/ +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/20240304090812.3941084-2-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_devfreq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_devfreq.c ++++ b/drivers/gpu/drm/panthor/panthor_devfreq.c +@@ -34,7 +34,7 @@ struct panthor_devfreq { + /** @last_busy_state: True if the GPU was busy last time we updated the state. */ + bool last_busy_state; + +- /* ++ /** + * @lock: Lock used to protect busy_time, idle_time, time_last_update and + * last_busy_state. + * diff --git a/patches-6.6/034-43-v6.10-drm-panthor-Explicitly-include-mm-h-for-the-virt-.patch b/patches-6.6/034-43-v6.10-drm-panthor-Explicitly-include-mm-h-for-the-virt-.patch new file mode 100644 index 0000000..ddf8613 --- /dev/null +++ b/patches-6.6/034-43-v6.10-drm-panthor-Explicitly-include-mm-h-for-the-virt-.patch @@ -0,0 +1,31 @@ +From eb1dc10a6ee3559310436ab62db93b72310a2a18 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Mon, 4 Mar 2024 10:08:11 +0100 +Subject: [PATCH] drm/panthor: Explicitly include mm.h for the {virt, + __phys)_to_pfn() defs + +Something on arm[64] must be including , but things fail +to compile on sparc64. Make sure this header is included (through +linux/mm.h) so this driver can be compile-tested on all supported +architectures. + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202403031142.Vl4pW7X6-lkp@intel.com/ +Signed-off-by: Boris Brezillon +Reviewed-by: Liviu Dudau +Reviewed-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/20240304090812.3941084-3-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/panthor/panthor_device.c ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -4,6 +4,7 @@ + /* Copyright 2023 Collabora ltd. */ + + #include ++#include + #include + #include + #include diff --git a/patches-6.6/034-44-v6.10-drm-panthor-Fix-the-CONFIG_PM_n-case.patch b/patches-6.6/034-44-v6.10-drm-panthor-Fix-the-CONFIG_PM_n-case.patch new file mode 100644 index 0000000..c8c18e2 --- /dev/null +++ b/patches-6.6/034-44-v6.10-drm-panthor-Fix-the-CONFIG_PM_n-case.patch @@ -0,0 +1,75 @@ +From 0cd8363ed802922e39446d783f767b3e09335ddc Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Mon, 18 Mar 2024 16:31:17 +0100 +Subject: [PATCH] drm/panthor: Fix the CONFIG_PM=n case + +Putting a hard dependency on CONFIG_PM is not possible because of a +circular dependency issue, and it's actually not desirable either. In +order to support this use case, we forcibly resume at init time, and +suspend at unplug time. 
+ +v2: +- Drop the #ifdef CONFIG_PM section around panthor_pm_ops's definition + +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202403031944.EOimQ8WK-lkp@intel.com/ +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: AngeloGioacchino Del Regno +Link: https://patchwork.freedesktop.org/patch/msgid/20240318153117.1321544-1-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 13 +++++++++++-- + drivers/gpu/drm/panthor/panthor_drv.c | 2 +- + 2 files changed, 12 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.c ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -87,6 +87,10 @@ void panthor_device_unplug(struct pantho + pm_runtime_dont_use_autosuspend(ptdev->base.dev); + pm_runtime_put_sync_suspend(ptdev->base.dev); + ++ /* If PM is disabled, we need to call the suspend handler manually. */ ++ if (!IS_ENABLED(CONFIG_PM)) ++ panthor_device_suspend(ptdev->base.dev); ++ + /* Report the unplug operation as done to unblock concurrent + * panthor_device_unplug() callers. + */ +@@ -218,6 +222,13 @@ int panthor_device_init(struct panthor_d + if (ret) + return ret; + ++ /* If PM is disabled, we need to call panthor_device_resume() manually. */ ++ if (!IS_ENABLED(CONFIG_PM)) { ++ ret = panthor_device_resume(ptdev->base.dev); ++ if (ret) ++ return ret; ++ } ++ + ret = panthor_gpu_init(ptdev); + if (ret) + goto err_rpm_put; +@@ -402,7 +413,6 @@ int panthor_device_mmap_io(struct pantho + return 0; + } + +-#ifdef CONFIG_PM + int panthor_device_resume(struct device *dev) + { + struct panthor_device *ptdev = dev_get_drvdata(dev); +@@ -547,4 +557,3 @@ err_set_active: + mutex_unlock(&ptdev->pm.mmio_lock); + return ret; + } +-#endif +--- a/drivers/gpu/drm/panthor/panthor_drv.c ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -1416,7 +1416,7 @@ static struct platform_driver panthor_dr + .remove_new = panthor_remove, + .driver = { + .name = "panthor", +- .pm = &panthor_pm_ops, ++ .pm = pm_ptr(&panthor_pm_ops), + .of_match_table = dt_match, + }, + }; diff --git a/patches-6.6/034-45-v6.10-drm-panthor-Don-t-use-virt_to_pfn.patch b/patches-6.6/034-45-v6.10-drm-panthor-Don-t-use-virt_to_pfn.patch new file mode 100644 index 0000000..c02405b --- /dev/null +++ b/patches-6.6/034-45-v6.10-drm-panthor-Don-t-use-virt_to_pfn.patch @@ -0,0 +1,74 @@ +From 0b45921c2a8831834a5f8a52ddd0b25b5b1c6faf Mon Sep 17 00:00:00 2001 +From: Steven Price +Date: Mon, 18 Mar 2024 14:51:19 +0000 +Subject: [PATCH] drm/panthor: Don't use virt_to_pfn() + +virt_to_pfn() isn't available on x86 (except to xen) so breaks +COMPILE_TEST builds. Avoid its use completely by instead storing the +struct page pointer allocated in panthor_device_init() and using +page_to_pfn() instead. 
+ +Signed-off-by: Steven Price +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240318145119.368582-1-steven.price@arm.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 10 ++++++---- + drivers/gpu/drm/panthor/panthor_device.h | 2 +- + 2 files changed, 7 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.c ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -151,11 +151,12 @@ static bool panthor_device_is_initialize + + static void panthor_device_free_page(struct drm_device *ddev, void *data) + { +- free_page((unsigned long)data); ++ __free_page(data); + } + + int panthor_device_init(struct panthor_device *ptdev) + { ++ u32 *dummy_page_virt; + struct resource *res; + struct page *p; + int ret; +@@ -176,7 +177,8 @@ int panthor_device_init(struct panthor_d + if (!p) + return -ENOMEM; + +- ptdev->pm.dummy_latest_flush = page_address(p); ++ ptdev->pm.dummy_latest_flush = p; ++ dummy_page_virt = page_address(p); + ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page, + ptdev->pm.dummy_latest_flush); + if (ret) +@@ -188,7 +190,7 @@ int panthor_device_init(struct panthor_d + * happens while the dummy page is mapped. Zero cannot be used because + * that means 'always flush'. + */ +- *ptdev->pm.dummy_latest_flush = 1; ++ *dummy_page_virt = 1; + + INIT_WORK(&ptdev->reset.work, panthor_device_reset_work); + ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0); +@@ -364,7 +366,7 @@ static vm_fault_t panthor_mmio_vm_fault( + if (active) + pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); + else +- pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush); ++ pfn = page_to_pfn(ptdev->pm.dummy_latest_flush); + break; + + default: +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -160,7 +160,7 @@ struct panthor_device { + * Used to replace the real LATEST_FLUSH page when the GPU + * is suspended. + */ +- u32 *dummy_latest_flush; ++ struct page *dummy_latest_flush; + } pm; + }; + diff --git a/patches-6.6/034-46-v6.10-drm-panthor-Fix-spelling-mistake-readyness-readiness.patch b/patches-6.6/034-46-v6.10-drm-panthor-Fix-spelling-mistake-readyness-readiness.patch new file mode 100644 index 0000000..d2c5b50 --- /dev/null +++ b/patches-6.6/034-46-v6.10-drm-panthor-Fix-spelling-mistake-readyness-readiness.patch @@ -0,0 +1,26 @@ +From 9d1848778e56fb565db041e4237a2f27f9277f63 Mon Sep 17 00:00:00 2001 +From: Colin Ian King +Date: Tue, 26 Mar 2024 10:02:19 +0000 +Subject: [PATCH] drm/panthor: Fix spelling mistake "readyness" -> "readiness" + +There is a spelling mistake in a drm_err message. Fix it. 
+ +Signed-off-by: Colin Ian King +Acked-by: Liviu Dudau +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240326100219.43989-1-colin.i.king@gmail.com +--- + drivers/gpu/drm/panthor/panthor_gpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_gpu.c ++++ b/drivers/gpu/drm/panthor/panthor_gpu.c +@@ -333,7 +333,7 @@ int panthor_gpu_block_power_on(struct pa + val, (mask32 & val) == mask32, + 100, timeout_us); + if (ret) { +- drm_err(&ptdev->base, "timeout waiting on %s:%llx readyness", ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness", + blk_name, mask); + return ret; + } diff --git a/patches-6.6/034-47-v6.10-drm-panthor-Fix-IO-page-mmap-for-32-bit-userspace-on.patch b/patches-6.6/034-47-v6.10-drm-panthor-Fix-IO-page-mmap-for-32-bit-userspace-on.patch new file mode 100644 index 0000000..dff8430 --- /dev/null +++ b/patches-6.6/034-47-v6.10-drm-panthor-Fix-IO-page-mmap-for-32-bit-userspace-on.patch @@ -0,0 +1,142 @@ +From 11f0275cc1b90b4b9bf37a5ebc27c0a9b2451b4e Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Tue, 26 Mar 2024 12:12:03 +0100 +Subject: [PATCH] drm/panthor: Fix IO-page mmap() for 32-bit userspace on + 64-bit kernel +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When mapping an IO region, the pseudo-file offset is dependent on the +userspace architecture. panthor_device_mmio_offset() abstracts that +away for us by turning a userspace MMIO offset into its kernel +equivalent, but we were not updating vm_area_struct::vm_pgoff +accordingly, leading us to attach the MMIO region to the wrong file +offset. + +This has implications when we start mixing 64 bit and 32 bit apps, but +that's only really a problem when we start having more that 2^43 bytes of +memory allocated, which is very unlikely to happen. + +What's more problematic is the fact this turns our +unmap_mapping_range(DRM_PANTHOR_USER_MMIO_OFFSET) calls, which are +supposed to kill the MMIO mapping when entering suspend, into NOPs. +Which means we either keep the dummy flush_id mapping active at all +times, or we risk a BUS_FAULT if the MMIO region was mapped, and the +GPU is suspended after that. + +Solve that by patching vm_pgoff early in panthor_mmap(). With +this in place, we no longer need the panthor_device_mmio_offset() +helper. + +v3: +- No changes + +v2: +- Kill panthor_device_mmio_offset() + +Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block") +Reported-by: Adrián Larumbe +Reported-by: Lukas F. 
Hartmann +Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10835 +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-1-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.c | 8 ++++---- + drivers/gpu/drm/panthor/panthor_device.h | 24 ------------------------ + drivers/gpu/drm/panthor/panthor_drv.c | 17 ++++++++++++++++- + 3 files changed, 20 insertions(+), 29 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.c ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -348,7 +348,7 @@ static vm_fault_t panthor_mmio_vm_fault( + { + struct vm_area_struct *vma = vmf->vma; + struct panthor_device *ptdev = vma->vm_private_data; +- u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; + unsigned long pfn; + pgprot_t pgprot; + vm_fault_t ret; +@@ -361,7 +361,7 @@ static vm_fault_t panthor_mmio_vm_fault( + mutex_lock(&ptdev->pm.mmio_lock); + active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE; + +- switch (panthor_device_mmio_offset(id)) { ++ switch (offset) { + case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: + if (active) + pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); +@@ -392,9 +392,9 @@ static const struct vm_operations_struct + + int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma) + { +- u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; + +- switch (panthor_device_mmio_offset(id)) { ++ switch (offset) { + case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: + if (vma->vm_end - vma->vm_start != PAGE_SIZE || + (vma->vm_flags & (VM_WRITE | VM_EXEC))) +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -365,30 +365,6 @@ static int panthor_request_ ## __name ## + pirq); \ + } + +-/** +- * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one +- * @offset: Offset to convert. +- * +- * With 32-bit systems being limited by the 32-bit representation of mmap2's +- * pgoffset field, we need to make the MMIO offset arch specific. This function +- * converts a user MMIO offset into something the kernel driver understands. +- * +- * If the kernel and userspace architecture match, the offset is unchanged. If +- * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match +- * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible. +- * +- * Return: Adjusted offset. +- */ +-static inline u64 panthor_device_mmio_offset(u64 offset) +-{ +-#ifdef CONFIG_ARM64 +- if (test_tsk_thread_flag(current, TIF_32BIT)) +- offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; +-#endif +- +- return offset; +-} +- + extern struct workqueue_struct *panthor_cleanup_wq; + + #endif +--- a/drivers/gpu/drm/panthor/panthor_drv.c ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -1326,7 +1326,22 @@ static int panthor_mmap(struct file *fil + if (!drm_dev_enter(file->minor->dev, &cookie)) + return -ENODEV; + +- if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET) ++#ifdef CONFIG_ARM64 ++ /* ++ * With 32-bit systems being limited by the 32-bit representation of ++ * mmap2's pgoffset field, we need to make the MMIO offset arch ++ * specific. This converts a user MMIO offset into something the kernel ++ * driver understands. 
++ */ ++ if (test_tsk_thread_flag(current, TIF_32BIT) && ++ offset >= DRM_PANTHOR_USER_MMIO_OFFSET_32BIT) { ++ offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - ++ DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; ++ vma->vm_pgoff = offset >> PAGE_SHIFT; ++ } ++#endif ++ ++ if (offset >= DRM_PANTHOR_USER_MMIO_OFFSET) + ret = panthor_device_mmap_io(ptdev, vma); + else + ret = drm_gem_mmap(filp, vma); diff --git a/patches-6.6/034-48-v6.10-drm-panthor-Fix-ordering-in-_irq_suspend.patch b/patches-6.6/034-48-v6.10-drm-panthor-Fix-ordering-in-_irq_suspend.patch new file mode 100644 index 0000000..4832bf0 --- /dev/null +++ b/patches-6.6/034-48-v6.10-drm-panthor-Fix-ordering-in-_irq_suspend.patch @@ -0,0 +1,55 @@ +From 1de434e0b2757061b09b347264f1ff5bdf996e58 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Tue, 26 Mar 2024 12:12:04 +0100 +Subject: [PATCH] drm/panthor: Fix ordering in _irq_suspend() + +Make sure we set suspended=true last to avoid generating an irq storm +in the unlikely case where an IRQ happens between the suspended=true +assignment and the _INT_MASK update. + +We also move the mask=0 assignment before writing to the _INT_MASK +register to prevent the thread handler from unmasking the interrupt +behind our back. This means we might lose events if there were some +pending when we get to suspend the IRQ, but that's fine. +The synchronize_irq() we have in the _irq_suspend() path was not +there to make sure all IRQs are processed, just to make sure we don't +have registers accesses coming from the irq handlers after +_irq_suspend() has been called. If there's a need to have all pending +IRQs processed, it should happen before _irq_suspend() is called. + +v3: +- Add Steve's R-b + +v2: +- New patch + +Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block") +Reported-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Liviu Dudau +Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-2-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -325,7 +325,7 @@ static inline void panthor_ ## __name ## + { \ + int cookie; \ + \ +- atomic_set(&pirq->suspended, true); \ ++ pirq->mask = 0; \ + \ + if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ +@@ -333,7 +333,7 @@ static inline void panthor_ ## __name ## + drm_dev_exit(cookie); \ + } \ + \ +- pirq->mask = 0; \ ++ atomic_set(&pirq->suspended, true); \ + } \ + \ + static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ diff --git a/patches-6.6/034-49-v6.10-drm-panthor-Drop-the-dev_enter-exit-sections-in.patch b/patches-6.6/034-49-v6.10-drm-panthor-Drop-the-dev_enter-exit-sections-in.patch new file mode 100644 index 0000000..6e09c3d --- /dev/null +++ b/patches-6.6/034-49-v6.10-drm-panthor-Drop-the-dev_enter-exit-sections-in.patch @@ -0,0 +1,56 @@ +From 962f88b9c91647f3ff4a0d3709662641baed5164 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Tue, 26 Mar 2024 12:12:05 +0100 +Subject: [PATCH] drm/panthor: Drop the dev_enter/exit() sections in + _irq_suspend/resume() + +There's no reason for _irq_suspend/resume() to be called after the +device has been unplugged, and keeping this dev_enter/exit() +section in _irq_suspend() is turns _irq_suspend() into a NOP +when called from the _unplug() functions, which we don't 
want. + +v3: +- New patch + +Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block") +Signed-off-by: Boris Brezillon +Reviewed-by: Liviu Dudau +Reviewed-by: Steven Price +Link: https://patchwork.freedesktop.org/patch/msgid/20240326111205.510019-3-boris.brezillon@collabora.com +--- + drivers/gpu/drm/panthor/panthor_device.h | 17 ++++------------- + 1 file changed, 4 insertions(+), 13 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -326,13 +326,8 @@ static inline void panthor_ ## __name ## + int cookie; \ + \ + pirq->mask = 0; \ +- \ +- if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ +- gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ +- synchronize_irq(pirq->irq); \ +- drm_dev_exit(cookie); \ +- } \ +- \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ ++ synchronize_irq(pirq->irq); \ + atomic_set(&pirq->suspended, true); \ + } \ + \ +@@ -342,12 +337,8 @@ static inline void panthor_ ## __name ## + \ + atomic_set(&pirq->suspended, false); \ + pirq->mask = mask; \ +- \ +- if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ +- gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ +- gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ +- drm_dev_exit(cookie); \ +- } \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ + } \ + \ + static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ diff --git a/patches-6.6/034-50-v6.10-drm-panthor-Fix-clang-Wunused-but-set-variable-in.patch b/patches-6.6/034-50-v6.10-drm-panthor-Fix-clang-Wunused-but-set-variable-in.patch new file mode 100644 index 0000000..11cde86 --- /dev/null +++ b/patches-6.6/034-50-v6.10-drm-panthor-Fix-clang-Wunused-but-set-variable-in.patch @@ -0,0 +1,46 @@ +From d76653c32dd16d78e56208b4819134e766257c06 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Thu, 28 Mar 2024 09:22:07 -0700 +Subject: [PATCH] drm/panthor: Fix clang -Wunused-but-set-variable in + tick_ctx_apply() + +Clang warns (or errors with CONFIG_WERROR): + + drivers/gpu/drm/panthor/panthor_sched.c:2048:6: error: variable 'csg_mod_mask' set but not used [-Werror,-Wunused-but-set-variable] + 2048 | u32 csg_mod_mask = 0, free_csg_slots = 0; + | ^ + 1 error generated. + +The variable is an artifact left over from refactoring that occurred +during the development of the initial series for this driver. Remove it +to resolve the warning. 
+ +Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") +Signed-off-by: Nathan Chancellor +Reviewed-by: Steven Price +Reviewed-by: Justin Stitt +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240328-panthor-drop-csg_mod_mask-v1-1-5a80be3df581@kernel.org +--- + drivers/gpu/drm/panthor/panthor_sched.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_sched.c ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -2045,7 +2045,7 @@ tick_ctx_apply(struct panthor_scheduler + struct panthor_device *ptdev = sched->ptdev; + struct panthor_csg_slot *csg_slot; + int prio, new_csg_prio = MAX_CSG_PRIO, i; +- u32 csg_mod_mask = 0, free_csg_slots = 0; ++ u32 free_csg_slots = 0; + struct panthor_csg_slots_upd_ctx upd_ctx; + int ret; + +@@ -2139,7 +2139,6 @@ tick_ctx_apply(struct panthor_scheduler + + csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); + csg_slot = &sched->csg_slots[csg_id]; +- csg_mod_mask |= BIT(csg_id); + group_bind_locked(group, csg_id); + csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--); + csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, diff --git a/patches-6.6/034-51-v6.10-drm-panthor-Cleanup-unused-variable-cookie-.patch b/patches-6.6/034-51-v6.10-drm-panthor-Cleanup-unused-variable-cookie-.patch new file mode 100644 index 0000000..90c0ed2 --- /dev/null +++ b/patches-6.6/034-51-v6.10-drm-panthor-Cleanup-unused-variable-cookie-.patch @@ -0,0 +1,40 @@ +From 00044169de061dac8d9da2cf930757c53006adff Mon Sep 17 00:00:00 2001 +From: Liviu Dudau +Date: Tue, 2 Apr 2024 22:54:22 +0100 +Subject: [PATCH] drm/panthor: Cleanup unused variable 'cookie' + +Commit 962f88b9c916 ("drm/panthor: Drop the dev_enter/exit() sections in +_irq_suspend/resume()") removed the code that used the 'cookie' variable +but left the declaration in place. Remove it. 
+ +Fixes: 962f88b9c916 ("drm/panthor: Drop the dev_enter/exit() sections in _irq_suspend/resume()") +Cc: Boris Brezillon +Cc: Steven Price +Signed-off-by: Liviu Dudau +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240402215423.360341-1-liviu.dudau@arm.com +--- + drivers/gpu/drm/panthor/panthor_device.h | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_device.h ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -323,8 +323,6 @@ static irqreturn_t panthor_ ## __name ## + \ + static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ + { \ +- int cookie; \ +- \ + pirq->mask = 0; \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ + synchronize_irq(pirq->irq); \ +@@ -333,8 +331,6 @@ static inline void panthor_ ## __name ## + \ + static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ + { \ +- int cookie; \ +- \ + atomic_set(&pirq->suspended, false); \ + pirq->mask = mask; \ + gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ diff --git a/patches-6.6/034-52-v6.10-drm-panthor-Fix-some-kerneldoc-warnings.patch b/patches-6.6/034-52-v6.10-drm-panthor-Fix-some-kerneldoc-warnings.patch new file mode 100644 index 0000000..2f853e4 --- /dev/null +++ b/patches-6.6/034-52-v6.10-drm-panthor-Fix-some-kerneldoc-warnings.patch @@ -0,0 +1,58 @@ +From be7ffc821f5fc2eb30944562a04901c10892cc7c Mon Sep 17 00:00:00 2001 +From: Liviu Dudau +Date: Tue, 2 Apr 2024 22:54:23 +0100 +Subject: [PATCH] drm/panthor: Fix some kerneldoc warnings + +When compiling with W=1 the build process will flag empty comments, +misnamed documented variables and incorrect tagging of functions. +Fix them in one go. + +Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") +Cc: Boris Brezillon +Cc: Steven Price +Signed-off-by: Liviu Dudau +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240402215423.360341-2-liviu.dudau@arm.com +--- + drivers/gpu/drm/panthor/panthor_sched.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_sched.c ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -517,7 +517,7 @@ struct panthor_group { + /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */ + u8 max_compute_cores; + +- /** @max_compute_cores: Maximum number of shader cores used for fragment jobs. */ ++ /** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */ + u8 max_fragment_cores; + + /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */ +@@ -993,7 +993,7 @@ cs_slot_prog_locked(struct panthor_devic + } + + /** +- * @cs_slot_reset_locked() - Reset a queue slot ++ * cs_slot_reset_locked() - Reset a queue slot + * @ptdev: Device. + * @csg_id: Group slot. + * @cs_id: Queue slot. +@@ -1591,7 +1591,7 @@ static void sched_process_idle_event_loc + } + + /** +- * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ ++ * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ + * @ptdev: Device. 
+ */ + static void sched_process_global_irq_locked(struct panthor_device *ptdev) +@@ -1660,8 +1660,6 @@ static const struct dma_fence_ops pantho + .get_timeline_name = queue_fence_get_timeline_name, + }; + +-/** +- */ + struct panthor_csg_slots_upd_ctx { + u32 update_mask; + u32 timedout_mask; diff --git a/patches-6.6/034-53-v6.10-drm-panthor-Fix-a-couple-ENOMEM-error-codes.patch b/patches-6.6/034-53-v6.10-drm-panthor-Fix-a-couple-ENOMEM-error-codes.patch new file mode 100644 index 0000000..43ac629 --- /dev/null +++ b/patches-6.6/034-53-v6.10-drm-panthor-Fix-a-couple-ENOMEM-error-codes.patch @@ -0,0 +1,42 @@ +From d33733263a550775c7574169f62bf144f74d8f9a Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 2 Apr 2024 12:58:09 +0300 +Subject: [PATCH] drm/panthor: Fix a couple -ENOMEM error codes + +These error paths forgot to set the error code to -ENOMEM. + +Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block") +Signed-off-by: Dan Carpenter +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/cf5bbba5-427e-4940-b91e-925f9fa71f8d@moroto.mountain +--- + drivers/gpu/drm/panthor/panthor_mmu.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_mmu.c ++++ b/drivers/gpu/drm/panthor/panthor_mmu.c +@@ -1264,8 +1264,10 @@ static int panthor_vm_prepare_map_op_ctx + op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, + sizeof(*op_ctx->rsvd_page_tables.pages), + GFP_KERNEL); +- if (!op_ctx->rsvd_page_tables.pages) ++ if (!op_ctx->rsvd_page_tables.pages) { ++ ret = -ENOMEM; + goto err_cleanup; ++ } + + ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, + op_ctx->rsvd_page_tables.pages); +@@ -1318,8 +1320,10 @@ static int panthor_vm_prepare_unmap_op_c + op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, + sizeof(*op_ctx->rsvd_page_tables.pages), + GFP_KERNEL); +- if (!op_ctx->rsvd_page_tables.pages) ++ if (!op_ctx->rsvd_page_tables.pages) { ++ ret = -ENOMEM; + goto err_cleanup; ++ } + + ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, + op_ctx->rsvd_page_tables.pages); diff --git a/patches-6.6/034-54-v6.10-drm-panthor-Fix-error-code-in-panthor_gpu_init.patch b/patches-6.6/034-54-v6.10-drm-panthor-Fix-error-code-in-panthor_gpu_init.patch new file mode 100644 index 0000000..4c72843 --- /dev/null +++ b/patches-6.6/034-54-v6.10-drm-panthor-Fix-error-code-in-panthor_gpu_init.patch @@ -0,0 +1,33 @@ +From 99b74db1e27145bdf0afb85559aa70d951569ac3 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 2 Apr 2024 12:56:19 +0300 +Subject: [PATCH] drm/panthor: Fix error code in panthor_gpu_init() + +This code accidentally returns zero/success on error because of a typo. +It should be "irq" instead of "ret". The other thing is that if +platform_get_irq_byname() were to return zero then the error code would +be cmplicated. Fortunately, it does not so we can just change <= to +< 0. 
+ +Fixes: 5cd894e258c4 ("drm/panthor: Add the GPU logical block") +Signed-off-by: Dan Carpenter +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/d753e684-43ee-45c2-a1fd-86222da204e1@moroto.mountain +--- + drivers/gpu/drm/panthor/panthor_gpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_gpu.c ++++ b/drivers/gpu/drm/panthor/panthor_gpu.c +@@ -211,8 +211,8 @@ int panthor_gpu_init(struct panthor_devi + return ret; + + irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); +- if (irq <= 0) +- return ret; ++ if (irq < 0) ++ return irq; + + ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK); + if (ret) diff --git a/patches-6.6/034-55-v6.10-drm-panthor-Fix-off-by-one-in-panthor_fw_get_cs_iface.patch b/patches-6.6/034-55-v6.10-drm-panthor-Fix-off-by-one-in-panthor_fw_get_cs_iface.patch new file mode 100644 index 0000000..9b10ef9 --- /dev/null +++ b/patches-6.6/034-55-v6.10-drm-panthor-Fix-off-by-one-in-panthor_fw_get_cs_iface.patch @@ -0,0 +1,28 @@ +From 2b5890786014b926f845402ae80ebc71c4bd6d5c Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Tue, 2 Apr 2024 12:56:42 +0300 +Subject: [PATCH] drm/panthor: Fix off by one in panthor_fw_get_cs_iface() + +The ->iface.streams[csg_slot][] array has MAX_CS_PER_CSG elements so +this > comparison needs to be >= to prevent an out of bounds access. + +Fixes: 2718d91816ee ("drm/panthor: Add the FW logical block") +Signed-off-by: Dan Carpenter +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/62835c16-c85c-483d-a8fe-63be78d49d15@moroto.mountain +--- + drivers/gpu/drm/panthor/panthor_fw.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_fw.c ++++ b/drivers/gpu/drm/panthor/panthor_fw.c +@@ -308,7 +308,7 @@ panthor_fw_get_csg_iface(struct panthor_ + struct panthor_fw_cs_iface * + panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) + { +- if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG)) ++ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot >= MAX_CS_PER_CSG)) + return NULL; + + return &ptdev->fw->iface.streams[csg_slot][cs_slot]; diff --git a/patches-6.6/034-56-v6.10-drm-panthor-Don-t-return-NULL-from.patch b/patches-6.6/034-56-v6.10-drm-panthor-Don-t-return-NULL-from.patch new file mode 100644 index 0000000..e9d7eba --- /dev/null +++ b/patches-6.6/034-56-v6.10-drm-panthor-Don-t-return-NULL-from.patch @@ -0,0 +1,58 @@ +From 45c734fdd43db14444025910b4c59dd2b8be714a Mon Sep 17 00:00:00 2001 +From: Harshit Mogalapalli +Date: Tue, 2 Apr 2024 07:14:11 -0700 +Subject: [PATCH] drm/panthor: Don't return NULL from + panthor_vm_get_heap_pool() + +The kernel doc says this function returns either a valid pointer +or an ERR_PTR(), but in practice this function can return NULL if +create=false. Fix the function to match the doc (return +ERR_PTR(-ENOENT) instead of NULL) and adjust all call-sites +accordingly. 
+ +Fixes: 4bdca1150792 ("drm/panthor: Add the driver frontend block") +Signed-off-by: Harshit Mogalapalli +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240402141412.1707949-1-harshit.m.mogalapalli@oracle.com +--- + drivers/gpu/drm/panthor/panthor_drv.c | 4 ++-- + drivers/gpu/drm/panthor/panthor_mmu.c | 2 ++ + drivers/gpu/drm/panthor/panthor_sched.c | 2 +- + 3 files changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/panthor/panthor_drv.c ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -1089,8 +1089,8 @@ static int panthor_ioctl_tiler_heap_dest + return -EINVAL; + + pool = panthor_vm_get_heap_pool(vm, false); +- if (!pool) { +- ret = -EINVAL; ++ if (IS_ERR(pool)) { ++ ret = PTR_ERR(pool); + goto out_put_vm; + } + +--- a/drivers/gpu/drm/panthor/panthor_mmu.c ++++ b/drivers/gpu/drm/panthor/panthor_mmu.c +@@ -1897,6 +1897,8 @@ struct panthor_heap_pool *panthor_vm_get + vm->heaps.pool = panthor_heap_pool_get(pool); + } else { + pool = panthor_heap_pool_get(vm->heaps.pool); ++ if (!pool) ++ pool = ERR_PTR(-ENOENT); + } + mutex_unlock(&vm->heaps.lock); + +--- a/drivers/gpu/drm/panthor/panthor_sched.c ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -1343,7 +1343,7 @@ static int group_process_tiler_oom(struc + if (unlikely(csg_id < 0)) + return 0; + +- if (!heaps || frag_end > vt_end || vt_end >= vt_start) { ++ if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) { + ret = -EINVAL; + } else { + /* We do the allocation without holding the scheduler lock to avoid diff --git a/patches-6.6/034-57-v6.10-drm-panthor-Fix-NULL-vs-IS_ERR-bug-in-panthor_probe.patch b/patches-6.6/034-57-v6.10-drm-panthor-Fix-NULL-vs-IS_ERR-bug-in-panthor_probe.patch new file mode 100644 index 0000000..6bb8516 --- /dev/null +++ b/patches-6.6/034-57-v6.10-drm-panthor-Fix-NULL-vs-IS_ERR-bug-in-panthor_probe.patch @@ -0,0 +1,28 @@ +From 6e0718f21feda0ed97f932cee39b676817e457f2 Mon Sep 17 00:00:00 2001 +From: Harshit Mogalapalli +Date: Tue, 2 Apr 2024 03:40:40 -0700 +Subject: [PATCH] drm/panthor: Fix NULL vs IS_ERR() bug in panthor_probe() + +The devm_drm_dev_alloc() function returns error pointers. +Update the error handling to check for error pointers instead of NULL. 
+ +Fixes: 4bdca1150792 ("drm/panthor: Add the driver frontend block") +Signed-off-by: Harshit Mogalapalli +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/20240402104041.1689951-1-harshit.m.mogalapalli@oracle.com +--- + drivers/gpu/drm/panthor/panthor_drv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_drv.c ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -1399,7 +1399,7 @@ static int panthor_probe(struct platform + + ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver, + struct panthor_device, base); +- if (!ptdev) ++ if (IS_ERR(ptdev)) + return -ENOMEM; + + platform_set_drvdata(pdev, ptdev); diff --git a/patches-6.6/034-58-v6.10-drm-panthor-clean-up-some-types-in-panthor_sched_suspend.patch b/patches-6.6/034-58-v6.10-drm-panthor-clean-up-some-types-in-panthor_sched_suspend.patch new file mode 100644 index 0000000..b90fcef --- /dev/null +++ b/patches-6.6/034-58-v6.10-drm-panthor-clean-up-some-types-in-panthor_sched_suspend.patch @@ -0,0 +1,34 @@ +From a9b7dfd1d1f96be3a3f92128e9d78719a8d65939 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Mon, 8 Apr 2024 10:36:35 +0300 +Subject: [PATCH] drm/panthor: clean up some types in panthor_sched_suspend() + +These variables should be u32 instead of u64 because they're only +storing u32 values. Also static checkers complain when we do: + + suspended_slots &= ~upd_ctx.timedout_mask; + +In this code "suspended_slots" is a u64 and "upd_ctx.timedout_mask". The +mask clears out the top 32 bits which would likely be a bug if anything +were stored there. + +Signed-off-by: Dan Carpenter +Reviewed-by: Steven Price +Reviewed-by: Boris Brezillon +Signed-off-by: Boris Brezillon +Link: https://patchwork.freedesktop.org/patch/msgid/85356b15-4840-4e64-8c75-922cdd6a5fef@moroto.mountain +--- + drivers/gpu/drm/panthor/panthor_sched.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/panthor/panthor_sched.c ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -2546,7 +2546,7 @@ void panthor_sched_suspend(struct pantho + { + struct panthor_scheduler *sched = ptdev->scheduler; + struct panthor_csg_slots_upd_ctx upd_ctx; +- u64 suspended_slots, faulty_slots; ++ u32 suspended_slots, faulty_slots; + struct panthor_group *group; + u32 i; + diff --git a/patches-6.6/111-irqchip-gic-v3-add-hackaround-for-rk3568-its.patch b/patches-6.6/111-irqchip-gic-v3-add-hackaround-for-rk3568-its.patch index b1e7cd5..cf17e26 100644 --- a/patches-6.6/111-irqchip-gic-v3-add-hackaround-for-rk3568-its.patch +++ b/patches-6.6/111-irqchip-gic-v3-add-hackaround-for-rk3568-its.patch @@ -17,7 +17,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its #define ITS_FLAGS_FORCE_NON_SHAREABLE (1ULL << 3) #define RD_LOCAL_LPI_ENABLED BIT(0) -@@ -2206,6 +2207,11 @@ static struct page *its_allocate_prop_ta +@@ -2186,6 +2187,11 @@ static struct page *its_allocate_prop_ta { struct page *prop_page; @@ -29,7 +29,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); if (!prop_page) return NULL; -@@ -2329,6 +2335,7 @@ static int its_setup_baser(struct its_no +@@ -2309,6 +2315,7 @@ static int its_setup_baser(struct its_no u32 alloc_pages, psz; struct page *page; void *base; @@ -37,7 +37,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its psz = baser->psz; alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); -@@ -2340,7 +2347,10 @@ static int 
its_setup_baser(struct its_no +@@ -2320,7 +2327,10 @@ static int its_setup_baser(struct its_no order = get_order(GITS_BASER_PAGES_MAX * psz); } @@ -49,7 +49,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (!page) return -ENOMEM; -@@ -2390,6 +2400,13 @@ retry_baser: +@@ -2370,6 +2380,13 @@ retry_baser: its_write_baser(its, baser, val); tmp = baser->val; @@ -63,7 +63,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) { /* * Shareability didn't stick. Just use -@@ -2980,6 +2997,10 @@ static struct page *its_allocate_pending +@@ -2960,6 +2977,10 @@ static struct page *its_allocate_pending { struct page *pend_page; @@ -74,7 +74,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its pend_page = alloc_pages(gfp_flags | __GFP_ZERO, get_order(LPI_PENDBASE_SZ)); if (!pend_page) -@@ -3135,6 +3156,9 @@ static void its_cpu_init_lpis(void) +@@ -3115,6 +3136,9 @@ static void its_cpu_init_lpis(void) gicr_write_propbaser(val, rbase + GICR_PROPBASER); tmp = gicr_read_propbaser(rbase + GICR_PROPBASER); @@ -84,7 +84,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (!rdists_support_shareable()) tmp &= ~GICR_PROPBASER_SHAREABILITY_MASK; -@@ -3162,6 +3186,9 @@ static void its_cpu_init_lpis(void) +@@ -3142,6 +3166,9 @@ static void its_cpu_init_lpis(void) gicr_write_pendbaser(val, rbase + GICR_PENDBASER); tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER); @@ -94,7 +94,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (!rdists_support_shareable()) tmp &= ~GICR_PENDBASER_SHAREABILITY_MASK; -@@ -3328,7 +3355,12 @@ static bool its_alloc_table_entry(struct +@@ -3308,7 +3335,12 @@ static bool its_alloc_table_entry(struct /* Allocate memory for 2nd level table */ if (!table[idx]) { @@ -108,7 +108,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its get_order(baser->psz)); if (!page) return false; -@@ -3417,6 +3449,7 @@ static struct its_device *its_create_dev +@@ -3397,6 +3429,7 @@ static struct its_device *its_create_dev int nr_lpis; int nr_ites; int sz; @@ -116,7 +116,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (!its_alloc_device_table(its, dev_id)) return NULL; -@@ -3424,7 +3457,11 @@ static struct its_device *its_create_dev +@@ -3404,7 +3437,11 @@ static struct its_device *its_create_dev if (WARN_ON(!is_power_of_2(nvecs))) nvecs = roundup_pow_of_two(nvecs); @@ -129,7 +129,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its /* * Even if the device wants a single LPI, the ITT must be * sized as a power of two (and you need at least one bit...). 
-@@ -3432,7 +3469,7 @@ static struct its_device *its_create_dev +@@ -3412,7 +3449,7 @@ static struct its_device *its_create_dev nr_ites = max(2, nvecs); sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; @@ -138,7 +138,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its if (alloc_lpis) { lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis); if (lpi_map) -@@ -4774,6 +4811,13 @@ static bool its_set_non_coherent(void *d +@@ -4754,6 +4791,13 @@ static bool its_set_non_coherent(void *d return true; } @@ -152,7 +152,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its static const struct gic_quirk its_quirks[] = { #ifdef CONFIG_CAVIUM_ERRATUM_22375 { -@@ -4828,6 +4872,14 @@ static const struct gic_quirk its_quirks +@@ -4808,6 +4852,14 @@ static const struct gic_quirk its_quirks .init = its_enable_rk3588001, }, #endif @@ -167,7 +167,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its { .desc = "ITS: non-coherent attribute", .property = "dma-noncoherent", -@@ -5089,6 +5141,7 @@ static int __init its_probe_one(struct i +@@ -5069,6 +5121,7 @@ static int __init its_probe_one(struct i struct page *page; u32 ctlr; int err; @@ -175,7 +175,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its its_enable_quirks(its); -@@ -5122,7 +5175,9 @@ static int __init its_probe_one(struct i +@@ -5102,7 +5155,9 @@ static int __init its_probe_one(struct i } } @@ -186,7 +186,7 @@ Subject: [PATCH] irqchip: gic-v3: add hackaround for rk3568 its get_order(ITS_CMD_QUEUE_SZ)); if (!page) { err = -ENOMEM; -@@ -5148,6 +5203,9 @@ static int __init its_probe_one(struct i +@@ -5128,6 +5183,9 @@ static int __init its_probe_one(struct i gits_write_cbaser(baser, its->base + GITS_CBASER); tmp = gits_read_cbaser(its->base + GITS_CBASER);