1930 lines
57 KiB
Diff
1930 lines
57 KiB
Diff
From 2718d91816eeed03c09c8abe872e45f59078768c Mon Sep 17 00:00:00 2001
|
|
From: Boris Brezillon <boris.brezillon@collabora.com>
|
|
Date: Thu, 29 Feb 2024 17:22:22 +0100
|
|
Subject: [PATCH] drm/panthor: Add the FW logical block
|
|
|
|
Contains everything that's FW related, that includes the code dealing
|
|
with the microcontroller unit (MCU) that's running the FW, and anything
|
|
related to allocating memory shared between the FW and the CPU.
|
|
|
|
A few global FW events are processed in the IRQ handler, the rest is
|
|
forwarded to the scheduler, since scheduling is the primary reason for
|
|
the FW existence, and also the main source of FW <-> kernel
|
|
interactions.
|
|
|
|
v6:
|
|
- Add Maxime's and Heiko's acks
|
|
- Keep header inclusion alphabetically ordered
|
|
|
|
v5:
|
|
- Fix typo in GLB_PERFCNT_SAMPLE definition
|
|
- Fix unbalanced panthor_vm_idle/active() calls
|
|
- Fallback to a slow reset when the fast reset fails
|
|
- Add extra information when reporting a FW boot failure
|
|
|
|
v4:
|
|
- Add a MODULE_FIRMWARE() entry for gen 10.8
|
|
- Fix a wrong return ERR_PTR() in panthor_fw_load_section_entry()
|
|
- Fix typos
|
|
- Add Steve's R-b
|
|
|
|
v3:
|
|
- Make the FW path more future-proof (Liviu)
|
|
- Use one waitqueue for all FW events
|
|
- Simplify propagation of FW events to the scheduler logic
|
|
- Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead
|
|
- Account for the panthor_vm changes
|
|
- Replace magic number with 0x7fffffff with ~0 to better signify that
|
|
it's the maximum permitted value.
|
|
- More accurate rounding when computing the firmware timeout.
|
|
- Add a 'sub iterator' helper function. This also adds a check that a
|
|
firmware entry doesn't overflow the firmware image.
|
|
- Drop __packed from FW structures, natural alignment is good enough.
|
|
- Other minor code improvements.
|
|
|
|
Co-developed-by: Steven Price <steven.price@arm.com>
|
|
Signed-off-by: Steven Price <steven.price@arm.com>
|
|
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
|
|
Reviewed-by: Steven Price <steven.price@arm.com>
|
|
Acked-by: Maxime Ripard <mripard@kernel.org>
|
|
Acked-by: Heiko Stuebner <heiko@sntech.de>
|
|
Link: https://patchwork.freedesktop.org/patch/msgid/20240229162230.2634044-9-boris.brezillon@collabora.com
|
|
---
|
|
drivers/gpu/drm/panthor/panthor_fw.c | 1362 ++++++++++++++++++++++++++
|
|
drivers/gpu/drm/panthor/panthor_fw.h | 503 ++++++++++
|
|
2 files changed, 1865 insertions(+)
|
|
create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c
|
|
create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h
|
|
|
|
--- /dev/null
|
|
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
|
|
@@ -0,0 +1,1362 @@
|
|
+// SPDX-License-Identifier: GPL-2.0 or MIT
|
|
+/* Copyright 2023 Collabora ltd. */
|
|
+
|
|
+#ifdef CONFIG_ARM_ARCH_TIMER
|
|
+#include <asm/arch_timer.h>
|
|
+#endif
|
|
+
|
|
+#include <linux/clk.h>
|
|
+#include <linux/dma-mapping.h>
|
|
+#include <linux/firmware.h>
|
|
+#include <linux/iopoll.h>
|
|
+#include <linux/iosys-map.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <linux/platform_device.h>
|
|
+
|
|
+#include <drm/drm_drv.h>
|
|
+#include <drm/drm_managed.h>
|
|
+
|
|
+#include "panthor_device.h"
|
|
+#include "panthor_fw.h"
|
|
+#include "panthor_gem.h"
|
|
+#include "panthor_gpu.h"
|
|
+#include "panthor_mmu.h"
|
|
+#include "panthor_regs.h"
|
|
+#include "panthor_sched.h"
|
|
+
|
|
+#define CSF_FW_NAME "mali_csffw.bin"
|
|
+
|
|
+#define PING_INTERVAL_MS 12000
|
|
+#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024)
|
|
+#define PROGRESS_TIMEOUT_SCALE_SHIFT 10
|
|
+#define IDLE_HYSTERESIS_US 800
|
|
+#define PWROFF_HYSTERESIS_US 10000
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw_binary_hdr - Firmware binary header.
|
|
+ */
|
|
+struct panthor_fw_binary_hdr {
|
|
+ /** @magic: Magic value to check binary validity. */
|
|
+ u32 magic;
|
|
+#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e
|
|
+
|
|
+ /** @minor: Minor FW version. */
|
|
+ u8 minor;
|
|
+
|
|
+ /** @major: Major FW version. */
|
|
+ u8 major;
|
|
+#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0
|
|
+
|
|
+ /** @padding1: MBZ. */
|
|
+ u16 padding1;
|
|
+
|
|
+ /** @version_hash: FW version hash. */
|
|
+ u32 version_hash;
|
|
+
|
|
+ /** @padding2: MBZ. */
|
|
+ u32 padding2;
|
|
+
|
|
+ /** @size: FW binary size. */
|
|
+ u32 size;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * enum panthor_fw_binary_entry_type - Firmware binary entry type
|
|
+ */
|
|
+enum panthor_fw_binary_entry_type {
|
|
+ /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
|
|
+ CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,
|
|
+
|
|
+ /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
|
|
+ CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,
|
|
+
|
|
+ /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
|
|
+ CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,
|
|
+
|
|
+ /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
|
|
+ CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,
|
|
+
|
|
+ /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
|
|
+ CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
|
|
+};
|
|
+
|
|
+#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff)
|
|
+#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff)
|
|
+#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30)
|
|
+#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31)
|
|
+
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30)
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31)
|
|
+
|
|
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \
|
|
+ (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \
|
|
+ CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \
|
|
+ CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \
|
|
+ CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \
|
|
+ CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \
|
|
+ CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \
|
|
+ CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
|
|
+ */
|
|
+struct panthor_fw_binary_section_entry_hdr {
|
|
+ /** @flags: Section flags. */
|
|
+ u32 flags;
|
|
+
|
|
+ /** @va: MCU virtual range to map this binary section to. */
|
|
+ struct {
|
|
+ /** @start: Start address. */
|
|
+ u32 start;
|
|
+
|
|
+ /** @end: End address. */
|
|
+ u32 end;
|
|
+ } va;
|
|
+
|
|
+ /** @data: Data to initialize the FW section with. */
|
|
+ struct {
|
|
+ /** @start: Start offset in the FW binary. */
|
|
+ u32 start;
|
|
+
|
|
+ /** @end: End offset in the FW binary. */
|
|
+ u32 end;
|
|
+ } data;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw_binary_iter - Firmware binary iterator
|
|
+ *
|
|
+ * Used to parse a firmware binary.
|
|
+ */
|
|
+struct panthor_fw_binary_iter {
|
|
+ /** @data: FW binary data. */
|
|
+ const void *data;
|
|
+
|
|
+ /** @size: FW binary size. */
|
|
+ size_t size;
|
|
+
|
|
+ /** @offset: Iterator offset. */
|
|
+ size_t offset;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw_section - FW section
|
|
+ */
|
|
+struct panthor_fw_section {
|
|
+ /** @node: Used to keep track of FW sections. */
|
|
+ struct list_head node;
|
|
+
|
|
+ /** @flags: Section flags, as encoded in the FW binary. */
|
|
+ u32 flags;
|
|
+
|
|
+ /** @mem: Section memory. */
|
|
+ struct panthor_kernel_bo *mem;
|
|
+
|
|
+ /**
|
|
+ * @name: Name of the section, as specified in the binary.
|
|
+ *
|
|
+ * Can be NULL.
|
|
+ */
|
|
+ const char *name;
|
|
+
|
|
+ /**
|
|
+ * @data: Initial data copied to the FW memory.
|
|
+ *
|
|
+ * We keep data around so we can reload sections after a reset.
|
|
+ */
|
|
+ struct {
|
|
+ /** @buf: Buffed used to store init data. */
|
|
+ const void *buf;
|
|
+
|
|
+ /** @size: Size of @buf in bytes. */
|
|
+ size_t size;
|
|
+ } data;
|
|
+};
|
|
+
|
|
+#define CSF_MCU_SHARED_REGION_START 0x04000000ULL
|
|
+#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL
|
|
+
|
|
+#define MIN_CS_PER_CSG 8
|
|
+#define MIN_CSGS 3
|
|
+#define MAX_CSG_PRIO 0xf
|
|
+
|
|
+#define CSF_IFACE_VERSION(major, minor, patch) \
|
|
+ (((major) << 24) | ((minor) << 16) | (patch))
|
|
+#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24)
|
|
+#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff)
|
|
+#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff)
|
|
+
|
|
+#define CSF_GROUP_CONTROL_OFFSET 0x1000
|
|
+#define CSF_STREAM_CONTROL_OFFSET 0x40
|
|
+#define CSF_UNPRESERVED_REG_COUNT 4
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw_iface - FW interfaces
|
|
+ */
|
|
+struct panthor_fw_iface {
|
|
+ /** @global: Global interface. */
|
|
+ struct panthor_fw_global_iface global;
|
|
+
|
|
+ /** @groups: Group slot interfaces. */
|
|
+ struct panthor_fw_csg_iface groups[MAX_CSGS];
|
|
+
|
|
+ /** @streams: Command stream slot interfaces. */
|
|
+ struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
|
|
+};
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw - Firmware management
|
|
+ */
|
|
+struct panthor_fw {
|
|
+ /** @vm: MCU VM. */
|
|
+ struct panthor_vm *vm;
|
|
+
|
|
+ /** @sections: List of FW sections. */
|
|
+ struct list_head sections;
|
|
+
|
|
+ /** @shared_section: The section containing the FW interfaces. */
|
|
+ struct panthor_fw_section *shared_section;
|
|
+
|
|
+ /** @iface: FW interfaces. */
|
|
+ struct panthor_fw_iface iface;
|
|
+
|
|
+ /** @watchdog: Collection of fields relating to the FW watchdog. */
|
|
+ struct {
|
|
+ /** @ping_work: Delayed work used to ping the FW. */
|
|
+ struct delayed_work ping_work;
|
|
+ } watchdog;
|
|
+
|
|
+ /**
|
|
+ * @req_waitqueue: FW request waitqueue.
|
|
+ *
|
|
+ * Everytime a request is sent to a command stream group or the global
|
|
+ * interface, the caller will first busy wait for the request to be
|
|
+ * acknowledged, and then fallback to a sleeping wait.
|
|
+ *
|
|
+ * This wait queue is here to support the sleeping wait flavor.
|
|
+ */
|
|
+ wait_queue_head_t req_waitqueue;
|
|
+
|
|
+ /** @booted: True is the FW is booted */
|
|
+ bool booted;
|
|
+
|
|
+ /**
|
|
+ * @fast_reset: True if the post_reset logic can proceed with a fast reset.
|
|
+ *
|
|
+ * A fast reset is just a reset where the driver doesn't reload the FW sections.
|
|
+ *
|
|
+ * Any time the firmware is properly suspended, a fast reset can take place.
|
|
+ * On the other hand, if the halt operation failed, the driver will reload
|
|
+ * all sections to make sure we start from a fresh state.
|
|
+ */
|
|
+ bool fast_reset;
|
|
+
|
|
+ /** @irq: Job irq data. */
|
|
+ struct panthor_irq irq;
|
|
+};
|
|
+
|
|
+struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
|
|
+{
|
|
+ return ptdev->fw->vm;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_get_glb_iface() - Get the global interface
|
|
+ * @ptdev: Device.
|
|
+ *
|
|
+ * Return: The global interface.
|
|
+ */
|
|
+struct panthor_fw_global_iface *
|
|
+panthor_fw_get_glb_iface(struct panthor_device *ptdev)
|
|
+{
|
|
+ return &ptdev->fw->iface.global;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_get_csg_iface() - Get a command stream group slot interface
|
|
+ * @ptdev: Device.
|
|
+ * @csg_slot: Index of the command stream group slot.
|
|
+ *
|
|
+ * Return: The command stream group slot interface.
|
|
+ */
|
|
+struct panthor_fw_csg_iface *
|
|
+panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
|
|
+{
|
|
+ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
|
|
+ return NULL;
|
|
+
|
|
+ return &ptdev->fw->iface.groups[csg_slot];
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_get_cs_iface() - Get a command stream slot interface
|
|
+ * @ptdev: Device.
|
|
+ * @csg_slot: Index of the command stream group slot.
|
|
+ * @cs_slot: Index of the command stream slot.
|
|
+ *
|
|
+ * Return: The command stream slot interface.
|
|
+ */
|
|
+struct panthor_fw_cs_iface *
|
|
+panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
|
|
+{
|
|
+ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG))
|
|
+ return NULL;
|
|
+
|
|
+ return &ptdev->fw->iface.streams[csg_slot][cs_slot];
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
|
|
+ * @ptdev: Device.
|
|
+ * @timeout_us: Timeout expressed in micro-seconds.
|
|
+ *
|
|
+ * The FW has two timer sources: the GPU counter or arch-timer. We need
|
|
+ * to express timeouts in term of number of cycles and specify which
|
|
+ * timer source should be used.
|
|
+ *
|
|
+ * Return: A value suitable for timeout fields in the global interface.
|
|
+ */
|
|
+static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
|
|
+{
|
|
+ bool use_cycle_counter = false;
|
|
+ u32 timer_rate = 0;
|
|
+ u64 mod_cycles;
|
|
+
|
|
+#ifdef CONFIG_ARM_ARCH_TIMER
|
|
+ timer_rate = arch_timer_get_cntfrq();
|
|
+#endif
|
|
+
|
|
+ if (!timer_rate) {
|
|
+ use_cycle_counter = true;
|
|
+ timer_rate = clk_get_rate(ptdev->clks.core);
|
|
+ }
|
|
+
|
|
+ if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
|
|
+ /* We couldn't get a valid clock rate, let's just pick the
|
|
+ * maximum value so the FW still handles the core
|
|
+ * power on/off requests.
|
|
+ */
|
|
+ return GLB_TIMER_VAL(~0) |
|
|
+ GLB_TIMER_SOURCE_GPU_COUNTER;
|
|
+ }
|
|
+
|
|
+ mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
|
|
+ 1000000ull << 10);
|
|
+ if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
|
|
+ mod_cycles = GLB_TIMER_VAL(~0);
|
|
+
|
|
+ return GLB_TIMER_VAL(mod_cycles) |
|
|
+ (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
|
|
+}
|
|
+
|
|
+static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
|
|
+ struct panthor_fw_binary_iter *iter,
|
|
+ void *out, size_t size)
|
|
+{
|
|
+ size_t new_offset = iter->offset + size;
|
|
+
|
|
+ if (new_offset > iter->size || new_offset < iter->offset) {
|
|
+ drm_err(&ptdev->base, "Firmware too small\n");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ memcpy(out, iter->data + iter->offset, size);
|
|
+ iter->offset = new_offset;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
|
|
+ struct panthor_fw_binary_iter *iter,
|
|
+ struct panthor_fw_binary_iter *sub_iter,
|
|
+ size_t size)
|
|
+{
|
|
+ size_t new_offset = iter->offset + size;
|
|
+
|
|
+ if (new_offset > iter->size || new_offset < iter->offset) {
|
|
+ drm_err(&ptdev->base, "Firmware entry too long\n");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ sub_iter->offset = 0;
|
|
+ sub_iter->data = iter->data + iter->offset;
|
|
+ sub_iter->size = size;
|
|
+ iter->offset = new_offset;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
|
|
+ struct panthor_fw_section *section)
|
|
+{
|
|
+ bool was_mapped = !!section->mem->kmap;
|
|
+ int ret;
|
|
+
|
|
+ if (!section->data.size &&
|
|
+ !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
|
|
+ return;
|
|
+
|
|
+ ret = panthor_kernel_bo_vmap(section->mem);
|
|
+ if (drm_WARN_ON(&ptdev->base, ret))
|
|
+ return;
|
|
+
|
|
+ memcpy(section->mem->kmap, section->data.buf, section->data.size);
|
|
+ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
|
|
+ memset(section->mem->kmap + section->data.size, 0,
|
|
+ panthor_kernel_bo_size(section->mem) - section->data.size);
|
|
+ }
|
|
+
|
|
+ if (!was_mapped)
|
|
+ panthor_kernel_bo_vunmap(section->mem);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
|
|
+ * @ptdev: Device.
|
|
+ * @input: Pointer holding the input interface on success.
|
|
+ * Should be ignored on failure.
|
|
+ * @output: Pointer holding the output interface on success.
|
|
+ * Should be ignored on failure.
|
|
+ * @input_fw_va: Pointer holding the input interface FW VA on success.
|
|
+ * Should be ignored on failure.
|
|
+ * @output_fw_va: Pointer holding the output interface FW VA on success.
|
|
+ * Should be ignored on failure.
|
|
+ *
|
|
+ * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
|
|
+ * interface is at offset 0, and the output interface at offset 4096.
|
|
+ *
|
|
+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
|
|
+ */
|
|
+struct panthor_kernel_bo *
|
|
+panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
|
|
+ struct panthor_fw_ringbuf_input_iface **input,
|
|
+ const struct panthor_fw_ringbuf_output_iface **output,
|
|
+ u32 *input_fw_va, u32 *output_fw_va)
|
|
+{
|
|
+ struct panthor_kernel_bo *mem;
|
|
+ int ret;
|
|
+
|
|
+ mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
|
|
+ DRM_PANTHOR_BO_NO_MMAP,
|
|
+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
|
|
+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
|
|
+ PANTHOR_VM_KERNEL_AUTO_VA);
|
|
+ if (IS_ERR(mem))
|
|
+ return mem;
|
|
+
|
|
+ ret = panthor_kernel_bo_vmap(mem);
|
|
+ if (ret) {
|
|
+ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem);
|
|
+ return ERR_PTR(ret);
|
|
+ }
|
|
+
|
|
+ memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
|
|
+ *input = mem->kmap;
|
|
+ *output = mem->kmap + SZ_4K;
|
|
+ *input_fw_va = panthor_kernel_bo_gpuva(mem);
|
|
+ *output_fw_va = *input_fw_va + SZ_4K;
|
|
+
|
|
+ return mem;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
|
|
+ * @ptdev: Device.
|
|
+ * @size: Size of the suspend buffer.
|
|
+ *
|
|
+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
|
|
+ */
|
|
+struct panthor_kernel_bo *
|
|
+panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
|
|
+{
|
|
+ return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
|
|
+ DRM_PANTHOR_BO_NO_MMAP,
|
|
+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
|
|
+ PANTHOR_VM_KERNEL_AUTO_VA);
|
|
+}
|
|
+
|
|
+static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
|
|
+ const struct firmware *fw,
|
|
+ struct panthor_fw_binary_iter *iter,
|
|
+ u32 ehdr)
|
|
+{
|
|
+ struct panthor_fw_binary_section_entry_hdr hdr;
|
|
+ struct panthor_fw_section *section;
|
|
+ u32 section_size;
|
|
+ u32 name_len;
|
|
+ int ret;
|
|
+
|
|
+ ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if (hdr.data.end < hdr.data.start) {
|
|
+ drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
|
|
+ hdr.data.end, hdr.data.start);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (hdr.va.end < hdr.va.start) {
|
|
+ drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
|
|
+ hdr.va.end, hdr.va.start);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (hdr.data.end > fw->size) {
|
|
+ drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
|
|
+ hdr.data.end, fw->size);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if ((hdr.va.start & ~PAGE_MASK) != 0 ||
|
|
+ (hdr.va.end & ~PAGE_MASK) != 0) {
|
|
+ drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
|
|
+ hdr.va.start, hdr.va.end);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
|
|
+ drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
|
|
+ hdr.flags);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
|
|
+ drm_warn(&ptdev->base,
|
|
+ "Firmware protected mode entry not be supported, ignoring");
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
|
|
+ !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
|
|
+ drm_err(&ptdev->base,
|
|
+ "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ name_len = iter->size - iter->offset;
|
|
+
|
|
+ section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
|
|
+ if (!section)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ list_add_tail(§ion->node, &ptdev->fw->sections);
|
|
+ section->flags = hdr.flags;
|
|
+ section->data.size = hdr.data.end - hdr.data.start;
|
|
+
|
|
+ if (section->data.size > 0) {
|
|
+ void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
|
|
+
|
|
+ if (!data)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ memcpy(data, fw->data + hdr.data.start, section->data.size);
|
|
+ section->data.buf = data;
|
|
+ }
|
|
+
|
|
+ if (name_len > 0) {
|
|
+ char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
|
|
+
|
|
+ if (!name)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ memcpy(name, iter->data + iter->offset, name_len);
|
|
+ name[name_len] = '\0';
|
|
+ section->name = name;
|
|
+ }
|
|
+
|
|
+ section_size = hdr.va.end - hdr.va.start;
|
|
+ if (section_size) {
|
|
+ u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
|
|
+ struct panthor_gem_object *bo;
|
|
+ u32 vm_map_flags = 0;
|
|
+ struct sg_table *sgt;
|
|
+ u64 va = hdr.va.start;
|
|
+
|
|
+ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
|
|
+ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
|
|
+
|
|
+ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
|
|
+ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
|
|
+
|
|
+ /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
|
|
+ * non-cacheable for now. We might want to introduce a new
|
|
+ * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
|
|
+ * memory and is currently not used by our driver) for
|
|
+ * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
|
|
+ * of IO-coherent systems.
|
|
+ */
|
|
+ if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
|
|
+ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
|
|
+
|
|
+ section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
|
|
+ section_size,
|
|
+ DRM_PANTHOR_BO_NO_MMAP,
|
|
+ vm_map_flags, va);
|
|
+ if (IS_ERR(section->mem))
|
|
+ return PTR_ERR(section->mem);
|
|
+
|
|
+ if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
|
|
+ ret = panthor_kernel_bo_vmap(section->mem);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ panthor_fw_init_section_mem(ptdev, section);
|
|
+
|
|
+ bo = to_panthor_bo(section->mem->obj);
|
|
+ sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
|
|
+ if (IS_ERR(sgt))
|
|
+ return PTR_ERR(sgt);
|
|
+
|
|
+ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
|
|
+ }
|
|
+
|
|
+ if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
|
|
+ ptdev->fw->shared_section = section;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void
|
|
+panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
|
|
+{
|
|
+ struct panthor_fw_section *section;
|
|
+
|
|
+ list_for_each_entry(section, &ptdev->fw->sections, node) {
|
|
+ struct sg_table *sgt;
|
|
+
|
|
+ if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
|
|
+ continue;
|
|
+
|
|
+ panthor_fw_init_section_mem(ptdev, section);
|
|
+ sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
|
|
+ if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
|
|
+ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
|
|
+ }
|
|
+}
|
|
+
|
|
+static int panthor_fw_load_entry(struct panthor_device *ptdev,
|
|
+ const struct firmware *fw,
|
|
+ struct panthor_fw_binary_iter *iter)
|
|
+{
|
|
+ struct panthor_fw_binary_iter eiter;
|
|
+ u32 ehdr;
|
|
+ int ret;
|
|
+
|
|
+ ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if ((iter->offset % sizeof(u32)) ||
|
|
+ (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
|
|
+ drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
|
|
+ (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
|
|
+ CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
|
|
+ return -EINVAL;
|
|
+
|
|
+ switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
|
|
+ case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
|
|
+ return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
|
|
+
|
|
+ /* FIXME: handle those entry types? */
|
|
+ case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
|
|
+ case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
|
|
+ case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
|
|
+ case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
|
|
+ return 0;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
|
|
+ return 0;
|
|
+
|
|
+ drm_err(&ptdev->base,
|
|
+ "Unsupported non-optional entry type %u in firmware\n",
|
|
+ CSF_FW_BINARY_ENTRY_TYPE(ehdr));
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+static int panthor_fw_load(struct panthor_device *ptdev)
|
|
+{
|
|
+ const struct firmware *fw = NULL;
|
|
+ struct panthor_fw_binary_iter iter = {};
|
|
+ struct panthor_fw_binary_hdr hdr;
|
|
+ char fw_path[128];
|
|
+ int ret;
|
|
+
|
|
+ snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
|
|
+ (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
|
|
+ (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
|
|
+ CSF_FW_NAME);
|
|
+
|
|
+ ret = request_firmware(&fw, fw_path, ptdev->base.dev);
|
|
+ if (ret) {
|
|
+ drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
|
|
+ CSF_FW_NAME);
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ iter.data = fw->data;
|
|
+ iter.size = fw->size;
|
|
+ ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
|
|
+ if (ret)
|
|
+ goto out;
|
|
+
|
|
+ if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
|
|
+ ret = -EINVAL;
|
|
+ drm_err(&ptdev->base, "Invalid firmware magic\n");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
|
|
+ ret = -EINVAL;
|
|
+ drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
|
|
+ hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (hdr.size > iter.size) {
|
|
+ drm_err(&ptdev->base, "Firmware image is truncated\n");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ iter.size = hdr.size;
|
|
+
|
|
+ while (iter.offset < hdr.size) {
|
|
+ ret = panthor_fw_load_entry(ptdev, fw, &iter);
|
|
+ if (ret)
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (!ptdev->fw->shared_section) {
|
|
+ drm_err(&ptdev->base, "Shared interface region not found\n");
|
|
+ ret = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+out:
|
|
+ release_firmware(fw);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
|
|
+ * @ptdev: Device.
|
|
+ * @mcu_va: MCU address.
|
|
+ *
|
|
+ * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
|
|
+ */
|
|
+static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
|
|
+{
|
|
+ u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
|
|
+ u64 shared_mem_end = shared_mem_start +
|
|
+ panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
|
|
+ if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
|
|
+ return NULL;
|
|
+
|
|
+ return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
|
|
+}
|
|
+
|
|
+static int panthor_init_cs_iface(struct panthor_device *ptdev,
|
|
+ unsigned int csg_idx, unsigned int cs_idx)
|
|
+{
|
|
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
|
|
+ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
|
|
+ struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
|
|
+ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
|
|
+ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
|
|
+ (csg_idx * glb_iface->control->group_stride) +
|
|
+ CSF_STREAM_CONTROL_OFFSET +
|
|
+ (cs_idx * csg_iface->control->stream_stride);
|
|
+ struct panthor_fw_cs_iface *first_cs_iface =
|
|
+ panthor_fw_get_cs_iface(ptdev, 0, 0);
|
|
+
|
|
+ if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
|
|
+ return -EINVAL;
|
|
+
|
|
+ spin_lock_init(&cs_iface->lock);
|
|
+ cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
|
|
+ cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
|
|
+ cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
|
|
+
|
|
+ if (!cs_iface->input || !cs_iface->output) {
|
|
+ drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (cs_iface != first_cs_iface) {
|
|
+ if (cs_iface->control->features != first_cs_iface->control->features) {
|
|
+ drm_err(&ptdev->base, "Expecting identical CS slots");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ } else {
|
|
+ u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
|
|
+
|
|
+ ptdev->csif_info.cs_reg_count = reg_count;
|
|
+ ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
|
|
+ const struct panthor_fw_csg_control_iface *b)
|
|
+{
|
|
+ if (a->features != b->features)
|
|
+ return false;
|
|
+ if (a->suspend_size != b->suspend_size)
|
|
+ return false;
|
|
+ if (a->protm_suspend_size != b->protm_suspend_size)
|
|
+ return false;
|
|
+ if (a->stream_num != b->stream_num)
|
|
+ return false;
|
|
+ return true;
|
|
+}
|
|
+
|
|
+static int panthor_init_csg_iface(struct panthor_device *ptdev,
|
|
+ unsigned int csg_idx)
|
|
+{
|
|
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
|
|
+ struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
|
|
+ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
|
|
+ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
|
|
+ unsigned int i;
|
|
+
|
|
+ if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
|
|
+ return -EINVAL;
|
|
+
|
|
+ spin_lock_init(&csg_iface->lock);
|
|
+ csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
|
|
+ csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
|
|
+ csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
|
|
+
|
|
+ if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
|
|
+ csg_iface->control->stream_num > MAX_CS_PER_CSG)
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (!csg_iface->input || !csg_iface->output) {
|
|
+ drm_err(&ptdev->base, "Invalid group control interface input/output VA");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (csg_idx > 0) {
|
|
+ struct panthor_fw_csg_iface *first_csg_iface =
|
|
+ panthor_fw_get_csg_iface(ptdev, 0);
|
|
+
|
|
+ if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
|
|
+ drm_err(&ptdev->base, "Expecting identical CSG slots");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < csg_iface->control->stream_num; i++) {
|
|
+ int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
|
|
+
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static u32 panthor_get_instr_features(struct panthor_device *ptdev)
|
|
+{
|
|
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
|
|
+
|
|
+ if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
|
|
+ return 0;
|
|
+
|
|
+ return glb_iface->control->instr_features;
|
|
+}
|
|
+
|
|
+static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
|
|
+{
|
|
+ struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
|
|
+ unsigned int i;
|
|
+
|
|
+ if (!ptdev->fw->shared_section->mem->kmap)
|
|
+ return -EINVAL;
|
|
+
|
|
+ spin_lock_init(&glb_iface->lock);
|
|
+ glb_iface->control = ptdev->fw->shared_section->mem->kmap;
|
|
+
|
|
+ if (!glb_iface->control->version) {
|
|
+ drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
|
|
+ glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
|
|
+ if (!glb_iface->input || !glb_iface->output) {
|
|
+ drm_err(&ptdev->base, "Invalid global control interface input/output VA");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (glb_iface->control->group_num > MAX_CSGS ||
|
|
+ glb_iface->control->group_num < MIN_CSGS) {
|
|
+ drm_err(&ptdev->base, "Invalid number of control groups");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < glb_iface->control->group_num; i++) {
|
|
+ int ret = panthor_init_csg_iface(ptdev, i);
|
|
+
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x",
|
|
+ CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
|
|
+ CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
|
|
+ CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
|
|
+ glb_iface->control->features,
|
|
+ panthor_get_instr_features(ptdev));
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
|
|
+{
|
|
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
|
|
+
|
|
+ /* Enable all cores. */
|
|
+ glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;
|
|
+
|
|
+ /* Setup timers. */
|
|
+ glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
|
|
+ glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
|
|
+ glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);
|
|
+
|
|
+ /* Enable interrupts we care about. */
|
|
+ glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
|
|
+ GLB_PING |
|
|
+ GLB_CFG_PROGRESS_TIMER |
|
|
+ GLB_CFG_POWEROFF_TIMER |
|
|
+ GLB_IDLE_EN |
|
|
+ GLB_IDLE;
|
|
+
|
|
+ panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
|
|
+ panthor_fw_toggle_reqs(glb_iface, req, ack,
|
|
+ GLB_CFG_ALLOC_EN |
|
|
+ GLB_CFG_POWEROFF_TIMER |
|
|
+ GLB_CFG_PROGRESS_TIMER);
|
|
+
|
|
+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
|
|
+
|
|
+ /* Kick the watchdog. */
|
|
+ mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
|
|
+ msecs_to_jiffies(PING_INTERVAL_MS));
|
|
+}
|
|
+
|
|
+static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
|
|
+{
|
|
+ if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
|
|
+ ptdev->fw->booted = true;
|
|
+
|
|
+ wake_up_all(&ptdev->fw->req_waitqueue);
|
|
+
|
|
+ /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
|
|
+ if (!ptdev->fw->booted)
|
|
+ return;
|
|
+
|
|
+ panthor_sched_report_fw_events(ptdev, status);
|
|
+}
|
|
+PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
|
|
+
|
|
+static int panthor_fw_start(struct panthor_device *ptdev)
|
|
+{
|
|
+ bool timedout = false;
|
|
+
|
|
+ ptdev->fw->booted = false;
|
|
+ panthor_job_irq_resume(&ptdev->fw->irq, ~0);
|
|
+ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
|
|
+
|
|
+ if (!wait_event_timeout(ptdev->fw->req_waitqueue,
|
|
+ ptdev->fw->booted,
|
|
+ msecs_to_jiffies(1000))) {
|
|
+ if (!ptdev->fw->booted &&
|
|
+ !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
|
|
+ timedout = true;
|
|
+ }
|
|
+
|
|
+ if (timedout) {
|
|
+ static const char * const status_str[] = {
|
|
+ [MCU_STATUS_DISABLED] = "disabled",
|
|
+ [MCU_STATUS_ENABLED] = "enabled",
|
|
+ [MCU_STATUS_HALT] = "halt",
|
|
+ [MCU_STATUS_FATAL] = "fatal",
|
|
+ };
|
|
+ u32 status = gpu_read(ptdev, MCU_STATUS);
|
|
+
|
|
+ drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
|
|
+ status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
|
|
+ return -ETIMEDOUT;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void panthor_fw_stop(struct panthor_device *ptdev)
|
|
+{
|
|
+ u32 status;
|
|
+
|
|
+ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
|
|
+ if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
|
|
+ status == MCU_STATUS_DISABLED, 10, 100000))
|
|
+ drm_err(&ptdev->base, "Failed to stop MCU");
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_pre_reset() - Call before a reset.
|
|
+ * @ptdev: Device.
|
|
+ * @on_hang: true if the reset was triggered on a GPU hang.
|
|
+ *
|
|
+ * If the reset is not triggered on a hang, we try to gracefully halt the
|
|
+ * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
|
|
+ */
|
|
+void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
|
|
+{
|
|
+ /* Make sure we won't be woken up by a ping. */
|
|
+ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
|
|
+
|
|
+ ptdev->fw->fast_reset = false;
|
|
+
|
|
+ if (!on_hang) {
|
|
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
|
|
+ u32 status;
|
|
+
|
|
+ panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
|
|
+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
|
|
+ if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
|
|
+ status == MCU_STATUS_HALT, 10, 100000) &&
|
|
+ glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
|
|
+ ptdev->fw->fast_reset = true;
|
|
+ } else {
|
|
+ drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
|
|
+ }
|
|
+
|
|
+ /* The FW detects 0 -> 1 transitions. Make sure we reset
|
|
+ * the HALT bit before the FW is rebooted.
|
|
+ */
|
|
+ panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
|
|
+ }
|
|
+
|
|
+ panthor_job_irq_suspend(&ptdev->fw->irq);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_post_reset() - Call after a reset.
|
|
+ * @ptdev: Device.
|
|
+ *
|
|
+ * Start the FW. If this is not a fast reset, all FW sections are reloaded to
|
|
+ * make sure we can recover from a memory corruption.
|
|
+ */
|
|
+int panthor_fw_post_reset(struct panthor_device *ptdev)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ /* Make the MCU VM active. */
|
|
+ ret = panthor_vm_active(ptdev->fw->vm);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ /* If this is a fast reset, try to start the MCU without reloading
|
|
+ * the FW sections. If it fails, go for a full reset.
|
|
+ */
|
|
+ if (ptdev->fw->fast_reset) {
|
|
+ ret = panthor_fw_start(ptdev);
|
|
+ if (!ret)
|
|
+ goto out;
|
|
+
|
|
+ /* Force a disable, so we get a fresh boot on the next
|
|
+ * panthor_fw_start() call.
|
|
+ */
|
|
+ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
|
|
+ drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
|
|
+ }
|
|
+
|
|
+ /* Reload all sections, including RO ones. We're not supposed
|
|
+ * to end up here anyway, let's just assume the overhead of
|
|
+ * reloading everything is acceptable.
|
|
+ */
|
|
+ panthor_reload_fw_sections(ptdev, true);
|
|
+
|
|
+ ret = panthor_fw_start(ptdev);
|
|
+ if (ret) {
|
|
+ drm_err(&ptdev->base, "FW slow reset failed");
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+out:
|
|
+ /* We must re-initialize the global interface even on fast-reset. */
|
|
+ panthor_fw_init_global_iface(ptdev);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_unplug() - Called when the device is unplugged.
|
|
+ * @ptdev: Device.
|
|
+ *
|
|
+ * This function must make sure all pending operations are flushed before
|
|
+ * will release device resources, thus preventing any interaction with
|
|
+ * the HW.
|
|
+ *
|
|
+ * If there is still FW-related work running after this function returns,
|
|
+ * they must use drm_dev_{enter,exit}() and skip any HW access when
|
|
+ * drm_dev_enter() returns false.
|
|
+ */
|
|
+void panthor_fw_unplug(struct panthor_device *ptdev)
|
|
+{
|
|
+ struct panthor_fw_section *section;
|
|
+
|
|
+ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
|
|
+
|
|
+ /* Make sure the IRQ handler can be called after that point. */
|
|
+ if (ptdev->fw->irq.irq)
|
|
+ panthor_job_irq_suspend(&ptdev->fw->irq);
|
|
+
|
|
+ panthor_fw_stop(ptdev);
|
|
+
|
|
+ list_for_each_entry(section, &ptdev->fw->sections, node)
|
|
+ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);
|
|
+
|
|
+ /* We intentionally don't call panthor_vm_idle() and let
|
|
+ * panthor_mmu_unplug() release the AS we acquired with
|
|
+ * panthor_vm_active() so we don't have to track the VM active/idle
|
|
+ * state to keep the active_refcnt balanced.
|
|
+ */
|
|
+ panthor_vm_put(ptdev->fw->vm);
|
|
+
|
|
+ panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
|
|
+ * @req_ptr: Pointer to the req register.
|
|
+ * @ack_ptr: Pointer to the ack register.
|
|
+ * @wq: Wait queue to use for the sleeping wait.
|
|
+ * @req_mask: Mask of requests to wait for.
|
|
+ * @acked: Pointer to field that's updated with the acked requests.
|
|
+ * If the function returns 0, *acked == req_mask.
|
|
+ * @timeout_ms: Timeout expressed in milliseconds.
|
|
+ *
|
|
+ * Return: 0 on success, -ETIMEDOUT otherwise.
|
|
+ */
|
|
+static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
|
|
+ wait_queue_head_t *wq,
|
|
+ u32 req_mask, u32 *acked,
|
|
+ u32 timeout_ms)
|
|
+{
|
|
+ u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
|
|
+ int ret;
|
|
+
|
|
+ /* Busy wait for a few µsecs before falling back to a sleeping wait. */
|
|
+ *acked = req_mask;
|
|
+ ret = read_poll_timeout_atomic(READ_ONCE, ack,
|
|
+ (ack & req_mask) == req,
|
|
+ 0, 10, 0,
|
|
+ *ack_ptr);
|
|
+ if (!ret)
|
|
+ return 0;
|
|
+
|
|
+ if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
|
|
+ msecs_to_jiffies(timeout_ms)))
|
|
+ return 0;
|
|
+
|
|
+ /* Check one last time, in case we were not woken up for some reason. */
|
|
+ ack = READ_ONCE(*ack_ptr);
|
|
+ if ((ack & req_mask) == req)
|
|
+ return 0;
|
|
+
|
|
+ *acked = ~(req ^ ack) & req_mask;
|
|
+ return -ETIMEDOUT;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
|
|
+ * @ptdev: Device.
|
|
+ * @req_mask: Mask of requests to wait for.
|
|
+ * @acked: Pointer to field that's updated with the acked requests.
|
|
+ * If the function returns 0, *acked == req_mask.
|
|
+ * @timeout_ms: Timeout expressed in milliseconds.
|
|
+ *
|
|
+ * Return: 0 on success, -ETIMEDOUT otherwise.
|
|
+ */
|
|
+int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
|
|
+ u32 req_mask, u32 *acked,
|
|
+ u32 timeout_ms)
|
|
+{
|
|
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
|
|
+
|
|
+ /* GLB_HALT doesn't get acked through the FW interface. */
|
|
+ if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
|
|
+ return -EINVAL;
|
|
+
|
|
+ return panthor_fw_wait_acks(&glb_iface->input->req,
|
|
+ &glb_iface->output->ack,
|
|
+ &ptdev->fw->req_waitqueue,
|
|
+ req_mask, acked, timeout_ms);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
|
|
+ * @ptdev: Device.
|
|
+ * @csg_slot: CSG slot ID.
|
|
+ * @req_mask: Mask of requests to wait for.
|
|
+ * @acked: Pointer to field that's updated with the acked requests.
|
|
+ * If the function returns 0, *acked == req_mask.
|
|
+ * @timeout_ms: Timeout expressed in milliseconds.
|
|
+ *
|
|
+ * Return: 0 on success, -ETIMEDOUT otherwise.
|
|
+ */
|
|
+int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
|
|
+ u32 req_mask, u32 *acked, u32 timeout_ms)
|
|
+{
|
|
+ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
|
|
+ int ret;
|
|
+
|
|
+ if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
|
|
+ return -EINVAL;
|
|
+
|
|
+ ret = panthor_fw_wait_acks(&csg_iface->input->req,
|
|
+ &csg_iface->output->ack,
|
|
+ &ptdev->fw->req_waitqueue,
|
|
+ req_mask, acked, timeout_ms);
|
|
+
|
|
+ /*
|
|
+ * Check that all bits in the state field were updated, if any mismatch
|
|
+ * then clear all bits in the state field. This allows code to do
|
|
+ * (acked & CSG_STATE_MASK) and get the right value.
|
|
+ */
|
|
+
|
|
+ if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
|
|
+ *acked &= ~CSG_STATE_MASK;
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
|
|
+ * @ptdev: Device.
|
|
+ * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
|
|
+ *
|
|
+ * This function is toggling bits in the doorbell_req and ringing the
|
|
+ * global doorbell. It doesn't require a user doorbell to be attached to
|
|
+ * the group.
|
|
+ */
|
|
+void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
|
|
+{
|
|
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
|
|
+
|
|
+ panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
|
|
+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
|
|
+}
|
|
+
|
|
+static void panthor_fw_ping_work(struct work_struct *work)
|
|
+{
|
|
+ struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
|
|
+ struct panthor_device *ptdev = fw->irq.ptdev;
|
|
+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
|
|
+ u32 acked;
|
|
+ int ret;
|
|
+
|
|
+ if (panthor_device_reset_is_pending(ptdev))
|
|
+ return;
|
|
+
|
|
+ panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
|
|
+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
|
|
+
|
|
+ ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
|
|
+ if (ret) {
|
|
+ panthor_device_schedule_reset(ptdev);
|
|
+ drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
|
|
+ } else {
|
|
+ mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
|
|
+ msecs_to_jiffies(PING_INTERVAL_MS));
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * panthor_fw_init() - Initialize FW related data.
|
|
+ * @ptdev: Device.
|
|
+ *
|
|
+ * Return: 0 on success, a negative error code otherwise.
|
|
+ */
|
|
+int panthor_fw_init(struct panthor_device *ptdev)
|
|
+{
|
|
+ struct panthor_fw *fw;
|
|
+ int ret, irq;
|
|
+
|
|
+ fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
|
|
+ if (!fw)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ ptdev->fw = fw;
|
|
+ init_waitqueue_head(&fw->req_waitqueue);
|
|
+ INIT_LIST_HEAD(&fw->sections);
|
|
+ INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
|
|
+
|
|
+ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
|
|
+ if (irq <= 0)
|
|
+ return -ENODEV;
|
|
+
|
|
+ ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
|
|
+ if (ret) {
|
|
+ drm_err(&ptdev->base, "failed to request job irq");
|
|
+ return ret;
|
|
+ }
|
|
+
|
|
+ ret = panthor_gpu_l2_power_on(ptdev);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ fw->vm = panthor_vm_create(ptdev, true,
|
|
+ 0, SZ_4G,
|
|
+ CSF_MCU_SHARED_REGION_START,
|
|
+ CSF_MCU_SHARED_REGION_SIZE);
|
|
+ if (IS_ERR(fw->vm)) {
|
|
+ ret = PTR_ERR(fw->vm);
|
|
+ fw->vm = NULL;
|
|
+ goto err_unplug_fw;
|
|
+ }
|
|
+
|
|
+ ret = panthor_fw_load(ptdev);
|
|
+ if (ret)
|
|
+ goto err_unplug_fw;
|
|
+
|
|
+ ret = panthor_vm_active(fw->vm);
|
|
+ if (ret)
|
|
+ goto err_unplug_fw;
|
|
+
|
|
+ ret = panthor_fw_start(ptdev);
|
|
+ if (ret)
|
|
+ goto err_unplug_fw;
|
|
+
|
|
+ ret = panthor_fw_init_ifaces(ptdev);
|
|
+ if (ret)
|
|
+ goto err_unplug_fw;
|
|
+
|
|
+ panthor_fw_init_global_iface(ptdev);
|
|
+ return 0;
|
|
+
|
|
+err_unplug_fw:
|
|
+ panthor_fw_unplug(ptdev);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
|
|
--- /dev/null
|
|
+++ b/drivers/gpu/drm/panthor/panthor_fw.h
|
|
@@ -0,0 +1,503 @@
|
|
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
|
|
+/* Copyright 2023 Collabora ltd. */
|
|
+
|
|
+#ifndef __PANTHOR_MCU_H__
|
|
+#define __PANTHOR_MCU_H__
|
|
+
|
|
+#include <linux/types.h>
|
|
+
|
|
+struct panthor_device;
|
|
+struct panthor_kernel_bo;
|
|
+
|
|
+#define MAX_CSGS 31
|
|
+#define MAX_CS_PER_CSG 32
|
|
+
|
|
+struct panthor_fw_ringbuf_input_iface {
|
|
+ u64 insert;
|
|
+ u64 extract;
|
|
+};
|
|
+
|
|
+struct panthor_fw_ringbuf_output_iface {
|
|
+ u64 extract;
|
|
+ u32 active;
|
|
+};
|
|
+
|
|
+struct panthor_fw_cs_control_iface {
|
|
+#define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1)
|
|
+#define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8)
|
|
+#define CS_FEATURES_COMPUTE BIT(16)
|
|
+#define CS_FEATURES_FRAGMENT BIT(17)
|
|
+#define CS_FEATURES_TILER BIT(18)
|
|
+ u32 features;
|
|
+ u32 input_va;
|
|
+ u32 output_va;
|
|
+};
|
|
+
|
|
+struct panthor_fw_cs_input_iface {
|
|
+#define CS_STATE_MASK GENMASK(2, 0)
|
|
+#define CS_STATE_STOP 0
|
|
+#define CS_STATE_START 1
|
|
+#define CS_EXTRACT_EVENT BIT(4)
|
|
+#define CS_IDLE_SYNC_WAIT BIT(8)
|
|
+#define CS_IDLE_PROTM_PENDING BIT(9)
|
|
+#define CS_IDLE_EMPTY BIT(10)
|
|
+#define CS_IDLE_RESOURCE_REQ BIT(11)
|
|
+#define CS_TILER_OOM BIT(26)
|
|
+#define CS_PROTM_PENDING BIT(27)
|
|
+#define CS_FATAL BIT(30)
|
|
+#define CS_FAULT BIT(31)
|
|
+#define CS_REQ_MASK (CS_STATE_MASK | \
|
|
+ CS_EXTRACT_EVENT | \
|
|
+ CS_IDLE_SYNC_WAIT | \
|
|
+ CS_IDLE_PROTM_PENDING | \
|
|
+ CS_IDLE_EMPTY | \
|
|
+ CS_IDLE_RESOURCE_REQ)
|
|
+#define CS_EVT_MASK (CS_TILER_OOM | \
|
|
+ CS_PROTM_PENDING | \
|
|
+ CS_FATAL | \
|
|
+ CS_FAULT)
|
|
+ u32 req;
|
|
+
|
|
+#define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0))
|
|
+#define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8))
|
|
+ u32 config;
|
|
+ u32 reserved1;
|
|
+ u32 ack_irq_mask;
|
|
+ u64 ringbuf_base;
|
|
+ u32 ringbuf_size;
|
|
+ u32 reserved2;
|
|
+ u64 heap_start;
|
|
+ u64 heap_end;
|
|
+ u64 ringbuf_input;
|
|
+ u64 ringbuf_output;
|
|
+ u32 instr_config;
|
|
+ u32 instrbuf_size;
|
|
+ u64 instrbuf_base;
|
|
+ u64 instrbuf_offset_ptr;
|
|
+};
|
|
+
|
|
+struct panthor_fw_cs_output_iface {
|
|
+ u32 ack;
|
|
+ u32 reserved1[15];
|
|
+ u64 status_cmd_ptr;
|
|
+
|
|
+#define CS_STATUS_WAIT_SB_MASK GENMASK(15, 0)
|
|
+#define CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16)
|
|
+#define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16)
|
|
+#define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16)
|
|
+#define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24)
|
|
+#define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24)
|
|
+#define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24)
|
|
+#define CS_STATUS_WAIT_PROGRESS BIT(28)
|
|
+#define CS_STATUS_WAIT_PROTM BIT(29)
|
|
+#define CS_STATUS_WAIT_SYNC_64B BIT(30)
|
|
+#define CS_STATUS_WAIT_SYNC BIT(31)
|
|
+ u32 status_wait;
|
|
+ u32 status_req_resource;
|
|
+ u64 status_wait_sync_ptr;
|
|
+ u32 status_wait_sync_value;
|
|
+ u32 status_scoreboards;
|
|
+
|
|
+#define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0
|
|
+#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1
|
|
+#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2
|
|
+#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3
|
|
+#define CS_STATUS_BLOCKED_REASON_DEFERRED 5
|
|
+#define CS_STATUS_BLOCKED_REASON_RES 6
|
|
+#define CS_STATUS_BLOCKED_REASON_FLUSH 7
|
|
+#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0)
|
|
+ u32 status_blocked_reason;
|
|
+ u32 status_wait_sync_value_hi;
|
|
+ u32 reserved2[6];
|
|
+
|
|
+#define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0))
|
|
+#define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0))
|
|
+ u32 fault;
|
|
+ u32 fatal;
|
|
+ u64 fault_info;
|
|
+ u64 fatal_info;
|
|
+ u32 reserved3[10];
|
|
+ u32 heap_vt_start;
|
|
+ u32 heap_vt_end;
|
|
+ u32 reserved4;
|
|
+ u32 heap_frag_end;
|
|
+ u64 heap_address;
|
|
+};
|
|
+
|
|
+struct panthor_fw_csg_control_iface {
|
|
+ u32 features;
|
|
+ u32 input_va;
|
|
+ u32 output_va;
|
|
+ u32 suspend_size;
|
|
+ u32 protm_suspend_size;
|
|
+ u32 stream_num;
|
|
+ u32 stream_stride;
|
|
+};
|
|
+
|
|
+struct panthor_fw_csg_input_iface {
|
|
+#define CSG_STATE_MASK GENMASK(2, 0)
|
|
+#define CSG_STATE_TERMINATE 0
|
|
+#define CSG_STATE_START 1
|
|
+#define CSG_STATE_SUSPEND 2
|
|
+#define CSG_STATE_RESUME 3
|
|
+#define CSG_ENDPOINT_CONFIG BIT(4)
|
|
+#define CSG_STATUS_UPDATE BIT(5)
|
|
+#define CSG_SYNC_UPDATE BIT(28)
|
|
+#define CSG_IDLE BIT(29)
|
|
+#define CSG_DOORBELL BIT(30)
|
|
+#define CSG_PROGRESS_TIMER_EVENT BIT(31)
|
|
+#define CSG_REQ_MASK (CSG_STATE_MASK | \
|
|
+ CSG_ENDPOINT_CONFIG | \
|
|
+ CSG_STATUS_UPDATE)
|
|
+#define CSG_EVT_MASK (CSG_SYNC_UPDATE | \
|
|
+ CSG_IDLE | \
|
|
+ CSG_PROGRESS_TIMER_EVENT)
|
|
+ u32 req;
|
|
+ u32 ack_irq_mask;
|
|
+
|
|
+ u32 doorbell_req;
|
|
+ u32 cs_irq_ack;
|
|
+ u32 reserved1[4];
|
|
+ u64 allow_compute;
|
|
+ u64 allow_fragment;
|
|
+ u32 allow_other;
|
|
+
|
|
+#define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0))
|
|
+#define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8))
|
|
+#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16))
|
|
+#define CSG_EP_REQ_EXCL_COMPUTE BIT(20)
|
|
+#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21)
|
|
+#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28))
|
|
+#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28)
|
|
+ u32 endpoint_req;
|
|
+ u32 reserved2[2];
|
|
+ u64 suspend_buf;
|
|
+ u64 protm_suspend_buf;
|
|
+ u32 config;
|
|
+ u32 iter_trace_config;
|
|
+};
|
|
+
|
|
+struct panthor_fw_csg_output_iface {
|
|
+ u32 ack;
|
|
+ u32 reserved1;
|
|
+ u32 doorbell_ack;
|
|
+ u32 cs_irq_req;
|
|
+ u32 status_endpoint_current;
|
|
+ u32 status_endpoint_req;
|
|
+
|
|
+#define CSG_STATUS_STATE_IS_IDLE BIT(0)
|
|
+ u32 status_state;
|
|
+ u32 resource_dep;
|
|
+};
|
|
+
|
|
+struct panthor_fw_global_control_iface {
|
|
+ u32 version;
|
|
+ u32 features;
|
|
+ u32 input_va;
|
|
+ u32 output_va;
|
|
+ u32 group_num;
|
|
+ u32 group_stride;
|
|
+ u32 perfcnt_size;
|
|
+ u32 instr_features;
|
|
+};
|
|
+
|
|
+struct panthor_fw_global_input_iface {
|
|
+#define GLB_HALT BIT(0)
|
|
+#define GLB_CFG_PROGRESS_TIMER BIT(1)
|
|
+#define GLB_CFG_ALLOC_EN BIT(2)
|
|
+#define GLB_CFG_POWEROFF_TIMER BIT(3)
|
|
+#define GLB_PROTM_ENTER BIT(4)
|
|
+#define GLB_PERFCNT_EN BIT(5)
|
|
+#define GLB_PERFCNT_SAMPLE BIT(6)
|
|
+#define GLB_COUNTER_EN BIT(7)
|
|
+#define GLB_PING BIT(8)
|
|
+#define GLB_FWCFG_UPDATE BIT(9)
|
|
+#define GLB_IDLE_EN BIT(10)
|
|
+#define GLB_SLEEP BIT(12)
|
|
+#define GLB_INACTIVE_COMPUTE BIT(20)
|
|
+#define GLB_INACTIVE_FRAGMENT BIT(21)
|
|
+#define GLB_INACTIVE_TILER BIT(22)
|
|
+#define GLB_PROTM_EXIT BIT(23)
|
|
+#define GLB_PERFCNT_THRESHOLD BIT(24)
|
|
+#define GLB_PERFCNT_OVERFLOW BIT(25)
|
|
+#define GLB_IDLE BIT(26)
|
|
+#define GLB_DBG_CSF BIT(30)
|
|
+#define GLB_DBG_HOST BIT(31)
|
|
+#define GLB_REQ_MASK GENMASK(10, 0)
|
|
+#define GLB_EVT_MASK GENMASK(26, 20)
|
|
+ u32 req;
|
|
+ u32 ack_irq_mask;
|
|
+ u32 doorbell_req;
|
|
+ u32 reserved1;
|
|
+ u32 progress_timer;
|
|
+
|
|
+#define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0))
|
|
+#define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31)
|
|
+ u32 poweroff_timer;
|
|
+ u64 core_en_mask;
|
|
+ u32 reserved2;
|
|
+ u32 perfcnt_as;
|
|
+ u64 perfcnt_base;
|
|
+ u32 perfcnt_extract;
|
|
+ u32 reserved3[3];
|
|
+ u32 perfcnt_config;
|
|
+ u32 perfcnt_csg_select;
|
|
+ u32 perfcnt_fw_enable;
|
|
+ u32 perfcnt_csg_enable;
|
|
+ u32 perfcnt_csf_enable;
|
|
+ u32 perfcnt_shader_enable;
|
|
+ u32 perfcnt_tiler_enable;
|
|
+ u32 perfcnt_mmu_l2_enable;
|
|
+ u32 reserved4[8];
|
|
+ u32 idle_timer;
|
|
+};
|
|
+
|
|
+enum panthor_fw_halt_status {
|
|
+ PANTHOR_FW_HALT_OK = 0,
|
|
+ PANTHOR_FW_HALT_ON_PANIC = 0x4e,
|
|
+ PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f,
|
|
+};
|
|
+
|
|
+struct panthor_fw_global_output_iface {
|
|
+ u32 ack;
|
|
+ u32 reserved1;
|
|
+ u32 doorbell_ack;
|
|
+ u32 reserved2;
|
|
+ u32 halt_status;
|
|
+ u32 perfcnt_status;
|
|
+ u32 perfcnt_insert;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw_cs_iface - Firmware command stream slot interface
|
|
+ */
|
|
+struct panthor_fw_cs_iface {
|
|
+ /**
|
|
+ * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req
|
|
+ * field.
|
|
+ *
|
|
+ * Needed so we can update the req field concurrently from the interrupt
|
|
+ * handler and the scheduler logic.
|
|
+ *
|
|
+ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
|
|
+ * interface sections are mapped uncached/write-combined right now, and
|
|
+ * using cmpxchg() on such mappings leads to SError faults. Revisit when
|
|
+ * we have 'SHARED' GPU mappings hooked up.
|
|
+ */
|
|
+ spinlock_t lock;
|
|
+
|
|
+ /**
|
|
+ * @control: Command stream slot control interface.
|
|
+ *
|
|
+ * Used to expose command stream slot properties.
|
|
+ *
|
|
+ * This interface is read-only.
|
|
+ */
|
|
+ struct panthor_fw_cs_control_iface *control;
|
|
+
|
|
+ /**
|
|
+ * @input: Command stream slot input interface.
|
|
+ *
|
|
+ * Used for host updates/events.
|
|
+ */
|
|
+ struct panthor_fw_cs_input_iface *input;
|
|
+
|
|
+ /**
|
|
+ * @output: Command stream slot output interface.
|
|
+ *
|
|
+ * Used for FW updates/events.
|
|
+ *
|
|
+ * This interface is read-only.
|
|
+ */
|
|
+ const struct panthor_fw_cs_output_iface *output;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw_csg_iface - Firmware command stream group slot interface
|
|
+ */
|
|
+struct panthor_fw_csg_iface {
|
|
+ /**
|
|
+ * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req
|
|
+ * field.
|
|
+ *
|
|
+ * Needed so we can update the req field concurrently from the interrupt
|
|
+ * handler and the scheduler logic.
|
|
+ *
|
|
+ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
|
|
+ * interface sections are mapped uncached/write-combined right now, and
|
|
+ * using cmpxchg() on such mappings leads to SError faults. Revisit when
|
|
+ * we have 'SHARED' GPU mappings hooked up.
|
|
+ */
|
|
+ spinlock_t lock;
|
|
+
|
|
+ /**
|
|
+ * @control: Command stream group slot control interface.
|
|
+ *
|
|
+ * Used to expose command stream group slot properties.
|
|
+ *
|
|
+ * This interface is read-only.
|
|
+ */
|
|
+ const struct panthor_fw_csg_control_iface *control;
|
|
+
|
|
+ /**
|
|
+ * @input: Command stream slot input interface.
|
|
+ *
|
|
+ * Used for host updates/events.
|
|
+ */
|
|
+ struct panthor_fw_csg_input_iface *input;
|
|
+
|
|
+ /**
|
|
+ * @output: Command stream group slot output interface.
|
|
+ *
|
|
+ * Used for FW updates/events.
|
|
+ *
|
|
+ * This interface is read-only.
|
|
+ */
|
|
+ const struct panthor_fw_csg_output_iface *output;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * struct panthor_fw_global_iface - Firmware global interface
|
|
+ */
|
|
+struct panthor_fw_global_iface {
|
|
+ /**
|
|
+ * @lock: Lock protecting access to the panthor_fw_global_input_iface::req
|
|
+ * field.
|
|
+ *
|
|
+ * Needed so we can update the req field concurrently from the interrupt
|
|
+ * handler and the scheduler/FW management logic.
|
|
+ *
|
|
+ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
|
|
+ * interface sections are mapped uncached/write-combined right now, and
|
|
+ * using cmpxchg() on such mappings leads to SError faults. Revisit when
|
|
+ * we have 'SHARED' GPU mappings hooked up.
|
|
+ */
|
|
+ spinlock_t lock;
|
|
+
|
|
+ /**
|
|
+ * @control: Command stream group slot control interface.
|
|
+ *
|
|
+ * Used to expose global FW properties.
|
|
+ *
|
|
+ * This interface is read-only.
|
|
+ */
|
|
+ const struct panthor_fw_global_control_iface *control;
|
|
+
|
|
+ /**
|
|
+ * @input: Global input interface.
|
|
+ *
|
|
+ * Used for host updates/events.
|
|
+ */
|
|
+ struct panthor_fw_global_input_iface *input;
|
|
+
|
|
+ /**
|
|
+ * @output: Global output interface.
|
|
+ *
|
|
+ * Used for FW updates/events.
|
|
+ *
|
|
+ * This interface is read-only.
|
|
+ */
|
|
+ const struct panthor_fw_global_output_iface *output;
|
|
+};
|
|
+
|
|
+/**
|
|
+ * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW
|
|
+ * @__iface: The interface to operate on.
|
|
+ * @__in_reg: Name of the register to update in the input section of the interface.
|
|
+ * @__out_reg: Name of the register to take as a reference in the output section of the
|
|
+ * interface.
|
|
+ * @__mask: Mask to apply to the update.
|
|
+ *
|
|
+ * The Host -> FW event/message passing was designed to be lockless, with each side of
|
|
+ * the channel having its writeable section. Events are signaled as a difference between
|
|
+ * the host and FW side in the req/ack registers (when a bit differs, there's an event
|
|
+ * pending, when they are the same, nothing needs attention).
|
|
+ *
|
|
+ * This helper allows one to update the req register based on the current value of the
|
|
+ * ack register managed by the FW. Toggling a specific bit will flag an event. In order
|
|
+ * for events to be re-evaluated, the interface doorbell needs to be rung.
|
|
+ *
|
|
+ * Concurrent accesses to the same req register is covered.
|
|
+ *
|
|
+ * Anything requiring atomic updates to multiple registers requires a dedicated lock.
|
|
+ */
|
|
+#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \
|
|
+ do { \
|
|
+ u32 __cur_val, __new_val, __out_val; \
|
|
+ spin_lock(&(__iface)->lock); \
|
|
+ __cur_val = READ_ONCE((__iface)->input->__in_reg); \
|
|
+ __out_val = READ_ONCE((__iface)->output->__out_reg); \
|
|
+ __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \
|
|
+ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
|
|
+ spin_unlock(&(__iface)->lock); \
|
|
+ } while (0)
|
|
+
|
|
+/**
|
|
+ * panthor_fw_update_reqs() - Update bits to reflect a configuration change
|
|
+ * @__iface: The interface to operate on.
|
|
+ * @__in_reg: Name of the register to update in the input section of the interface.
|
|
+ * @__val: Value to set.
|
|
+ * @__mask: Mask to apply to the update.
|
|
+ *
|
|
+ * Some configuration get passed through req registers that are also used to
|
|
+ * send events to the FW. Those req registers being updated from the interrupt
|
|
+ * handler, they require special helpers to update the configuration part as well.
|
|
+ *
|
|
+ * Concurrent accesses to the same req register is covered.
|
|
+ *
|
|
+ * Anything requiring atomic updates to multiple registers requires a dedicated lock.
|
|
+ */
|
|
+#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \
|
|
+ do { \
|
|
+ u32 __cur_val, __new_val; \
|
|
+ spin_lock(&(__iface)->lock); \
|
|
+ __cur_val = READ_ONCE((__iface)->input->__in_reg); \
|
|
+ __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
|
|
+ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
|
|
+ spin_unlock(&(__iface)->lock); \
|
|
+ } while (0)
|
|
+
|
|
+struct panthor_fw_global_iface *
|
|
+panthor_fw_get_glb_iface(struct panthor_device *ptdev);
|
|
+
|
|
+struct panthor_fw_csg_iface *
|
|
+panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);
|
|
+
|
|
+struct panthor_fw_cs_iface *
|
|
+panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);
|
|
+
|
|
+int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
|
|
+ u32 *acked, u32 timeout_ms);
|
|
+
|
|
+int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked,
|
|
+ u32 timeout_ms);
|
|
+
|
|
+void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot);
|
|
+
|
|
+struct panthor_kernel_bo *
|
|
+panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
|
|
+ struct panthor_fw_ringbuf_input_iface **input,
|
|
+ const struct panthor_fw_ringbuf_output_iface **output,
|
|
+ u32 *input_fw_va, u32 *output_fw_va);
|
|
+struct panthor_kernel_bo *
|
|
+panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size);
|
|
+
|
|
+struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev);
|
|
+
|
|
+void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang);
|
|
+int panthor_fw_post_reset(struct panthor_device *ptdev);
|
|
+
|
|
+static inline void panthor_fw_suspend(struct panthor_device *ptdev)
|
|
+{
|
|
+ panthor_fw_pre_reset(ptdev, false);
|
|
+}
|
|
+
|
|
+static inline int panthor_fw_resume(struct panthor_device *ptdev)
|
|
+{
|
|
+ return panthor_fw_post_reset(ptdev);
|
|
+}
|
|
+
|
|
+int panthor_fw_init(struct panthor_device *ptdev);
|
|
+void panthor_fw_unplug(struct panthor_device *ptdev);
|
|
+
|
|
+#endif
|