Skip to content

Commit

Permalink
drm/amdgpu: Make SDMA phase quantum configurable
Browse files Browse the repository at this point in the history
Set a configurable SDMA phase quantum when enabling SDMA context
switching. The default value significantly reduces SDMA latency
in page table updates when user-mode SDMA queues have concurrent
activity, compared to the initial HW setting.

Change-Id: Id99c52e893d0358374ea9a3fbc2181f0c60b1b42
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
  • Loading branch information
fxkamd committed Jul 16, 2016
1 parent d020404 commit c8bd412
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 2 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ extern int amdgpu_sched_hw_submission;
extern int amdgpu_powerplay;
extern unsigned amdgpu_pcie_gen_cap;
extern unsigned amdgpu_pcie_lane_cap;
extern unsigned amdgpu_sdma_phase_quantum;

#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
Expand Down
4 changes: 4 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ int amdgpu_sched_hw_submission = 2;
int amdgpu_powerplay = -1;
unsigned amdgpu_pcie_gen_cap = 0;
unsigned amdgpu_pcie_lane_cap = 0;
unsigned amdgpu_sdma_phase_quantum = 32;

MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
Expand Down Expand Up @@ -174,6 +175,9 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);

MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);

static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
Expand Down
32 changes: 31 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/cik_sdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -351,14 +351,44 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
*/
static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
u32 f32_cntl, phase_quantum = 0;
int i;

if (amdgpu_sdma_phase_quantum) {
unsigned value = amdgpu_sdma_phase_quantum;
unsigned unit = 0;

while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
value = (value + 1) >> 1;
unit++;
}
if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
WARN_ONCE(1,
"clamping sdma_phase_quantum to %uK clock cycles\n",
value << unit);
}
phase_quantum =
value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
}

for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
if (enable) {
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 1);
if (amdgpu_sdma_phase_quantum) {
WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
phase_quantum);
WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
phase_quantum);
}
} else {
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 0);
Expand Down
32 changes: 31 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,16 +576,46 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
*/
static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
u32 f32_cntl, phase_quantum = 0;
int i;

if (amdgpu_sdma_phase_quantum) {
unsigned value = amdgpu_sdma_phase_quantum;
unsigned unit = 0;

while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
value = (value + 1) >> 1;
unit++;
}
if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
WARN_ONCE(1,
"clamping sdma_phase_quantum to %uK clock cycles\n",
value << unit);
}
phase_quantum =
value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
}

for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
if (enable) {
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 1);
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
ATC_L1_ENABLE, 1);
if (amdgpu_sdma_phase_quantum) {
WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
phase_quantum);
WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
phase_quantum);
}
} else {
f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
AUTO_CTXSW_ENABLE, 0);
Expand Down

0 comments on commit c8bd412

Please sign in to comment.