-
Notifications
You must be signed in to change notification settings - Fork 4
/
mali_base_jm_kernel.h
1236 lines (1109 loc) · 51 KB
/
mali_base_jm_kernel.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
* (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation, and any use by you of this program is subject to the terms
* of such GNU license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
#ifndef _UAPI_BASE_JM_KERNEL_H_
#define _UAPI_BASE_JM_KERNEL_H_
#include <linux/types.h>
/* Bitmask type for the memory allocation flags (BASE_MEM_* and BASEP_MEM_*)
 * defined below. Every flag macro casts its value to this type before
 * shifting, so the flags are unsigned 32-bit values.
 */
typedef __u32 base_mem_alloc_flags;
/* Memory allocation, access/hint flags.
 *
 * See base_mem_alloc_flags.
 *
 * Each flag occupies a single bit; the bit position is the shift count in
 * its definition.
 */
/* IN */
/* Read access CPU side (bit 0)
 */
#define BASE_MEM_PROT_CPU_RD ((base_mem_alloc_flags)1 << 0)
/* Write access CPU side (bit 1)
 */
#define BASE_MEM_PROT_CPU_WR ((base_mem_alloc_flags)1 << 1)
/* Read access GPU side (bit 2)
 */
#define BASE_MEM_PROT_GPU_RD ((base_mem_alloc_flags)1 << 2)
/* Write access GPU side (bit 3)
 */
#define BASE_MEM_PROT_GPU_WR ((base_mem_alloc_flags)1 << 3)
/* Execute allowed on the GPU side (bit 4)
 */
#define BASE_MEM_PROT_GPU_EX ((base_mem_alloc_flags)1 << 4)
/* Will be permanently mapped in kernel space (bit 5).
 * Flag is only allowed on allocations originating from kbase; it is part of
 * BASEP_MEM_FLAGS_KERNEL_ONLY.
 */
#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
/* The allocation will completely reside within the same 4GB chunk in the GPU
 * virtual space (bit 6).
 * Since this flag is primarily required only for the TLS memory which will
 * not be used to contain executable code and also not used for Tiler heap,
 * it can't be used along with the BASE_MEM_PROT_GPU_EX and
 * BASE_MEM_TILER_ALIGN_TOP flags.
 */
#define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
/* Userspace is not allowed to free this memory (bit 7).
 * Flag is only allowed on allocations originating from kbase; it is part of
 * BASEP_MEM_FLAGS_KERNEL_ONLY.
 */
#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
/* Bit 8 is reserved; it is part of BASE_MEM_FLAGS_RESERVED.
 */
#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
/* Grow backing store on GPU Page Fault (bit 9)
 */
#define BASE_MEM_GROW_ON_GPF ((base_mem_alloc_flags)1 << 9)
/* Page coherence Outer shareable, if available (bit 10)
 */
#define BASE_MEM_COHERENT_SYSTEM ((base_mem_alloc_flags)1 << 10)
/* Page coherence Inner shareable (bit 11)
 */
#define BASE_MEM_COHERENT_LOCAL ((base_mem_alloc_flags)1 << 11)
/* IN/OUT */
/* Should be cached on the CPU, returned if actually cached (bit 12)
 */
#define BASE_MEM_CACHED_CPU ((base_mem_alloc_flags)1 << 12)
/* IN/OUT */
/* Must have same VA on both the GPU and the CPU (bit 13)
 */
#define BASE_MEM_SAME_VA ((base_mem_alloc_flags)1 << 13)
/* OUT */
/* Must call mmap to acquire a GPU address for the allocation (bit 14).
 * This is the only flag contained in BASE_MEM_FLAGS_OUTPUT_MASK.
 */
#define BASE_MEM_NEED_MMAP ((base_mem_alloc_flags)1 << 14)
/* IN */
/* Page coherence Outer shareable, required (bit 15).
 */
#define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
/* Protected memory (bit 16)
 */
#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
/* Physical memory is not needed (bit 17)
 */
#define BASE_MEM_DONT_NEED ((base_mem_alloc_flags)1 << 17)
/* Must use shared CPU/GPU zone (SAME_VA zone) but doesn't require the
 * addresses to be the same (bit 18)
 */
#define BASE_MEM_IMPORT_SHARED ((base_mem_alloc_flags)1 << 18)
/*
 * Bit 19 is reserved; it is part of BASE_MEM_FLAGS_RESERVED.
 *
 * Do not remove, use the next unreserved bit for new flags
 */
#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
/*
 * Memory starting from the end of the initial commit is aligned to 'extension'
 * pages, where 'extension' must be a power of 2 and no more than
 * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES (bit 20)
 */
#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
/* Should be uncached on the GPU, will work only for GPUs using AARCH64 mmu
 * mode (bit 21). Some components within the GPU might only be able to access
 * memory that is GPU cacheable. Refer to the specific GPU implementation for
 * more details. The 3 shareability flags will be ignored for GPU uncached
 * memory.
 * If used while importing USER_BUFFER type memory, then the import will fail
 * if the memory is not aligned to GPU and CPU cache line width.
 */
#define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
/*
 * Bits [22:25] for group_id (0~15).
 *
 * base_mem_group_id_set() should be used to pack a memory group ID into a
 * base_mem_alloc_flags value instead of accessing the bits directly.
 * base_mem_group_id_get() should be used to extract the memory group ID from
 * a base_mem_alloc_flags value.
 *
 * The mask is the 4-bit value 0xF shifted up by BASEP_MEM_GROUP_ID_SHIFT.
 */
#define BASEP_MEM_GROUP_ID_SHIFT 22
#define BASE_MEM_GROUP_ID_MASK \
((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
/* Must do CPU cache maintenance when imported memory is mapped/unmapped
 * on GPU (bit 26). Currently applicable to dma-buf type only.
 */
#define BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP ((base_mem_alloc_flags)1 << 26)
/* Use the GPU VA chosen by the kernel client (bit 27).
 * Part of BASEP_MEM_FLAGS_KERNEL_ONLY.
 */
#define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
/* OUT */
/* Kernel side cache sync ops required (bit 28).
 * NOTE(review): this flag is marked OUT, but BASE_MEM_FLAGS_OUTPUT_MASK only
 * contains BASE_MEM_NEED_MMAP, so this bit currently falls inside
 * BASE_MEM_FLAGS_INPUT_MASK - confirm that this is intended.
 */
#define BASE_MEM_KERNEL_SYNC ((base_mem_alloc_flags)1 << 28)
/* Force trimming of JIT allocations when creating a new allocation (bit 29).
 * Part of BASEP_MEM_FLAGS_KERNEL_ONLY.
 */
#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29)
/* Number of bits used as flags for base memory management
 *
 * Must be kept in sync with the base_mem_alloc_flags flags: it is one more
 * than the highest flag bit defined (currently bit 29).
 */
#define BASE_MEM_FLAGS_NR_BITS 30
/* A mask of all the flags which are only valid for allocations within kbase,
 * and may not be passed from user space.
 */
#define BASEP_MEM_FLAGS_KERNEL_ONLY \
	(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
	 BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM)
/* A mask for all output bits, excluding IN/OUT bits.
 */
#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
/* A mask for all input bits, including IN/OUT bits.
 *
 * Covers the low BASE_MEM_FLAGS_NR_BITS bits with the output-only bits
 * removed. The shifted constant is typed as base_mem_alloc_flags (unsigned),
 * like every other flag in this file, so that building the mask never shifts
 * a signed int into its sign bit if BASE_MEM_FLAGS_NR_BITS grows to 31.
 */
#define BASE_MEM_FLAGS_INPUT_MASK \
	((((base_mem_alloc_flags)1 << BASE_MEM_FLAGS_NR_BITS) - 1) & \
	 ~BASE_MEM_FLAGS_OUTPUT_MASK)
/* A mask of all currently reserved flags
 */
#define BASE_MEM_FLAGS_RESERVED \
	(BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
/* Special memory handles. Each one is a small index (0..4) shifted left by
 * 12 bits.
 */
#define BASEP_MEM_INVALID_HANDLE (0ull << 12)
#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
#define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
/* Handle indices from 5 up to 47 (shifted left by 12, presumably PAGE_SHIFT)
 * are reserved for future special handles.
 * NOTE(review): BASE_MEM_COOKIE_BASE starts at index 64, so the upper bound
 * of 47 stated here looks stale - confirm the intended reserved range.
 */
/* First handle value of the cookie range: index 64 shifted left by 12. */
#define BASE_MEM_COOKIE_BASE (64ul << 12)
/* BASE_MEM_COOKIE_BASE plus (BITS_PER_LONG << 12).
 * NOTE(review): BITS_PER_LONG is not defined in the visible part of this
 * header; it must be provided by whoever expands this macro.
 */
#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
BASE_MEM_COOKIE_BASE)
/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
 * initial commit is aligned to 'extension' pages, where 'extension' must be a
 * power of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
 * (bit 0 of the JIT allocation flags).
 */
#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
/*
 * If set, the heap info address points to a __u32 holding the used size in
 * bytes; otherwise it points to a __u64 holding the lowest address of unused
 * memory (bit 1 of the JIT allocation flags).
 */
#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
/*
 * Valid set of just-in-time memory allocation flags: the OR of the two
 * BASE_JIT_ALLOC_* flags defined above.
 *
 * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
 * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
 * and heap_info_gpu_addr being 0 will be rejected).
 */
#define BASE_JIT_ALLOC_VALID_FLAGS \
(BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
/**
 * typedef base_context_create_flags - Flags to pass to ::base_context_init.
 *
 * Flags can be ORed together to enable multiple things.
 *
 * These share the same space as BASEP_CONTEXT_FLAG_*, and so must
 * not collide with them.
 */
typedef __u32 base_context_create_flags;
/* No flags set */
#define BASE_CONTEXT_CREATE_FLAG_NONE ((base_context_create_flags)0)
/* Base context is embedded in a cctx object (flag used for CINSTR
 * software counter macros) (bit 0)
 */
#define BASE_CONTEXT_CCTX_EMBEDDED ((base_context_create_flags)1 << 0)
/* Base context is a 'System Monitor' context for Hardware counters (bit 1).
 *
 * One important side effect of this is that job submission is disabled.
 */
#define BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED \
((base_context_create_flags)1 << 1)
/* Bit-shift used to encode a memory group ID in base_context_create_flags
 */
#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
/* Bitmask used to encode a memory group ID in base_context_create_flags.
 * It is the 4-bit value 0xF shifted by BASEP_CONTEXT_MMU_GROUP_ID_SHIFT,
 * i.e. bits [3:6].
 */
#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
/* Bitpattern describing the base_context_create_flags that can be
 * passed to the kernel
 */
#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
(BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
BASEP_CONTEXT_MMU_GROUP_ID_MASK)
/* Bitpattern describing the ::base_context_create_flags that can be
 * passed to base_context_init(): the kernel flags plus
 * BASE_CONTEXT_CCTX_EMBEDDED.
 */
#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
(BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
/*
 * Private flags used on the base context
 *
 * These start at bit 31, and run down to zero.
 *
 * They share the same space as base_context_create_flags, and so must
 * not collide with them.
 */
/* Private flag tracking whether job descriptor dumping is disabled (bit 31).
 *
 * The constant is converted to the unsigned flag type before shifting:
 * shifting a plain (signed) int 1 left by 31 is undefined behaviour in C.
 */
#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
	((base_context_create_flags)1 << 31)
/* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
 * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) (bit 0)
 */
#define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
/* Indicate that job dumping is enabled (bit 1). This could affect certain
 * timers to account for the performance impact.
 */
#define BASE_TLSTREAM_JOB_DUMPING_ENABLED (1 << 1)
/* Mask of all BASE_TLSTREAM_* flags defined above. */
#define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
BASE_TLSTREAM_JOB_DUMPING_ENABLED)
/*
 * Dependency stuff, keep it private for now. May want to expose it if
 * we decide to make the number of semaphores a configurable
 * option.
 *
 * 256 is also the number of distinct values of the 8-bit base_atom_id type
 * declared later in this header.
 */
#define BASE_JD_ATOM_COUNT 256
/* Maximum number of concurrent render passes.
 */
#define BASE_JD_RP_COUNT (256)
/* Set/reset values for a software event (single-byte values: 1 = set,
 * 0 = reset)
 */
#define BASE_JD_SOFT_EVENT_SET ((unsigned char)1)
#define BASE_JD_SOFT_EVENT_RESET ((unsigned char)0)
/**
 * struct base_jd_udata - Per-job data
 *
 * @blob: per-job data array (two 64-bit words)
 *
 * This structure is used to store per-job data, and is completely unused
 * by the Base driver. It can be used to store things such as a callback
 * function pointer or data to handle job completion. It is guaranteed to be
 * untouched by the Base driver.
 */
struct base_jd_udata {
__u64 blob[2];
};
/**
 * typedef base_jd_dep_type - Job dependency type.
 *
 * A flags field will be inserted into the atom structure to specify whether a
 * dependency is a data or ordering dependency (by putting it before/after
 * 'core_req' in the structure it should be possible to add without changing
 * the structure size).
 * When the flag is set for a particular dependency to signal that it is an
 * ordering only dependency then errors will not be propagated.
 *
 * The valid values are the BASE_JD_DEP_TYPE_* constants below.
 */
typedef __u8 base_jd_dep_type;
#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
/**
 * typedef base_jd_core_req - Job chain hardware requirements.
 *
 * A job chain must specify what GPU features it needs to allow the
 * driver to schedule the job correctly. Not specifying the
 * correct settings can/will cause an early job termination. Multiple
 * values can be ORed together to specify multiple requirements.
 * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
 * dependencies, and that doesn't execute anything on the hardware.
 */
typedef __u32 base_jd_core_req;
/* Requirements that come from the HW */
/* No requirement, dependency only (value 0, no bits set)
 */
#define BASE_JD_REQ_DEP ((base_jd_core_req)0)
/* Requires fragment shaders (bit 0)
 */
#define BASE_JD_REQ_FS ((base_jd_core_req)1 << 0)
/* Requires compute shaders (bit 1)
 *
 * This covers any of the following GPU job types:
 * - Vertex Shader Job
 * - Geometry Shader Job
 * - An actual Compute Shader Job
 *
 * Compare this with BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
 * job is specifically just the "Compute Shader" job type, and not the "Vertex
 * Shader" nor the "Geometry Shader" job type.
 */
#define BASE_JD_REQ_CS ((base_jd_core_req)1 << 1)
/* Requires tiling (bit 2) */
#define BASE_JD_REQ_T ((base_jd_core_req)1 << 2)
/* Requires cache flushes (bit 3) */
#define BASE_JD_REQ_CF ((base_jd_core_req)1 << 3)
/* Requires value writeback (bit 4) */
#define BASE_JD_REQ_V ((base_jd_core_req)1 << 4)
/* SW-only requirements - the HW does not expose these as part of the job slot
 * capabilities
 */
/* Requires fragment job with AFBC encoding (bit 13; note that this flag is
 * listed out of bit order)
 */
#define BASE_JD_REQ_FS_AFBC ((base_jd_core_req)1 << 13)
/* SW-only requirement: coalesce completion events (bit 5).
 * If this bit is set then completion of this atom will not cause an event to
 * be sent to userspace, whether successful or not; completion events will be
 * deferred until an atom completes which does not have this bit set.
 *
 * This bit may not be used in combination with BASE_JD_REQ_EXTERNAL_RESOURCES.
 */
#define BASE_JD_REQ_EVENT_COALESCE ((base_jd_core_req)1 << 5)
/* SW Only requirement: the job chain requires a coherent core group (bit 6).
 * We don't mind which coherent core group is used.
 */
#define BASE_JD_REQ_COHERENT_GROUP ((base_jd_core_req)1 << 6)
/* SW Only requirement: The performance counters should be enabled only when
 * they are needed, to reduce power consumption (bit 7).
 */
#define BASE_JD_REQ_PERMON ((base_jd_core_req)1 << 7)
/* SW Only requirement: External resources are referenced by this atom
 * (bit 8).
 *
 * This bit may not be used in combination with BASE_JD_REQ_EVENT_COALESCE and
 * BASE_JD_REQ_SOFT_EVENT_WAIT.
 */
#define BASE_JD_REQ_EXTERNAL_RESOURCES ((base_jd_core_req)1 << 8)
/* SW Only requirement: Software defined job (bit 9). Jobs with this bit set
 * will not be submitted to the hardware but will cause some action to happen
 * within the driver
 */
#define BASE_JD_REQ_SOFT_JOB ((base_jd_core_req)1 << 9)
/* Soft job types. Each one is BASE_JD_REQ_SOFT_JOB ORed with a small type
 * code held in the low bits; BASE_JD_REQ_SOFT_JOB_TYPE is the mask that
 * selects the soft job bit together with that type code.
 */
#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
/* 0x4 RESERVED for now */
/* SW only requirement: event wait/trigger job.
 *
 * - BASE_JD_REQ_SOFT_EVENT_WAIT: this job will block until the event is set.
 * - BASE_JD_REQ_SOFT_EVENT_SET: this job sets the event, thus unblocks the
 *   other waiting jobs. It completes immediately.
 * - BASE_JD_REQ_SOFT_EVENT_RESET: this job resets the event, making it
 *   possible for other jobs to wait upon. It completes immediately.
 */
#define BASE_JD_REQ_SOFT_EVENT_WAIT (BASE_JD_REQ_SOFT_JOB | 0x5)
#define BASE_JD_REQ_SOFT_EVENT_SET (BASE_JD_REQ_SOFT_JOB | 0x6)
#define BASE_JD_REQ_SOFT_EVENT_RESET (BASE_JD_REQ_SOFT_JOB | 0x7)
#define BASE_JD_REQ_SOFT_DEBUG_COPY (BASE_JD_REQ_SOFT_JOB | 0x8)
/* SW only requirement: Just In Time allocation
 *
 * This job requests a single or multiple just-in-time allocations through a
 * list of base_jit_alloc_info structures which is passed via the jc element
 * of the atom. The number of base_jit_alloc_info structures present in the
 * list is passed via the nr_extres element of the atom.
 *
 * It should be noted that the id entry in base_jit_alloc_info must not
 * be reused until it has been released via BASE_JD_REQ_SOFT_JIT_FREE.
 *
 * Should this soft job fail it is expected that a BASE_JD_REQ_SOFT_JIT_FREE
 * soft job to free the JIT allocation is still made.
 *
 * The job will complete immediately.
 */
#define BASE_JD_REQ_SOFT_JIT_ALLOC (BASE_JD_REQ_SOFT_JOB | 0x9)
/* SW only requirement: Just In Time free
 *
 * This job requests a single or multiple just-in-time allocations created by
 * BASE_JD_REQ_SOFT_JIT_ALLOC to be freed. The ID list of the just-in-time
 * allocations is passed via the jc element of the atom.
 *
 * The job will complete immediately.
 */
#define BASE_JD_REQ_SOFT_JIT_FREE (BASE_JD_REQ_SOFT_JOB | 0xa)
/* SW only requirement: Map external resource
 *
 * This job requests external resource(s) are mapped once the dependencies
 * of the job have been satisfied. The list of external resources is
 * passed via the jc element of the atom which is a pointer to a
 * base_external_resource_list.
 */
#define BASE_JD_REQ_SOFT_EXT_RES_MAP (BASE_JD_REQ_SOFT_JOB | 0xb)
/* SW only requirement: Unmap external resource
 *
 * This job requests external resource(s) are unmapped once the dependencies
 * of the job have been satisfied. The list of external resources is
 * passed via the jc element of the atom which is a pointer to a
 * base_external_resource_list.
 */
#define BASE_JD_REQ_SOFT_EXT_RES_UNMAP (BASE_JD_REQ_SOFT_JOB | 0xc)
/* HW Requirement: Requires Compute shaders (but not Vertex or Geometry
 * Shaders) (bit 10)
 *
 * This indicates that the Job Chain contains GPU jobs of the 'Compute
 * Shaders' type.
 *
 * In contrast to BASE_JD_REQ_CS, this does not indicate that the Job
 * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
 */
#define BASE_JD_REQ_ONLY_COMPUTE ((base_jd_core_req)1 << 10)
/* HW Requirement: Use the base_jd_atom::device_nr field to specify a
 * particular core group (bit 11)
 *
 * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag
 * takes priority
 *
 * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
 *
 * If the core availability policy is keeping the required core group turned
 * off, then the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
 */
#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP ((base_jd_core_req)1 << 11)
/* SW Flag: If this bit is set then the successful completion of this atom
 * will not cause an event to be sent to userspace (bit 12)
 */
#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE ((base_jd_core_req)1 << 12)
/* SW Flag: If this bit is set then completion of this atom will not cause an
 * event to be sent to userspace, whether successful or not (bit 14; bit 13
 * is BASE_JD_REQ_FS_AFBC).
 */
#define BASEP_JD_REQ_EVENT_NEVER ((base_jd_core_req)1 << 14)
/* SW Flag: Skip GPU cache clean and invalidation before starting a GPU job
 * (bit 15).
 *
 * If this bit is set then the GPU's cache will not be cleaned and invalidated
 * until a GPU job starts which does not have this bit set or a job completes
 * which does not have the BASE_JD_REQ_SKIP_CACHE_END bit set. Do not use
 * if the CPU may have written to memory addressed by the job since the last job
 * without this bit set was submitted.
 */
#define BASE_JD_REQ_SKIP_CACHE_START ((base_jd_core_req)1 << 15)
/* SW Flag: Skip GPU cache clean and invalidation after a GPU job completes
 * (bit 16).
 *
 * If this bit is set then the GPU's cache will not be cleaned and invalidated
 * until a GPU job completes which does not have this bit set or a job starts
 * which does not have the BASE_JD_REQ_SKIP_CACHE_START bit set. Do not use
 * if the CPU may read from or partially overwrite memory addressed by the job
 * before the next job without this bit set completes.
 */
#define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
/* Request the atom be executed on a specific job slot (bit 17).
 *
 * When this flag is specified, it takes precedence over any existing job slot
 * selection logic.
 */
#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
/* SW-only requirement: The atom is the start of a renderpass (bit 18).
 *
 * If this bit is set then the job chain will be soft-stopped if it causes the
 * GPU to write beyond the end of the physical pages backing the tiler heap, and
 * committing more memory to the heap would exceed an internal threshold. It may
 * be resumed after running one of the job chains attached to an atom with
 * BASE_JD_REQ_END_RENDERPASS set and the same renderpass ID. It may be
 * resumed multiple times until it completes without memory usage exceeding the
 * threshold.
 *
 * Usually used with BASE_JD_REQ_T.
 */
#define BASE_JD_REQ_START_RENDERPASS ((base_jd_core_req)1 << 18)
/* SW-only requirement: The atom is the end of a renderpass (bit 19).
 *
 * If this bit is set then the atom incorporates the CPU address of a
 * base_jd_fragment object instead of the GPU address of a job chain.
 *
 * Which job chain is run depends upon whether the atom with the same renderpass
 * ID and the BASE_JD_REQ_START_RENDERPASS bit set completed normally or
 * was soft-stopped when it exceeded an upper threshold for tiler heap memory
 * usage.
 *
 * It also depends upon whether one of the job chains attached to the atom has
 * already been run as part of the same renderpass (in which case it would have
 * written unresolved multisampled and otherwise-discarded output to temporary
 * buffers that need to be read back). The job chain for doing a forced read and
 * forced write (from/to temporary buffers) is run as many times as necessary.
 *
 * Usually used with BASE_JD_REQ_FS.
 */
#define BASE_JD_REQ_END_RENDERPASS ((base_jd_core_req)1 << 19)
/* SW-only requirement: The atom needs to run on a limited core mask affinity
 * (bit 20).
 *
 * If this bit is set then the kbase_context.limited_core_mask will be applied
 * to the affinity.
 */
#define BASE_JD_REQ_LIMITED_CORE_MASK ((base_jd_core_req)1 << 20)
/* These requirement bits are currently unused in base_jd_core_req.
 *
 * Computed as the bitwise complement of every requirement bit that is in
 * use: the atom type bits (BASE_JD_REQ_ATOM_TYPE) plus the individual flags
 * listed below. BASE_JD_REQ_ATOM_TYPE is defined further down in this header;
 * that is fine because macros are only expanded where this one is used.
 */
#define BASEP_JD_REQ_RESERVED \
(~(BASE_JD_REQ_ATOM_TYPE | BASE_JD_REQ_EXTERNAL_RESOURCES | \
BASE_JD_REQ_EVENT_ONLY_ON_FAILURE | BASEP_JD_REQ_EVENT_NEVER | \
BASE_JD_REQ_EVENT_COALESCE | \
BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
BASE_JD_REQ_JOB_SLOT | BASE_JD_REQ_START_RENDERPASS | \
BASE_JD_REQ_END_RENDERPASS | BASE_JD_REQ_LIMITED_CORE_MASK))
/* Mask of all bits in base_jd_core_req that control the type of the atom:
 * the five HW requirement bits (FS, CS, T, CF, V), the soft job bit and the
 * compute-only bit.
 *
 * This allows dependency only atoms to have flags set
 */
#define BASE_JD_REQ_ATOM_TYPE \
(BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T | BASE_JD_REQ_CF | \
BASE_JD_REQ_V | BASE_JD_REQ_SOFT_JOB | BASE_JD_REQ_ONLY_COMPUTE)
/*
 * Mask of all bits in base_jd_core_req that control the type of a soft job:
 * the soft job bit plus the low five bits (0x1f) that hold the soft job type
 * code. Those low five bits are the same bit positions used by the HW
 * requirement flags for atoms that are not soft jobs.
 */
#define BASE_JD_REQ_SOFT_JOB_TYPE (BASE_JD_REQ_SOFT_JOB | 0x1f)
/* Evaluates to a non-zero value when the core requirements passed in describe
 * either a soft job (BASE_JD_REQ_SOFT_JOB is set) or a dependency only job
 * (none of the BASE_JD_REQ_ATOM_TYPE bits are set, i.e. the atom type equals
 * BASE_JD_REQ_DEP); evaluates to zero otherwise.
 *
 * Note that core_req may be evaluated more than once.
 */
#define BASE_JD_REQ_SOFT_JOB_OR_DEP(core_req) \
	((((core_req) & BASE_JD_REQ_SOFT_JOB) != 0) || \
	 (((core_req) & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP))
/**
 * enum kbase_jd_atom_state - States of an atom.
 *
 * @KBASE_JD_ATOM_STATE_UNUSED:       Atom is not used.
 * @KBASE_JD_ATOM_STATE_QUEUED:       Atom is queued in JD.
 * @KBASE_JD_ATOM_STATE_IN_JS:        Atom has been given to JS (is
 *                                    runnable/running).
 * @KBASE_JD_ATOM_STATE_HW_COMPLETED: Atom has been completed, but not yet
 *                                    handed back to job dispatcher for
 *                                    dependency resolution.
 * @KBASE_JD_ATOM_STATE_COMPLETED:    Atom has been completed, but not yet
 *                                    handed back to userspace.
 *
 * The numeric values are spelled out explicitly; they are the same values the
 * enumerators receive by default when listed in this order.
 */
enum kbase_jd_atom_state {
	KBASE_JD_ATOM_STATE_UNUSED = 0,
	KBASE_JD_ATOM_STATE_QUEUED = 1,
	KBASE_JD_ATOM_STATE_IN_JS = 2,
	KBASE_JD_ATOM_STATE_HW_COMPLETED = 3,
	KBASE_JD_ATOM_STATE_COMPLETED = 4,
};
/**
 * typedef base_atom_id - Type big enough to store an atom number in.
 *
 * It is an 8-bit unsigned type, so it can hold 256 distinct atom numbers.
 */
typedef __u8 base_atom_id;
/**
 * struct base_dependency - An atom number paired with a dependency type.
 *
 * @atom_id: An atom number
 * @dependency_type: Dependency type (see base_jd_dep_type)
 */
struct base_dependency {
base_atom_id atom_id;
base_jd_dep_type dependency_type;
};
/**
 * struct base_jd_fragment - Set of GPU fragment job chains used for rendering.
 *
 * @norm_read_norm_write: Job chain for full rendering.
 *                        GPU address of a fragment job chain to render in the
 *                        circumstance where the tiler job chain did not exceed
 *                        its memory usage threshold and no fragment job chain
 *                        was previously run for the same renderpass.
 *                        It is used no more than once per renderpass.
 * @norm_read_forced_write: Job chain for starting incremental
 *                          rendering.
 *                          GPU address of a fragment job chain to render in
 *                          the circumstance where the tiler job chain exceeded
 *                          its memory usage threshold for the first time and
 *                          no fragment job chain was previously run for the
 *                          same renderpass.
 *                          Writes unresolved multisampled and normally-
 *                          discarded output to temporary buffers that must be
 *                          read back by a subsequent forced_read job chain
 *                          before the renderpass is complete.
 *                          It is used no more than once per renderpass.
 * @forced_read_forced_write: Job chain for continuing incremental
 *                            rendering.
 *                            GPU address of a fragment job chain to render in
 *                            the circumstance where the tiler job chain
 *                            exceeded its memory usage threshold again
 *                            and a fragment job chain was previously run for
 *                            the same renderpass.
 *                            Reads unresolved multisampled and
 *                            normally-discarded output from temporary buffers
 *                            written by a previous forced_write job chain and
 *                            writes the same to temporary buffers again.
 *                            It is used as many times as required until
 *                            rendering completes.
 * @forced_read_norm_write: Job chain for ending incremental rendering.
 *                          GPU address of a fragment job chain to render in the
 *                          circumstance where the tiler job chain did not
 *                          exceed its memory usage threshold this time and a
 *                          fragment job chain was previously run for the same
 *                          renderpass.
 *                          Reads unresolved multisampled and normally-discarded
 *                          output from temporary buffers written by a previous
 *                          forced_write job chain in order to complete a
 *                          renderpass.
 *                          It is used no more than once per renderpass.
 *
 * This structure is referenced by the main atom structure if
 * BASE_JD_REQ_END_RENDERPASS is set in the base_jd_core_req.
 *
 * Each member is a 64-bit value; the member names encode which read mode
 * (normal or forced) and which write mode (normal or forced) the job chain
 * uses.
 */
struct base_jd_fragment {
__u64 norm_read_norm_write;
__u64 norm_read_forced_write;
__u64 forced_read_forced_write;
__u64 forced_read_norm_write;
};
/**
 * typedef base_jd_prio - Base Atom priority.
 *
 * Only certain priority levels are actually implemented, as specified by the
 * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
 * level that is not one of those defined below.
 *
 * Priority levels only affect scheduling after the atoms have had dependencies
 * resolved. For example, a low priority atom that has had its dependencies
 * resolved might run before a higher priority atom that has not had its
 * dependencies resolved.
 *
 * In general, fragment atoms do not affect non-fragment atoms with
 * lower priorities, and vice versa. One exception is that there is only one
 * priority value for each context. So a high-priority (e.g.) fragment atom
 * could increase its context priority, causing its non-fragment atoms to also
 * be scheduled sooner.
 *
 * The atoms are scheduled as follows with respect to their priorities:
 * * Let atoms 'X' and 'Y' be atoms for the same job slot that have had their
 *   dependencies resolved, and let atom 'X' have a higher priority than
 *   atom 'Y'
 * * If atom 'Y' is currently running on the HW, then it is interrupted to
 *   allow atom 'X' to run soon after
 * * If instead neither atom 'Y' nor atom 'X' are running, then when choosing
 *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
 * * Any two atoms that have the same priority could run in any order with
 *   respect to each other. That is, there is no ordering constraint between
 *   atoms of the same priority.
 *
 * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
 * scheduled between contexts. The default value, 0, will cause higher-priority
 * atoms to be scheduled first, regardless of their context. The value 1 will
 * use a round-robin algorithm when deciding which context's atoms to schedule
 * next, so higher-priority atoms can only preempt lower priority atoms within
 * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
 * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
 *
 * Note that the numeric values of the BASE_JD_PRIO_* constants are NOT in
 * priority order: per the descriptions below, the order from highest to
 * lowest priority is REALTIME (3), HIGH (1), MEDIUM (0), LOW (2). Do not
 * compare priorities by comparing the raw values.
 */
typedef __u8 base_jd_prio;
/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
 * BASE_JD_PRIO_LOW
 */
#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
/* Low atom priority. */
#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
/* Real-Time atom priority. This is a priority higher than BASE_JD_PRIO_HIGH,
 * BASE_JD_PRIO_MEDIUM, and BASE_JD_PRIO_LOW
 */
#define BASE_JD_PRIO_REALTIME ((base_jd_prio)3)
/* Count of the number of priority levels. This itself is not a valid
 * base_jd_prio setting
 */
#define BASE_JD_NR_PRIO_LEVELS 4
/**
* struct base_jd_atom_v2 - Node of a dependency graph used to submit a
* GPU job chain or soft-job to the kernel driver.
*
* @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
* is set in the base_jd_core_req) the CPU address of a
* base_jd_fragment object.
* @udata: User data.
* @extres_list: List of external resources.
* @nr_extres: Number of external resources or JIT allocations.
* @compat_core_req: Core requirements in the legacy __u16 form, kept for
* UK 10.2 support.
* @pre_dep: Pre-dependencies. One needs to use a SETTER function to assign
* this field; this is done in order to reduce the possibility of
* improper assignment of a dependency field.
* @atom_number: Unique number to identify the atom.
* @prio: Atom priority. Refer to base_jd_prio for more details.
* @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
* specified.
* @padding: Unused. Must be zero.
* @core_req: Core requirements.
*
* This structure has changed since UK 10.2 for which base_jd_core_req was a
* __u16 value.
*
* In UK 10.3 a core_req field of a __u32 type was added to the end of the
* structure, and the place in the structure previously occupied by __u16
* core_req was kept but renamed to compat_core_req.
*
* From UK 11.20 the place of compat_core_req is occupied by __u8 jit_id[2]
* (which must be initialized to zero for UK 11.20). Compatibility with
* UK 10.x from UK 11.y is not handled because the major version increase
* prevents this.
*
* The definition below does not apply the UK 11.20 change: it keeps
* compat_core_req, has padding where struct base_jd_atom has jobslot, and has
* no jit_id, jobslot or renderpass_id members.
*/
/*
NOTE(review): the commented-out definition below appears to be kept as a
record of how this layout differs from one with jit_id, jobslot and
renderpass_id ("added"/"missing" relative to Bifrost r16p0) - confirm
before removing it.

struct base_jd_atom_v2 {
__u64 jc;
struct base_jd_udata udata;
__u64 extres_list;
__u16 nr_extres;
__u16 compat_core_req; //added from Bifrost r16p0
// __u8 jit_id[2]; //missing from Bifrost r16p0
struct base_dependency pre_dep[2];
base_atom_id atom_number;
base_jd_prio prio;
__u8 device_nr;
__u8 padding[1];
// __u8 jobslot; //missing from Bifrost r16p0
base_jd_core_req core_req;
// __u8 renderpass_id; //missing from Bifrost r16p0
};
*/
typedef struct base_jd_atom_v2 {
__u64 jc; /**< job-chain GPU address */
struct base_jd_udata udata; /**< user data */
__u64 extres_list; /**< list of external resources */
__u16 nr_extres; /**< nr of external resources or JIT allocations */
__u16 compat_core_req; /**< core requirements which correspond to the legacy support for UK 10.2 */
struct base_dependency pre_dep[2]; /**< pre-dependencies; one needs to use a SETTER function to assign this field,
this is done in order to reduce the possibility of improper assignment of a dependency field */
base_atom_id atom_number; /**< unique number to identify the atom */
base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */
__u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
__u8 padding[1]; /**< unused, must be zero */
base_jd_core_req core_req; /**< core requirements */
} base_jd_atom_v2;
/**
* struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr
* at the beginning.
*
* @seq_nr: Sequence number of logical grouping of atoms.
* @jc: GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
* is set in the base_jd_core_req) the CPU address of a
* base_jd_fragment object.
* @udata: User data.
* @extres_list: List of external resources.
* @nr_extres: Number of external resources or JIT allocations.
* @jit_id: Zero-terminated array of IDs of just-in-time memory
* allocations written to by the atom. When the atom
* completes, the value stored at the
* &struct base_jit_alloc_info.heap_info_gpu_addr of
* each allocation is read in order to enforce an
* overall physical memory usage limit.
* @pre_dep: Pre-dependencies. One needs to use a SETTER function to assign
* this field; this is done in order to reduce the possibility of
* improper assignment of a dependency field.
* @atom_number: Unique number to identify the atom.
* @prio: Atom priority. Refer to base_jd_prio for more details.
* @device_nr: Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
* specified.
* @jobslot: Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
* @core_req: Core requirements.
* @renderpass_id: Renderpass identifier used to associate an atom that has
* BASE_JD_REQ_START_RENDERPASS set in its core requirements
* with an atom that has BASE_JD_REQ_END_RENDERPASS set.
* @padding: Unused. Must be zero.
*
* Besides seq_nr, this layout differs from the base_jd_atom_v2 definition in
* this file: it has jit_id where that one has compat_core_req, jobslot where
* that one has padding, and renderpass_id plus trailing padding after
* core_req.
*/
typedef struct base_jd_atom {
__u64 seq_nr;
__u64 jc;
struct base_jd_udata udata;
__u64 extres_list;
__u16 nr_extres;
__u8 jit_id[2];
struct base_dependency pre_dep[2];
base_atom_id atom_number;
base_jd_prio prio;
__u8 device_nr;
__u8 jobslot;
base_jd_core_req core_req;
__u8 renderpass_id;
__u8 padding[7];
} base_jd_atom;
/*
* struct base_jit_alloc_info - information about one just-in-time (JIT)
* memory allocation.
*
* NOTE(review): most members are not described anywhere in the visible part
* of this header; only the points noted below can be stated from it. Confirm
* the remaining members against the driver before relying on them.
*
* Layout: assuming __u8/__u16/__u64 are 1/2/8 bytes wide, every member sits
* at a naturally aligned offset (padding[2] fills bytes 36-37, usage_id is at
* 38) and heap_info_gpu_addr starts at offset 40, giving 48 bytes with no
* implicit holes.
*/
struct base_jit_alloc_info {
__u64 gpu_alloc_addr;
__u64 va_pages;
__u64 commit_pages;
__u64 extension;
__u8 id; /* presumably the ID that atoms list in jit_id - TODO confirm */
__u8 bin_id;
__u8 max_allocations;
__u8 flags;
__u8 padding[2]; /* explicit filler between flags and usage_id */
__u16 usage_id;
/* Per the jit_id description of struct base_jd_atom: the value stored at
* this address is read when an atom that wrote to the allocation completes,
* in order to enforce an overall physical memory usage limit.
*/
__u64 heap_info_gpu_addr;
};
/* Job chain event code bits
 * Defines the bits used to create ::base_jd_event_code
 *
 * Bits 15, 14 and 13 are single-bit flags. Bits 12:11 form a two-bit event
 * type field whose value is extracted with BASE_JD_SW_EVENT_TYPE_MASK.
 */
enum {
	/* Flag bits */

	/* Kernel side event */
	BASE_JD_SW_EVENT_KERNEL = (1u << 15),
	/* SW defined event */
	BASE_JD_SW_EVENT = (1u << 14),
	/* Event indicates success (SW events only) */
	BASE_JD_SW_EVENT_SUCCESS = (1u << 13),

	/* Event type field (bits 12:11) */

	/* Job related event */
	BASE_JD_SW_EVENT_JOB = (0u << 11),
	/* Bag related event */
	BASE_JD_SW_EVENT_BAG = (1u << 11),
	/* Misc/info event */
	BASE_JD_SW_EVENT_INFO = (2u << 11),
	/* Reserved event type */
	BASE_JD_SW_EVENT_RESERVED = (3u << 11),
	/* Mask to extract the type from an event code */
	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)
};
/**
* enum base_jd_event_code - Job chain event codes
*
* @BASE_JD_EVENT_RANGE_HW_NONFAULT_START: Start of hardware non-fault status
* codes.
* Obscurely, BASE_JD_EVENT_TERMINATED
* indicates a real fault, because the
* job was hard-stopped.
* @BASE_JD_EVENT_NOT_STARTED: Can't be seen by userspace, treated as
* 'previous job done'.
* @BASE_JD_EVENT_STOPPED: Can't be seen by userspace, becomes
* TERMINATED, DONE or JOB_CANCELLED.
* @BASE_JD_EVENT_TERMINATED: This is actually a fault status code - the job
* was hard stopped.
* @BASE_JD_EVENT_ACTIVE: Can't be seen by userspace, jobs only returned on
* complete/fail/cancel.
* @BASE_JD_EVENT_RANGE_HW_NONFAULT_END: End of hardware non-fault status codes.
* Obscurely, BASE_JD_EVENT_TERMINATED
* indicates a real fault,
* because the job was hard-stopped.
* @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START: Start of hardware fault and
* software error status codes.
* @BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END: End of hardware fault and
* software error status codes.
* @BASE_JD_EVENT_RANGE_SW_SUCCESS_START: Start of software success status
* codes.
* @BASE_JD_EVENT_RANGE_SW_SUCCESS_END: End of software success status codes.
* @BASE_JD_EVENT_RANGE_KERNEL_ONLY_START: Start of kernel-only status codes.
* Such codes are never returned to
* user-space.
* @BASE_JD_EVENT_RANGE_KERNEL_ONLY_END: End of kernel-only status codes.
* @BASE_JD_EVENT_DONE: Atom has completed successfully
* @BASE_JD_EVENT_JOB_CONFIG_FAULT: Atom dependencies configuration error which
* shall result in a failed atom
* @BASE_JD_EVENT_JOB_POWER_FAULT: The job could not be executed because the
* part of the memory system required to access
* job descriptors was not powered on
* @BASE_JD_EVENT_JOB_READ_FAULT: Reading a job descriptor into the Job
* manager failed
* @BASE_JD_EVENT_JOB_WRITE_FAULT: Writing a job descriptor from the Job
* manager failed
* @BASE_JD_EVENT_JOB_AFFINITY_FAULT: The job could not be executed because the
* specified affinity mask does not intersect
* any available cores
* @BASE_JD_EVENT_JOB_BUS_FAULT: A bus access failed while executing a job
* @BASE_JD_EVENT_INSTR_INVALID_PC: A shader instruction with an illegal program
* counter was executed.
* @BASE_JD_EVENT_INSTR_INVALID_ENC: A shader instruction with an illegal
* encoding was executed.
* @BASE_JD_EVENT_INSTR_TYPE_MISMATCH: A shader instruction was executed where
* the instruction encoding did not match the
* instruction type encoded in the program
* counter.
* @BASE_JD_EVENT_INSTR_OPERAND_FAULT: A shader instruction was executed that
* contained invalid combinations of operands.
* @BASE_JD_EVENT_INSTR_TLS_FAULT: A shader instruction was executed that tried
* to access the thread local storage section
* of another thread.
* @BASE_JD_EVENT_INSTR_ALIGN_FAULT: A shader instruction was executed that
* tried to do an unsupported unaligned memory
* access.
* @BASE_JD_EVENT_INSTR_BARRIER_FAULT: A shader instruction was executed that
* failed to complete an instruction barrier.
* @BASE_JD_EVENT_DATA_INVALID_FAULT: Any data structure read as part of the job
* contains invalid combinations of data.
* @BASE_JD_EVENT_TILE_RANGE_FAULT: Tile or fragment shading was asked to
* process a tile that is entirely outside the
* bounding box of the frame.
* @BASE_JD_EVENT_STATE_FAULT: Matches ADDR_RANGE_FAULT. A virtual address
* has been found that exceeds the virtual