Skip to content

Commit 60e75db

Browse files
committed
Merge branch 'master' into libretro
2 parents 3fc26d1 + db2804f commit 60e75db

File tree

6 files changed

+116
-109
lines changed

6 files changed

+116
-109
lines changed

plugins/gpu_neon/psx_gpu/psx_gpu.c

+17
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,11 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
528528
render_block_handler_struct *render_block_handler =
529529
psx_gpu->render_block_handler;
530530

531+
#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
532+
// the asm doesn't save the callee-saved vector regs (q4-q7), so list them as clobbered here to make the compiler preserve them
533+
__asm__ __volatile__("":::"q4","q5","q6","q7");
534+
#endif
535+
531536
render_block_handler->texture_blocks(psx_gpu);
532537
render_block_handler->shade_blocks(psx_gpu);
533538
render_block_handler->blend_blocks(psx_gpu);
@@ -538,6 +543,9 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
538543
#endif
539544

540545
psx_gpu->num_blocks = 0;
546+
#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
547+
__asm__ __volatile__("":::"q4","q5","q6","q7");
548+
#endif
541549
}
542550
}
543551

@@ -3037,6 +3045,11 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu,
30373045
triangle_set_direction(y_direction_b, y_delta_b);
30383046
triangle_set_direction(y_direction_c, y_delta_c);
30393047

3048+
#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
3049+
// the asm doesn't save the callee-saved vector regs (q4-q7), so store them to psx_gpu->saved_q4_q7 here; they are reloaded at the end of this function
3050+
__asm__ __volatile__("vstmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7) : "memory");
3051+
#endif
3052+
30403053
compute_all_gradients(psx_gpu, a, b, c);
30413054

30423055
switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
@@ -3163,6 +3176,10 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu,
31633176
&(render_triangle_block_handlers[render_state]);
31643177
((setup_blocks_function_type *)psx_gpu->render_block_handler->setup_blocks)
31653178
(psx_gpu);
3179+
3180+
#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
3181+
__asm__ __volatile__("vldmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7));
3182+
#endif
31663183
}
31673184

31683185
void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,

plugins/gpu_neon/psx_gpu/psx_gpu.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,11 @@ typedef struct
218218

219219
// Align up to 64 byte boundary to keep the upcoming buffers cache line
220220
// aligned, also make reachable with single immediate addition
221-
u8 reserved_a[180 + 9*4 - 9*sizeof(void *)];
221+
u8 reserved_a[68 + 9*4 - 9*sizeof(void *)];
222+
223+
// space for saving registers on the C call to flush_render_block_buffer() and in the asm
224+
u32 saved_tmp[48 / sizeof(u32)];
225+
u32 saved_q4_q7[64 / sizeof(u32)];
222226

223227
// 8KB
224228
block_struct blocks[MAX_BLOCKS_PER_ROW];

0 commit comments

Comments
 (0)