@@ -528,6 +528,11 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
528
528
render_block_handler_struct * render_block_handler =
529
529
psx_gpu -> render_block_handler ;
530
530
531
+ #if defined(__arm__ ) && defined(NEON_BUILD ) && !defined(SIMD_BUILD )
532
+ // the asm doesn't preserve the callee-saved vector regs; declaring q4-q7 as clobbered makes the compiler save and restore them for us
533
+ __asm__ __volatile__("" :::"q4" ,"q5" ,"q6" ,"q7" );
534
+ #endif
535
+
531
536
render_block_handler -> texture_blocks (psx_gpu );
532
537
render_block_handler -> shade_blocks (psx_gpu );
533
538
render_block_handler -> blend_blocks (psx_gpu );
@@ -538,6 +543,9 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
538
543
#endif
539
544
540
545
psx_gpu -> num_blocks = 0 ;
546
+ #if defined(__arm__ ) && defined(NEON_BUILD ) && !defined(SIMD_BUILD )
547
+ __asm__ __volatile__("" :::"q4" ,"q5" ,"q6" ,"q7" );
548
+ #endif
541
549
}
542
550
}
543
551
@@ -3037,6 +3045,11 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu,
3037
3045
triangle_set_direction (y_direction_b , y_delta_b );
3038
3046
triangle_set_direction (y_direction_c , y_delta_c );
3039
3047
3048
+ #if defined(__arm__ ) && defined(NEON_BUILD ) && !defined(SIMD_BUILD )
3049
+ // the asm doesn't bother to preserve the callee-saved vector regs, so stash q4-q7 in psx_gpu->saved_q4_q7 here
3050
+ __asm__ __volatile__("vstmia %0, {q4-q7}" :: "r" (psx_gpu -> saved_q4_q7 ) : "memory" );
3051
+ #endif
3052
+
3040
3053
compute_all_gradients (psx_gpu , a , b , c );
3041
3054
3042
3055
switch (y_direction_a | (y_direction_b << 2 ) | (y_direction_c << 4 ) |
@@ -3163,6 +3176,10 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu,
3163
3176
& (render_triangle_block_handlers [render_state ]);
3164
3177
((setup_blocks_function_type * )psx_gpu -> render_block_handler -> setup_blocks )
3165
3178
(psx_gpu );
3179
+
3180
+ #if defined(__arm__ ) && defined(NEON_BUILD ) && !defined(SIMD_BUILD )
3181
+ __asm__ __volatile__("vldmia %0, {q4-q7}" :: "r" (psx_gpu -> saved_q4_q7 ));
3182
+ #endif
3166
3183
}
3167
3184
3168
3185
void render_triangle (psx_gpu_struct * psx_gpu , vertex_struct * vertexes ,
0 commit comments