mirror of
https://github.com/nothings/stb
synced 2024-12-04 23:41:55 +03:00
stb_vorbis: Change imdct_step3_inner_s_loop_ld654
Released Clang 12 generates bad code for the original loop in here. While this is a compiler bug, plain and simple, we still have to deal with it. This is related to the SLP vectorizer, and in particular the two reverse subtracts in the butterflies for the second half to avoid unary negates. Use the more regular dataflow that has the unary negates in it (we can at least fold one of them into a constant, namely for A2) and introduce a few temporaries that also make alias analysis (and possible block-level vectorization) a whole lot easier while I'm at it. This fixes the codegen issues on Clang 12, which now produces a working decoder, and I expect the single unary negate that we actually gain per iteration of this loop is not a significant perf concern. (There are bigger fish to fry here regardless.) Fixes issue #1152.
This commit is contained in:
parent
0d47d17002
commit
70136cd5f1
41
stb_vorbis.c
41
stb_vorbis.c
@ -2592,34 +2592,33 @@ static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A,
|
||||
|
||||
while (z > base) {
|
||||
float k00,k11;
|
||||
float l00,l11;
|
||||
|
||||
k00 = z[-0] - z[-8];
|
||||
k11 = z[-1] - z[-9];
|
||||
z[-0] = z[-0] + z[-8];
|
||||
z[-1] = z[-1] + z[-9];
|
||||
z[-8] = k00;
|
||||
z[-9] = k11 ;
|
||||
k00 = z[-0] - z[ -8];
|
||||
k11 = z[-1] - z[ -9];
|
||||
l00 = z[-2] - z[-10];
|
||||
l11 = z[-3] - z[-11];
|
||||
z[ -0] = z[-0] + z[ -8];
|
||||
z[ -1] = z[-1] + z[ -9];
|
||||
z[ -2] = z[-2] + z[-10];
|
||||
z[ -3] = z[-3] + z[-11];
|
||||
z[ -8] = k00;
|
||||
z[ -9] = k11;
|
||||
z[-10] = (l00+l11) * A2;
|
||||
z[-11] = (l11-l00) * A2;
|
||||
|
||||
k00 = z[ -2] - z[-10];
|
||||
k11 = z[ -3] - z[-11];
|
||||
z[ -2] = z[ -2] + z[-10];
|
||||
z[ -3] = z[ -3] + z[-11];
|
||||
z[-10] = (k00+k11) * A2;
|
||||
z[-11] = (k11-k00) * A2;
|
||||
|
||||
k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation
|
||||
k00 = z[ -4] - z[-12];
|
||||
k11 = z[ -5] - z[-13];
|
||||
l00 = z[ -6] - z[-14];
|
||||
l11 = z[ -7] - z[-15];
|
||||
z[ -4] = z[ -4] + z[-12];
|
||||
z[ -5] = z[ -5] + z[-13];
|
||||
z[-12] = k11;
|
||||
z[-13] = k00;
|
||||
|
||||
k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation
|
||||
k11 = z[ -7] - z[-15];
|
||||
z[ -6] = z[ -6] + z[-14];
|
||||
z[ -7] = z[ -7] + z[-15];
|
||||
z[-14] = (k00+k11) * A2;
|
||||
z[-15] = (k00-k11) * A2;
|
||||
z[-12] = k11;
|
||||
z[-13] = -k00;
|
||||
z[-14] = (l11-l00) * A2;
|
||||
z[-15] = (l00+l11) * -A2;
|
||||
|
||||
iter_54(z);
|
||||
iter_54(z-8);
|
||||
|
Loading…
Reference in New Issue
Block a user