Skip to content

Commit c3465db

Browse files
Adjust loops handling
1 parent 55c5c70 commit c3465db

2 files changed

Lines changed: 20 additions & 48 deletions

File tree

src/layer/riscv/dequantize_riscv.cpp

Lines changed: 10 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ static void dequantize(const int* intptr, float* ptr, const Mat& scale_data, con
3434
#if __riscv_vector
3535
const size_t vlm1 = __riscv_vsetvlmax_e32m1();
3636
const size_t vlm2 = __riscv_vsetvlmax_e32m2();
37-
bool vectorize = true;
3837
vfloat32m8_t _scale;
3938
if (scale_data.w == 1)
4039
{
@@ -45,22 +44,17 @@ static void dequantize(const int* intptr, float* ptr, const Mat& scale_data, con
4544
vfloat32m1_t _s = __riscv_vle32_v_f32m1(scale_data, vlm1);
4645
_scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
4746
}
48-
else if (elempack == vlm2)
47+
else if (elempack == vlm2) // VLENB < Pack 8
4948
{
5049
vfloat32m2_t _s = __riscv_vle32_v_f32m2(scale_data, vlm2);
5150
_scale = __riscv_vcreate_v_f32m2_f32m8(_s, _s, _s, _s);
5251
}
53-
else
54-
{
55-
vectorize = false;
56-
}
5752
#endif // __riscv_vector
5853

5954
if (bias_data.w == 0)
6055
{
61-
int i = 0;
6256
#if __riscv_vector
63-
int n = vectorize ? size : 0;
57+
int n = size;
6458
while (n > 0)
6559
{
6660
size_t vl = __riscv_vsetvl_e32m8(n);
@@ -72,21 +66,18 @@ static void dequantize(const int* intptr, float* ptr, const Mat& scale_data, con
7266
ptr += vl;
7367
n -= vl;
7468
}
75-
76-
i += (size - n);
77-
#endif // __riscv_vector
78-
for (; i < size; i++)
69+
#else // __riscv_vector
70+
for (int i = 0; i < size; i++)
7971
{
8072
*ptr = *intptr * scale;
8173
intptr++;
8274
ptr++;
8375
}
76+
#endif // __riscv_vector
8477
}
8578
else
8679
{
8780
float bias = bias_data[0];
88-
89-
int i = 0;
9081
#if __riscv_vector
9182
vfloat32m8_t _bias;
9283
if (bias_data.w == 1)
@@ -98,17 +89,13 @@ static void dequantize(const int* intptr, float* ptr, const Mat& scale_data, con
9889
vfloat32m1_t _b = __riscv_vle32_v_f32m1(bias_data, vlm1);
9990
_bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
10091
}
101-
else if (elempack == vlm2)
92+
else if (elempack == vlm2) // VLENB < Pack 8
10293
{
10394
vfloat32m2_t _b = __riscv_vle32_v_f32m2(bias_data, vlm2);
10495
_bias = __riscv_vcreate_v_f32m2_f32m8(_b, _b, _b, _b);
10596
}
106-
else
107-
{
108-
vectorize = false;
109-
}
11097

111-
int n = vectorize ? size : 0;
98+
int n = size;
11299
while (n > 0)
113100
{
114101
size_t vl = __riscv_vsetvl_e32m8(n);
@@ -120,15 +107,14 @@ static void dequantize(const int* intptr, float* ptr, const Mat& scale_data, con
120107
ptr += vl;
121108
n -= vl;
122109
}
123-
124-
i += (size - n);
125-
#endif // __riscv_vector
126-
for (; i < size; i++)
110+
#else // __riscv_vector
111+
for (int i = 0; i < size; i++)
127112
{
128113
*ptr = *intptr * scale + bias;
129114
intptr++;
130115
ptr++;
131116
}
117+
#endif // __riscv_vector
132118
}
133119
}
134120

src/layer/riscv/dequantize_riscv_zfh.cpp

Lines changed: 10 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@ static void dequantize_fp16s(const int* intptr, __fp16* ptr, const Mat& scale_da
2020
#if __riscv_vector
2121
const size_t vlm1 = __riscv_vsetvlmax_e32m1();
2222
const size_t vlm2 = __riscv_vsetvlmax_e32m2();
23-
bool vectorize = true;
2423
vfloat32m8_t _scale;
2524
if (scale_data.w == 1)
2625
{
@@ -31,22 +30,17 @@ static void dequantize_fp16s(const int* intptr, __fp16* ptr, const Mat& scale_da
3130
vfloat32m1_t _s = __riscv_vle32_v_f32m1(scale_data, vlm1);
3231
_scale = __riscv_vcreate_v_f32m1_f32m8(_s, _s, _s, _s, _s, _s, _s, _s);
3332
}
34-
else if (elempack == vlm2)
33+
else if (elempack == vlm2) // VLENB < Pack 8
3534
{
3635
vfloat32m2_t _s = __riscv_vle32_v_f32m2(scale_data, vlm2);
3736
_scale = __riscv_vcreate_v_f32m2_f32m8(_s, _s, _s, _s);
3837
}
39-
else
40-
{
41-
vectorize = false;
42-
}
4338
#endif // __riscv_vector
4439

4540
if (bias_data.w == 0)
4641
{
47-
int i = 0;
4842
#if __riscv_vector
49-
int n = vectorize ? size : 0;
43+
int n = size;
5044
while (n > 0)
5145
{
5246
size_t vl = __riscv_vsetvl_e16m4(n);
@@ -58,21 +52,18 @@ static void dequantize_fp16s(const int* intptr, __fp16* ptr, const Mat& scale_da
5852
ptr += vl;
5953
n -= vl;
6054
}
61-
62-
i += (size - n);
63-
#endif // __riscv_vector
64-
for (; i < size; i++)
55+
#else // __riscv_vector
56+
for (int i = 0; i < size; i++)
6557
{
6658
*ptr = (__fp16)((float)*intptr * scale);
6759
intptr++;
6860
ptr++;
6961
}
62+
#endif // __riscv_vector
7063
}
7164
else
7265
{
7366
float bias = bias_data[0];
74-
75-
int i = 0;
7667
#if __riscv_vector
7768
vfloat32m8_t _bias;
7869
if (bias_data.w == 1)
@@ -84,17 +75,13 @@ static void dequantize_fp16s(const int* intptr, __fp16* ptr, const Mat& scale_da
8475
vfloat32m1_t _b = __riscv_vle32_v_f32m1(bias_data, vlm1);
8576
_bias = __riscv_vcreate_v_f32m1_f32m8(_b, _b, _b, _b, _b, _b, _b, _b);
8677
}
87-
else if (elempack == vlm2)
78+
else if (elempack == vlm2) // VLENB < Pack 8
8879
{
8980
vfloat32m2_t _b = __riscv_vle32_v_f32m2(bias_data, vlm2);
9081
_bias = __riscv_vcreate_v_f32m2_f32m8(_b, _b, _b, _b);
9182
}
92-
else
93-
{
94-
vectorize = false;
95-
}
9683

97-
int n = vectorize ? size : 0;
84+
int n = size;
9885
while (n > 0)
9986
{
10087
size_t vl = __riscv_vsetvl_e16m4(n);
@@ -106,15 +93,14 @@ static void dequantize_fp16s(const int* intptr, __fp16* ptr, const Mat& scale_da
10693
ptr += vl;
10794
n -= vl;
10895
}
109-
110-
i += (size - n);
111-
#endif // __riscv_vector
112-
for (; i < size; i++)
96+
#else // __riscv_vector
97+
for (int i = 0; i < size; i++)
11398
{
11499
*ptr = (__fp16)((float)*intptr * scale + bias);
115100
intptr++;
116101
ptr++;
117102
}
103+
#endif // __riscv_vector
118104
}
119105
}
120106

0 commit comments

Comments
 (0)