summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordengjianbo <dengjianbo@loongson.cn>2023-08-28 10:08:35 +0800
committercaiyinyu <caiyinyu@loongson.cn>2023-08-29 10:35:38 +0800
commitf8664fe2155eb5ddc22272bac72ab26368735718 (patch)
treeaac9760625ff65d3f84561ff4ac30f63ae3e6f1b
parent3efa26749e4d28768558330353dc15c6f325ed4e (diff)
LoongArch: Add ifunc support for rawmemchr{aligned, lsx, lasx}
According to glibc rawmemchr microbenchmark, A few cases tested with char '\0' experience performance degradation due to the lasx and lsx versions don't handle the '\0' separately. Overall, rawmemchr-lasx implementation could reduce the runtime about 40%-80%, rawmemchr-lsx implementation could reduce the runtime about 40%-66%, rawmemchr-aligned implementation could reduce the runtime about 20%-40%.
-rw-r--r--sysdeps/loongarch/lp64/multiarch/Makefile3
-rw-r--r--sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c8
-rw-r--r--sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h40
-rw-r--r--sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S124
-rw-r--r--sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S82
-rw-r--r--sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S71
-rw-r--r--sysdeps/loongarch/lp64/multiarch/rawmemchr.c37
7 files changed, 365 insertions, 0 deletions
diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile
index 5d7ae7ae73..64416b025a 100644
--- a/sysdeps/loongarch/lp64/multiarch/Makefile
+++ b/sysdeps/loongarch/lp64/multiarch/Makefile
@@ -21,5 +21,8 @@ sysdep_routines += \
memmove-unaligned \
memmove-lsx \
memmove-lasx \
+ rawmemchr-aligned \
+ rawmemchr-lsx \
+ rawmemchr-lasx \
# sysdep_routines
endif
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
index c8ba87bd81..3db9af1460 100644
--- a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c
@@ -94,5 +94,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned)
)
+ IFUNC_IMPL (i, name, rawmemchr,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned)
+ )
+
return i;
}
diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
new file mode 100644
index 0000000000..a7bb4cf9ea
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h
@@ -0,0 +1,40 @@
+/* Common definition for rawmemchr ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden;
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+#if !defined __loongarch_soft_float
+ if (SUPPORT_LASX)
+ return OPTIMIZE (lasx);
+ else if (SUPPORT_LSX)
+ return OPTIMIZE (lsx);
+ else
+#endif
+ return OPTIMIZE (aligned);
+}
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
new file mode 100644
index 0000000000..9c7155ae82
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S
@@ -0,0 +1,124 @@
+/* Optimized rawmemchr implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define RAWMEMCHR_NAME __rawmemchr_aligned
+#else
+# define RAWMEMCHR_NAME __rawmemchr
+#endif
+
+LEAF(RAWMEMCHR_NAME, 6)
+ andi t1, a0, 0x7
+ bstrins.d a0, zero, 2, 0
+ lu12i.w a2, 0x01010
+ bstrins.d a1, a1, 15, 8
+
+ ld.d t0, a0, 0
+ slli.d t1, t1, 3
+ ori a2, a2, 0x101
+ bstrins.d a1, a1, 31, 16
+
+ li.w t8, -1
+ bstrins.d a1, a1, 63, 32
+ bstrins.d a2, a2, 63, 32
+ sll.d t2, t8, t1
+
+ sll.d t3, a1, t1
+ orn t0, t0, t2
+ slli.d a3, a2, 7
+ beqz a1, L(find_zero)
+
+ xor t0, t0, t3
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+
+ bnez t3, L(count_pos)
+ addi.d a0, a0, 8
+
+L(loop):
+ ld.d t0, a0, 0
+ xor t0, t0, a1
+
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ xor t0, t0, a1
+ sub.d t1, t0, a2
+
+ andn t2, a3, t0
+ and t3, t1, t2
+ beqz t3, L(loop)
+ addi.d a0, a0, -8
+L(count_pos):
+ ctz.d t0, t3
+ srli.d t0, t0, 3
+ add.d a0, a0, t0
+ jr ra
+
+L(loop_7bit):
+ ld.d t0, a0, 0
+L(find_zero):
+ sub.d t1, t0, a2
+ and t2, t1, a3
+ bnez t2, L(more_check)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t0, a2
+ and t2, t1, a3
+
+ beqz t2, L(loop_7bit)
+ addi.d a0, a0, -8
+
+L(more_check):
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+ addi.d a0, a0, 8
+
+L(loop_8bit):
+ ld.d t0, a0, 0
+
+ sub.d t1, t0, a2
+ andn t2, a3, t0
+ and t3, t1, t2
+ bnez t3, L(count_pos)
+
+ ld.d t0, a0, 8
+ addi.d a0, a0, 16
+ sub.d t1, t0, a2
+
+ andn t2, a3, t0
+ and t3, t1, t2
+ beqz t3, L(loop_8bit)
+
+ addi.d a0, a0, -8
+ b L(count_pos)
+
+END(RAWMEMCHR_NAME)
+
+libc_hidden_builtin_def (__rawmemchr)
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
new file mode 100644
index 0000000000..be2eb59dbe
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S
@@ -0,0 +1,82 @@
+/* Optimized rawmemchr implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+#include <sys/regdef.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define RAWMEMCHR __rawmemchr_lasx
+
+LEAF(RAWMEMCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 5, 0
+ xvld xr0, a0, 0
+ xvld xr1, a0, 32
+
+ xvreplgr2vr.b xr2, a1
+ xvseq.b xr0, xr0, xr2
+ xvseq.b xr1, xr1, xr2
+ xvmsknz.b xr0, xr0
+
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+ vilvl.h vr0, vr3, vr0
+
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+ sra.d t0, t0, a2
+
+
+ beqz t0, L(loop)
+ ctz.d t0, t0
+ add.d a0, a2, t0
+ jr ra
+
+L(loop):
+ xvld xr0, a0, 64
+ xvld xr1, a0, 96
+ addi.d a0, a0, 64
+ xvseq.b xr0, xr0, xr2
+
+ xvseq.b xr1, xr1, xr2
+ xvmax.bu xr3, xr0, xr1
+ xvseteqz.v fcc0, xr3
+ bcnez fcc0, L(loop)
+
+ xvmsknz.b xr0, xr0
+ xvmsknz.b xr1, xr1
+ xvpickve.w xr3, xr0, 4
+ xvpickve.w xr4, xr1, 4
+
+
+ vilvl.h vr0, vr3, vr0
+ vilvl.h vr1, vr4, vr1
+ vilvl.w vr0, vr1, vr0
+ movfr2gr.d t0, fa0
+
+ ctz.d t0, t0
+ add.d a0, a0, t0
+ jr ra
+END(RAWMEMCHR)
+
+libc_hidden_builtin_def (RAWMEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
new file mode 100644
index 0000000000..2f6fe024dc
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S
@@ -0,0 +1,71 @@
+/* Optimized rawmemchr implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define RAWMEMCHR __rawmemchr_lsx
+
+LEAF(RAWMEMCHR, 6)
+ move a2, a0
+ bstrins.d a0, zero, 4, 0
+ vld vr0, a0, 0
+ vld vr1, a0, 16
+
+ vreplgr2vr.b vr2, a1
+ vseq.b vr0, vr0, vr2
+ vseq.b vr1, vr1, vr2
+ vmsknz.b vr0, vr0
+
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a2
+
+ beqz t0, L(loop)
+ ctz.w t0, t0
+ add.d a0, a2, t0
+ jr ra
+
+
+L(loop):
+ vld vr0, a0, 32
+ vld vr1, a0, 48
+ addi.d a0, a0, 32
+ vseq.b vr0, vr0, vr2
+
+ vseq.b vr1, vr1, vr2
+ vmax.bu vr3, vr0, vr1
+ vseteqz.v fcc0, vr3
+ bcnez fcc0, L(loop)
+
+ vmsknz.b vr0, vr0
+ vmsknz.b vr1, vr1
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+
+ ctz.w t0, t0
+ add.d a0, a0, t0
+ jr ra
+END(RAWMEMCHR)
+
+libc_hidden_builtin_def (RAWMEMCHR)
+#endif
diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
new file mode 100644
index 0000000000..89c7ffff8f
--- /dev/null
+++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c
@@ -0,0 +1,37 @@
+/* Multiple versions of rawmemchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+# define rawmemchr __redirect_rawmemchr
+# define __rawmemchr __redirect___rawmemchr
+# include <string.h>
+# undef rawmemchr
+# undef __rawmemchr
+
+# define SYMBOL_NAME rawmemchr
+# include "ifunc-rawmemchr.h"
+
+libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr,
+ IFUNC_SELECTOR ());
+weak_alias (__rawmemchr, rawmemchr)
+# ifdef SHARED
+__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr)
+ __attribute__((visibility ("hidden")));
+# endif
+#endif