diff options
Diffstat (limited to 'src/viterbi_sse.c')
-rw-r--r-- | src/viterbi_sse.c | 129 |
1 files changed, 0 insertions, 129 deletions
diff --git a/src/viterbi_sse.c b/src/viterbi_sse.c deleted file mode 100644 index f3890850..00000000 --- a/src/viterbi_sse.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Intel SSE Viterbi decoder - * - * Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc> - * - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include <stdint.h> -#include "config.h" - -#include <emmintrin.h> -#include <tmmintrin.h> -#include <xmmintrin.h> - -#if defined(HAVE_SSE4_1) -#include <smmintrin.h> -#endif - -#define SSE_ALIGN 16 - -/* Broadcast 16-bit integer - * Repeat the low 16-bit integer to all elements of the 128-bit SSE - * register. Only AVX2 has a dedicated broadcast instruction; use repeat - * unpacks for SSE only architectures. This is a destructive operation and - * the source register is overwritten. - * - * Input: - * M0 - Low 16-bit element is read - * - * Output: - * M0 - Contains broadcasted values - */ -#define SSE_BROADCAST(M0) \ -{ \ - M0 = _mm_unpacklo_epi16(M0, M0); \ - M0 = _mm_unpacklo_epi32(M0, M0); \ - M0 = _mm_unpacklo_epi64(M0, M0); \ -} - -/** - * Include common SSE implementation - */ -#include <viterbi_sse_common.h> - -/* Aligned Memory Allocator - * SSE requires 16-byte memory alignment. We store relevant trellis values - * (accumulated sums, outputs, and path decisions) as 16 bit signed integers - * so the allocated memory is casted as such. - */ -__attribute__ ((visibility("hidden"))) -int16_t *osmo_conv_sse_vdec_malloc(size_t n) -{ - return (int16_t *) _mm_malloc(sizeof(int16_t) * n, SSE_ALIGN); -} - -__attribute__ ((visibility("hidden"))) -void osmo_conv_sse_vdec_free(int16_t *ptr) -{ - _mm_free(ptr); -} - -__attribute__ ((visibility("hidden"))) -void osmo_conv_sse_metrics_k5_n2(const int8_t *val, const int16_t *out, - int16_t *sums, int16_t *paths, int norm) -{ - const int16_t _val[4] = { val[0], val[1], val[0], val[1] }; - - _sse_metrics_k5_n2(_val, out, sums, paths, norm); -} - -__attribute__ ((visibility("hidden"))) -void osmo_conv_sse_metrics_k5_n3(const int8_t *val, const int16_t *out, - int16_t *sums, int16_t *paths, int norm) -{ - const int16_t _val[4] = { val[0], val[1], val[2], 0 }; - - _sse_metrics_k5_n4(_val, out, sums, paths, norm); -} - -__attribute__ ((visibility("hidden"))) -void osmo_conv_sse_metrics_k5_n4(const int8_t *val, const int16_t *out, - int16_t *sums, int16_t *paths, int norm) -{ - const int16_t _val[4] = { val[0], val[1], val[2], val[3] }; - - _sse_metrics_k5_n4(_val, out, sums, paths, norm); -} - -__attribute__ ((visibility("hidden"))) -void osmo_conv_sse_metrics_k7_n2(const int8_t *val, const int16_t *out, - int16_t *sums, int16_t *paths, int norm) -{ - const int16_t _val[4] = { val[0], val[1], val[0], val[1] }; - - _sse_metrics_k7_n2(_val, out, sums, paths, norm); -} - -__attribute__ ((visibility("hidden"))) -void osmo_conv_sse_metrics_k7_n3(const int8_t *val, const int16_t *out, - int16_t *sums, int16_t *paths, int norm) -{ - const int16_t _val[4] = { val[0], val[1], val[2], 0 }; - - _sse_metrics_k7_n4(_val, out, sums, paths, norm); -} - -__attribute__ ((visibility("hidden"))) -void osmo_conv_sse_metrics_k7_n4(const int8_t *val, const int16_t *out, - int16_t *sums, int16_t *paths, int norm) -{ - const int16_t _val[4] = { val[0], val[1], val[2], val[3] }; - - _sse_metrics_k7_n4(_val, out, sums, paths, norm); -} |