#ifndef SSE2NEON_H
#define SSE2NEON_H
+#include <stdlib.h> // For aligned_malloc
+
// This header file provides a simple API translation layer
// between SSE intrinsics to their corresponding Arm/Aarch64 NEON versions
//
#define _mm_extract_pi16(a, imm) \
(int32_t) vget_lane_u16(vreinterpret_u16_m64(a), (imm))
-#ifndef __MM_MALLOC_H
-// Free aligned memory that was allocated with _mm_malloc.
-// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free
-FORCE_INLINE void _mm_free(void *addr)
-{
- free(addr);
-}
-#endif
-
// Macro: Get the flush zero bits from the MXCSR control and status register.
// The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or
// _MM_FLUSH_ZERO_OFF
vcombine_s64(vld1_s64((const int64_t *) p), vdup_n_s64(0)));
}
-#ifndef __MM_MALLOC_H
-// Allocate aligned blocks of memory.
-// https://software.intel.com/en-us/
-// cpp-compiler-developer-guide-and-reference-allocating-and-freeing-aligned-memory-blocks
-FORCE_INLINE void *_mm_malloc(size_t size, size_t align)
-{
- void *ptr;
- if (align == 1)
- return malloc(size);
- if (align == 2 || (sizeof(void *) == 8 && align == 4))
- align = sizeof(void *);
- if (!posix_memalign(&ptr, align, size))
- return ptr;
- return NULL;
-}
-#endif
-
// Conditionally store 8-bit integer elements from a into memory using mask
// (elements are not stored when the highest bit is not set in the corresponding
// element) and a non-temporal memory hint.