Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions openh264-sys2/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ fn try_compile_nasm(target: &Target, cc_build_command: &mut Build, root: &str) {
let mut nasm_build = nasm_rs::Build::new();
let mut nasm_build = nasm_build.include(format!("upstream/codec/common/{}/", config.include_dir));
nasm_build = nasm_build.define(&config.asm_platform_define, None);
nasm_build = nasm_build.define("HAVE_AVX2", None);
if config.prefix_symbols {
nasm_build = nasm_build.define("PREFIX", None);
}
Expand All @@ -235,6 +236,7 @@ fn try_compile_nasm(target: &Target, cc_build_command: &mut Build, root: &str) {
// _RUN_ any build command on its own (we still invoked `nasm` above
// though).
cc_build_command.define(&config.cpp_define, None);
cc_build_command.define("HAVE_AVX2", None);

for object in &object_files {
cc_build_command.object(object);
Expand Down
5 changes: 5 additions & 0 deletions openh264-sys2/upstream/codec/common/inc/intra_pred_common.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* Copyright (c) 2026, Richard Ben Aleya
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -56,6 +57,10 @@ extern "C" {
//for intra-prediction ASM functions
void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
//AVX2 counterparts of the two _sse2 prototypes above (identical parameter
//lists); they are only declared when the build defines HAVE_AVX2.
#if defined(HAVE_AVX2)
void WelsI16x16LumaPredV_avx2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_avx2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//HAVE_AVX2
#endif//X86_ASM

#if defined(HAVE_NEON)
Expand Down
13 changes: 13 additions & 0 deletions openh264-sys2/upstream/codec/common/inc/sad_common.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* Copyright (c) 2026, Richard Ben Aleya
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -73,6 +74,18 @@ void WelsSampleSadFour8x16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*)
void WelsSampleSadFour8x8_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour4x4_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);

//AVX2 prototypes, only declared when the build defines HAVE_AVX2.
//Sizes declared here are 16x16, 16x8, 8x16 and 8x8; no 4x4 AVX2 variant is declared.
#if defined(HAVE_AVX2)
//Single-block variants: return an int32_t.
int32_t WelsSampleSad16x16_avx2 (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad16x8_avx2 (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x16_avx2 (uint8_t*, int32_t, uint8_t*, int32_t);
int32_t WelsSampleSad8x8_avx2 (uint8_t*, int32_t, uint8_t*, int32_t);

//"Four" variants: return void and take an extra int32_t* argument
//(presumably the output array - confirm against the implementation).
void WelsSampleSadFour16x16_avx2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour16x8_avx2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x16_avx2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
void WelsSampleSadFour8x8_avx2 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*);
#endif//HAVE_AVX2

#endif//X86_ASM

#if defined (HAVE_NEON)
Expand Down
44 changes: 44 additions & 0 deletions openh264-sys2/upstream/codec/common/x86/intra_pred_com.asm
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* Copyright (c) 2026, Richard Ben Aleya
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -115,3 +116,46 @@ WELS_EXTERN WelsI16x16LumaPredV_sse2

ret

%ifdef HAVE_AVX2
;***********************************************************************
; void WelsI16x16LumaPredV_avx2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;
; Loads the 16 bytes found at (r1 - r2) and writes them sixteen times in a
; row starting at r0, i.e. 256 contiguous bytes.
; NOTE(review): assumes LOAD_3_PARA places pred/pRef/stride in r0/r1/r2 in
; that order - the macro is defined outside this file; confirm there.
;***********************************************************************
WELS_EXTERN WelsI16x16LumaPredV_avx2
    %assign push_num 0
    LOAD_3_PARA
    SIGN_EXTENSION r2, r2d
    sub             r1, r2                  ; step back one stride from pRef
    vbroadcasti128  ymm0, [r1]              ; same 16 bytes in both 128-bit lanes
    ; Eight unaligned 32-byte stores cover offsets 0..255 of the destination.
    %assign v_off 0
    %rep 8
    vmovdqu         [r0 + v_off], ymm0
    %assign v_off v_off + 32
    %endrep
    vzeroupper                              ; leave the upper YMM halves clean for the caller
    ret

;***********************************************************************
; void WelsI16x16LumaPredH_avx2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;
; For each of 16 rows: reads the single byte at r1 - 1 (r1 advancing by r2
; per row), replicates it across 16 bytes and stores that to r0 + row * 16.
; The stores use vmovdqa, so r0 must be 16-byte aligned.
; NOTE(review): assumes LOAD_3_PARA places pred/pRef/stride in r0/r1/r2 in
; that order - the macro is defined outside this file; confirm there.
;***********************************************************************
WELS_EXTERN WelsI16x16LumaPredH_avx2
    %assign push_num 0
    LOAD_3_PARA
    SIGN_EXTENSION r2, r2d
    dec             r1                      ; point at the byte just before pRef

    ; Row 0 needs no pointer advance.
    vpbroadcastb    xmm0, [r1]
    vmovdqa         [r0], xmm0

    ; Rows 1..15: advance the source by one stride, then broadcast and store.
    %assign dst_off 16
    %rep 15
    add             r1, r2
    vpbroadcastb    xmm0, [r1]
    vmovdqa         [r0 + dst_off], xmm0
    %assign dst_off dst_off + 16
    %endrep

    vzeroupper
    ret

%endif ; HAVE_AVX2

Loading