ref: c5f04cfbd4246dd98978db924fb596b2f73dcd9a
parent: 84ff16c0156e71de20af1cf149f26e3433213174
parent: 00a724076be885682ed7f5ae4e9009d834c0ad3b
author: volvet <[email protected]>
date: Fri Apr 25 15:05:12 EDT 2014
Merge pull request #750 from mstorsjo/deblocking-neon-cpu-features Check for WELS_CPU_NEON before calling DeblockingBSCalcEnc_neon
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -94,6 +94,7 @@
int32_t iBeta, int8_t* iTc);
typedef void (*PChromaDeblockingEQ4Func) (uint8_t* iSampleCb, uint8_t* iSampleCr, int32_t iStride, int32_t iAlpha,
int32_t iBeta);
+typedef void (*PDeblockingBSCalc) (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType, int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag); // signature implemented by DeblockingBSCalc_c and DeblockingBSCalc_neon
typedef struct tagDeblockingFunc {
PLumaDeblockingLT4Func pfLumaDeblockingLT4Ver;
@@ -105,6 +106,8 @@
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Ver;
PChromaDeblockingLT4Func pfChromaDeblockingLT4Hor;
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor;
+
+ PDeblockingBSCalc pfDeblockingBSCalc;
} DeblockingFunc;
typedef void (*PSetNoneZeroCountZeroFunc) (int8_t* pNonZeroCount);
--- a/codec/encoder/core/src/deblocking.cpp
+++ b/codec/encoder/core/src/deblocking.cpp
@@ -583,6 +583,56 @@
FilteringEdgeChromaHV (pfDeblocking, pCurMb, pFilter);
}
+#if defined(HAVE_NEON) && defined(SINGLE_REF_FRAME)
+void DeblockingBSCalc_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
+ int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) { // NEON variant of the BS calculation; pFunc and uiCurMbType are not read in this body
+ DeblockingBSCalcEnc_neon (pCurMb->pNonZeroCount, pCurMb->sMv, pCurMb->uiNeighborAvail, iMbStride, uiBS); // presumably fills uiBS; the routine is defined elsewhere -- confirm
+ if (iLeftFlag) {
+ if (IS_INTRA ((pCurMb - 1)->uiMbType)) { // MB just before pCurMb is intra; if it is not, uiBS[0][0] keeps what the call above left there
+ * (uint32_t*)uiBS[0][0] = 0x04040404; // one 32-bit store: uiBS[0][0][0..3] all become 4
+ }
+ } else {
+ * (uint32_t*)uiBS[0][0] = 0; // iLeftFlag is zero: clear uiBS[0][0][0..3]
+ }
+ if (iTopFlag) {
+ if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) { // MB iMbStride entries before pCurMb is intra; if it is not, uiBS[1][0] keeps what the call above left there
+ * (uint32_t*)uiBS[1][0] = 0x04040404; // one 32-bit store: uiBS[1][0][0..3] all become 4
+ }
+ } else {
+ * (uint32_t*)uiBS[1][0] = 0; // iTopFlag is zero: clear uiBS[1][0][0..3]
+ }
+}
+#endif
+
+void DeblockingBSCalc_c (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
+ int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) { // plain C variant of the BS calculation; writes its results into uiBS
+ if (iLeftFlag) {
+ * (uint32_t*)uiBS[0][0] = IS_INTRA ((pCurMb - 1)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (pCurMb, // intra neighbor -> 4 in every byte, else the helper's packed 32-bit result
+ pCurMb - 1, 0);
+ } else {
+ * (uint32_t*)uiBS[0][0] = 0; // iLeftFlag is zero: clear uiBS[0][0][0..3]
+ }
+ if (iTopFlag) {
+ * (uint32_t*)uiBS[1][0] = IS_INTRA ((pCurMb - iMbStride)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase ( // same rule, against the MB iMbStride entries before pCurMb
+ pCurMb, (pCurMb - iMbStride), 1);
+ } else {
+ * (uint32_t*)uiBS[1][0] = 0; // iTopFlag is zero: clear uiBS[1][0][0..3]
+ }
+ // Non-SKIP MBs (16x16 or any other type) hand uiBS to a helper below; SKIP MBs zero uiBS[0][1..3] and uiBS[1][1..3].
+ if (uiCurMbType != MB_TYPE_SKIP) {
+ pFunc->pfSetNZCZero (pCurMb->pNonZeroCount); // per the original note: sets every non-zero nzc entry to 1 (callee not shown here); deblocking could be optimized at this point
+
+ if (uiCurMbType == MB_TYPE_16x16) {
+ DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1); // 16x16 path: helper receives only the nzc array and uiBS
+ } else {
+ DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount); // all other non-SKIP types: helper also receives the MB itself
+ }
+ } else {
+ * (uint32_t*)uiBS[0][1] = * (uint32_t*)uiBS[0][2] = * (uint32_t*)uiBS[0][3] =
+ * (uint32_t*)uiBS[1][1] = * (uint32_t*)uiBS[1][2] = * (uint32_t*)uiBS[1][3] = 0; // SKIP MB: six 32-bit stores zero every entry with second index 1..3
+ }
+}
+
void DeblockingMbAvcbase (SWelsFuncPtrList* pFunc, SMB* pCurMb, SDeblockingFilter* pFilter) {
uint8_t uiBS[2][4][4] = {{{ 0 }}};
@@ -605,49 +655,7 @@
DeblockingIntraMb (&pFunc->pfDeblocking, pCurMb, pFilter);
break;
default:
-#if (defined(HAVE_NEON) && defined(SINGLE_REF_FRAME))
- DeblockingBSCalcEnc_neon(pCurMb->pNonZeroCount, pCurMb->sMv, pCurMb->uiNeighborAvail, iMbStride, uiBS);
- if (iLeftFlag){
- if (IS_INTRA((pCurMb-1)->uiMbType)) {
- *(uint32_t*)uiBS[0][0] = 0x04040404;
- }
- } else {
- *(uint32_t*)uiBS[0][0] = 0;
- }
- if (iTopFlag) {
- if (IS_INTRA((pCurMb-iMbStride)->uiMbType)) {
- *(uint32_t*)uiBS[1][0] = 0x04040404;
- }
- } else {
- *(uint32_t*)uiBS[1][0] = 0;
- }
-#else
- if (iLeftFlag) {
- * (uint32_t*)uiBS[0][0] = IS_INTRA ((pCurMb - 1)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (pCurMb,
- pCurMb - 1, 0);
- } else {
- * (uint32_t*)uiBS[0][0] = 0;
- }
- if (iTopFlag) {
- * (uint32_t*)uiBS[1][0] = IS_INTRA ((pCurMb - iMbStride)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (
- pCurMb, (pCurMb - iMbStride), 1);
- } else {
- * (uint32_t*)uiBS[1][0] = 0;
- }
- //SKIP MB_16x16 or others
- if (uiCurMbType != MB_TYPE_SKIP) {
- pFunc->pfSetNZCZero (pCurMb->pNonZeroCount); // set all none-zero nzc to 1; dbk can be opti!
-
- if (uiCurMbType == MB_TYPE_16x16) {
- DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1);
- } else {
- DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount);
- }
- } else {
- * (uint32_t*)uiBS[0][1] = * (uint32_t*)uiBS[0][2] = * (uint32_t*)uiBS[0][3] =
- * (uint32_t*)uiBS[1][1] = * (uint32_t*)uiBS[1][2] = * (uint32_t*)uiBS[1][3] = 0;
- }
-#endif
+ pFunc->pfDeblocking.pfDeblockingBSCalc (pFunc, pCurMb, uiBS, uiCurMbType, iMbStride, iLeftFlag, iTopFlag);
DeblockingInterMb (&pFunc->pfDeblocking, pCurMb, pFilter, uiBS);
break;
}
@@ -803,7 +811,9 @@
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_c;
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_c;
+ pFunc->pfDeblockingBSCalc = DeblockingBSCalc_c;
+
#ifdef X86_ASM
if (iCpu & WELS_CPU_SSSE3) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3;
@@ -828,6 +838,10 @@
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon;
pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
+
+#if defined(SINGLE_REF_FRAME)
+ pFunc->pfDeblockingBSCalc = DeblockingBSCalc_neon;
+#endif
}
#endif
}