瀏覽代碼

copy: do not copy more than the cache width on cache

Thomas Guillem 7 年之前
父節點
當前提交
09d421a208
共有 1 個文件被更改,包括 8 次插入和 4 次删除
  1. 8 4
      modules/video_chroma/copy.c

+ 8 - 4
modules/video_chroma/copy.c

@@ -469,6 +469,7 @@ static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
     const size_t copy_pitch = __MIN(src_pitch, dst_pitch);
     const unsigned w16 = (copy_pitch+15) & ~15;
     const unsigned hstep = cache_size / w16;
+    const unsigned cache_width = __MIN(src_pitch, hstep);
     assert(hstep > 0);
 
     /* If SSE4.1: CopyFromUswc is faster than memcpy */
@@ -479,7 +480,7 @@ static void SSE_CopyPlane(uint8_t *dst, size_t dst_pitch,
         const unsigned hblock =  __MIN(hstep, height - y);
 
         /* Copy a bunch of line into our cache */
-        CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock, bitshift);
+        CopyFromUswc(cache, w16, src, src_pitch, cache_width, hblock, bitshift);
 
         /* Copy from our cache to the destination */
         Copy2d(dst, dst_pitch, cache, w16, copy_pitch, hblock);
@@ -501,6 +502,8 @@ SSE_InterleavePlanes(uint8_t *dst, size_t dst_pitch,
     size_t copy_pitch = __MIN(dst_pitch / 2, srcu_pitch);
     unsigned int const  w16 = (srcu_pitch+15) & ~15;
     unsigned int const  hstep = (cache_size) / (2*w16);
+    const unsigned cacheu_width = __MIN(srcu_pitch, hstep);
+    const unsigned cachev_width = __MIN(srcv_pitch, hstep);
     assert(hstep > 0);
 
     for (unsigned int y = 0; y < height; y += hstep)
@@ -508,9 +511,9 @@ SSE_InterleavePlanes(uint8_t *dst, size_t dst_pitch,
         unsigned int const      hblock = __MIN(hstep, height - y);
 
         /* Copy a bunch of line into our cache */
-        CopyFromUswc(cache, w16, srcu, srcu_pitch, srcu_pitch, hblock, bitshift);
+        CopyFromUswc(cache, w16, srcu, srcu_pitch, cacheu_width, hblock, bitshift);
         CopyFromUswc(cache+w16*hblock, w16, srcv, srcv_pitch,
-                     srcv_pitch, hblock, bitshift);
+                     cachev_width, hblock, bitshift);
 
         /* Copy from our cache to the destination */
         SSE_InterleaveUV(dst, dst_pitch, cache, w16,
@@ -533,13 +536,14 @@ static void SSE_SplitPlanes(uint8_t *dstu, size_t dstu_pitch,
     size_t copy_pitch = __MIN(__MIN(src_pitch / 2, dstu_pitch), dstv_pitch);
     const unsigned w16 = (src_pitch+15) & ~15;
     const unsigned hstep = cache_size / w16;
+    const unsigned cache_width = __MIN(src_pitch, hstep);
     assert(hstep > 0);
 
     for (unsigned y = 0; y < height; y += hstep) {
         const unsigned hblock =  __MIN(hstep, height - y);
 
         /* Copy a bunch of line into our cache */
-        CopyFromUswc(cache, w16, src, src_pitch, src_pitch, hblock, bitshift);
+        CopyFromUswc(cache, w16, src, src_pitch, cache_width, hblock, bitshift);
 
         /* Copy from our cache to the destination */
         SSE_SplitUV(dstu, dstu_pitch, dstv, dstv_pitch,