Bläddra i källkod

Pre-calculate start offset, optimize bound check

Increases performance by 30% for RGBA and 45% for Gray images, with a
minor performance increase for 16-bit images.

The start offsets calculated by createWeights are stored in a slice and
passed to the resize functions to prevent duplication of effort.
Charlie Vieth 12 år sedan
förälder
incheckning
80b3fc2b3f
3 ändrade filer med 146 tillägg och 125 borttagningar
  1. 112 93
      converter.go
  2. 10 8
      filters.go
  3. 24 24
      resize.go

+ 112 - 93
converter.go

@@ -43,32 +43,35 @@ func clampUint16(in int64) uint16 {
 	return uint16(in)
 }
 
-func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) {
+func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
 	oldBounds := in.Bounds()
 	newBounds := out.Bounds()
 
 	for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
 		for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
-			interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
-			start := int(interpX) - filterLength/2 + 1
-
 			var rgba [4]int64
 			var sum int64
+			start := offset[y]
+			ci := (y - newBounds.Min.Y) * filterLength
 			for i := 0; i < filterLength; i++ {
-				xx := start + i
-				if xx < oldBounds.Min.X {
-					xx = oldBounds.Min.X
-				} else if xx >= oldBounds.Max.X {
-					xx = oldBounds.Max.X - 1
+				coeff := coeffs[ci+i]
+				if coeff != 0 {
+					xi := start + i
+					switch {
+					case uint(xi) < uint(oldBounds.Max.X):
+						break
+					case xi >= oldBounds.Max.X:
+						xi = oldBounds.Min.X
+					default:
+						xi = oldBounds.Max.X - 1
+					}
+					r, g, b, a := in.At(xi, x).RGBA()
+					rgba[0] += int64(coeff) * int64(r)
+					rgba[1] += int64(coeff) * int64(g)
+					rgba[2] += int64(coeff) * int64(b)
+					rgba[3] += int64(coeff) * int64(a)
+					sum += int64(coeff)
 				}
-
-				coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
-				r, g, b, a := in.At(xx, x).RGBA()
-				rgba[0] += int64(coeff) * int64(r)
-				rgba[1] += int64(coeff) * int64(g)
-				rgba[2] += int64(coeff) * int64(b)
-				rgba[3] += int64(coeff) * int64(a)
-				sum += int64(coeff)
 			}
 
 			offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8
@@ -88,114 +91,126 @@ func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []in
 	}
 }
 
-func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, filterLength int) {
+func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) {
 	oldBounds := in.Bounds()
 	newBounds := out.Bounds()
+	minX := oldBounds.Min.X * 4
+	maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4
 
 	for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
 		row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
 		for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
-			interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
-			start := int(interpX) - filterLength/2 + 1
-
 			var rgba [4]int32
 			var sum int32
+			start := offset[y]
+			ci := (y - newBounds.Min.Y) * filterLength
 			for i := 0; i < filterLength; i++ {
-				xx := start + i
-				if xx < oldBounds.Min.X {
-					xx = oldBounds.Min.X
-				} else if xx >= oldBounds.Max.X {
-					xx = oldBounds.Max.X - 1
+				coeff := coeffs[ci+i]
+				if coeff != 0 {
+					xi := start + i
+					switch {
+					case uint(xi) < uint(oldBounds.Max.X):
+						xi *= 4
+					case xi >= oldBounds.Max.X:
+						xi = maxX
+					default:
+						xi = minX
+					}
+					rgba[0] += int32(coeff) * int32(row[xi+0])
+					rgba[1] += int32(coeff) * int32(row[xi+1])
+					rgba[2] += int32(coeff) * int32(row[xi+2])
+					rgba[3] += int32(coeff) * int32(row[xi+3])
+					sum += int32(coeff)
 				}
-
-				coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
-				offset := (xx - oldBounds.Min.X) * 4
-				rgba[0] += int32(coeff) * int32(row[offset+0])
-				rgba[1] += int32(coeff) * int32(row[offset+1])
-				rgba[2] += int32(coeff) * int32(row[offset+2])
-				rgba[3] += int32(coeff) * int32(row[offset+3])
-				sum += int32(coeff)
 			}
 
-			offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4
-			out.Pix[offset+0] = clampUint8(rgba[0] / sum)
-			out.Pix[offset+1] = clampUint8(rgba[1] / sum)
-			out.Pix[offset+2] = clampUint8(rgba[2] / sum)
-			out.Pix[offset+3] = clampUint8(rgba[3] / sum)
+			xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4
+			out.Pix[xo+0] = clampUint8(rgba[0] / sum)
+			out.Pix[xo+1] = clampUint8(rgba[1] / sum)
+			out.Pix[xo+2] = clampUint8(rgba[2] / sum)
+			out.Pix[xo+3] = clampUint8(rgba[3] / sum)
 		}
 	}
 }
 
-func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) {
+func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
 	oldBounds := in.Bounds()
 	newBounds := out.Bounds()
+	minX := oldBounds.Min.X * 8
+	maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8
 
 	for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
 		row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
 		for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
-			interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
-			start := int(interpX) - filterLength/2 + 1
-
 			var rgba [4]int64
 			var sum int64
+			start := offset[y]
+			ci := (y - newBounds.Min.Y) * filterLength
 			for i := 0; i < filterLength; i++ {
-				xx := start + i
-				if xx < oldBounds.Min.X {
-					xx = oldBounds.Min.X
-				} else if xx >= oldBounds.Max.X {
-					xx = oldBounds.Max.X - 1
+				coeff := coeffs[ci+i]
+				if coeff != 0 {
+					xi := start + i
+					switch {
+					case uint(xi) < uint(oldBounds.Max.X):
+						xi *= 8
+					case xi >= oldBounds.Max.X:
+						xi = maxX
+					default:
+						xi = minX
+					}
+					rgba[0] += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1]))
+					rgba[1] += int64(coeff) * int64(uint16(row[xi+2])<<8|uint16(row[xi+3]))
+					rgba[2] += int64(coeff) * int64(uint16(row[xi+4])<<8|uint16(row[xi+5]))
+					rgba[3] += int64(coeff) * int64(uint16(row[xi+6])<<8|uint16(row[xi+7]))
+					sum += int64(coeff)
 				}
-
-				coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
-				offset := (xx - oldBounds.Min.X) * 8
-				rgba[0] += int64(coeff) * int64(uint16(row[offset+0])<<8|uint16(row[offset+1]))
-				rgba[1] += int64(coeff) * int64(uint16(row[offset+2])<<8|uint16(row[offset+3]))
-				rgba[2] += int64(coeff) * int64(uint16(row[offset+4])<<8|uint16(row[offset+5]))
-				rgba[3] += int64(coeff) * int64(uint16(row[offset+6])<<8|uint16(row[offset+7]))
-				sum += int64(coeff)
 			}
 
-			offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8
+			xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8
 			value := clampUint16(rgba[0] / sum)
-			out.Pix[offset+0] = uint8(value >> 8)
-			out.Pix[offset+1] = uint8(value)
+			out.Pix[xo+0] = uint8(value >> 8)
+			out.Pix[xo+1] = uint8(value)
 			value = clampUint16(rgba[1] / sum)
-			out.Pix[offset+2] = uint8(value >> 8)
-			out.Pix[offset+3] = uint8(value)
+			out.Pix[xo+2] = uint8(value >> 8)
+			out.Pix[xo+3] = uint8(value)
 			value = clampUint16(rgba[2] / sum)
-			out.Pix[offset+4] = uint8(value >> 8)
-			out.Pix[offset+5] = uint8(value)
+			out.Pix[xo+4] = uint8(value >> 8)
+			out.Pix[xo+5] = uint8(value)
 			value = clampUint16(rgba[3] / sum)
-			out.Pix[offset+6] = uint8(value >> 8)
-			out.Pix[offset+7] = uint8(value)
+			out.Pix[xo+6] = uint8(value >> 8)
+			out.Pix[xo+7] = uint8(value)
 		}
 	}
 }
 
-func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, filterLength int) {
+func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, offset []int, filterLength int) {
 	oldBounds := in.Bounds()
 	newBounds := out.Bounds()
+	minX := oldBounds.Min.X
+	maxX := (oldBounds.Max.X - oldBounds.Min.X - 1)
 
 	for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
 		row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
 		for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
-			interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
-			start := int(interpX) - filterLength/2 + 1
-
 			var gray int32
 			var sum int32
+			start := offset[y]
+			ci := (y - newBounds.Min.Y) * filterLength
 			for i := 0; i < filterLength; i++ {
-				xx := start + i
-				if xx < oldBounds.Min.X {
-					xx = oldBounds.Min.X
-				} else if xx >= oldBounds.Max.X {
-					xx = oldBounds.Max.X - 1
+				coeff := coeffs[ci+i]
+				if coeff != 0 {
+					xi := start + i
+					switch {
+					case uint(xi) < uint(oldBounds.Max.X):
+						break
+					case xi >= oldBounds.Max.X:
+						xi = maxX
+					default:
+						xi = minX
+					}
+					gray += int32(coeff) * int32(row[xi])
+					sum += int32(coeff)
 				}
-
-				coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
-				offset := (xx - oldBounds.Min.X)
-				gray += int32(coeff) * int32(row[offset])
-				sum += int32(coeff)
 			}
 
 			offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X)
@@ -204,30 +219,34 @@ func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16,
 	}
 }
 
-func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, filterLength int) {
+func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, offset []int, filterLength int) {
 	oldBounds := in.Bounds()
 	newBounds := out.Bounds()
+	minX := oldBounds.Min.X * 2
+	maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2
 
 	for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
 		row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:]
 		for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
-			interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X)
-			start := int(interpX) - filterLength/2 + 1
-
 			var gray int64
 			var sum int64
+			start := offset[y]
+			ci := (y - newBounds.Min.Y) * filterLength
 			for i := 0; i < filterLength; i++ {
-				xx := start + i
-				if xx < oldBounds.Min.X {
-					xx = oldBounds.Min.X
-				} else if xx >= oldBounds.Max.X {
-					xx = oldBounds.Max.X - 1
+				coeff := coeffs[ci+i]
+				if coeff != 0 {
+					xi := start + i
+					switch {
+					case uint(xi) < uint(oldBounds.Max.X):
+						xi *= 2
+					case xi >= oldBounds.Max.X:
+						xi = maxX
+					default:
+						xi = minX
+					}
+					gray += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1]))
+					sum += int64(coeff)
 				}
-
-				coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i]
-				offset := (xx - oldBounds.Min.X) * 2
-				gray += int64(coeff) * int64(uint16(row[offset+0])<<8|uint16(row[offset+1]))
-				sum += int64(coeff)
 			}
 
 			offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2

+ 10 - 8
filters.go

@@ -80,37 +80,39 @@ func lanczos3(in float64) float64 {
 }
 
 // range [-256,256]
-func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, int) {
+func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, []int, int) {
 	filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1))
 	filterFactor := math.Min(1./(blur*scale), 1)
 
 	coeffs := make([]int16, dy*filterLength)
+	start := make([]int, dy)
 	for y := 0; y < dy; y++ {
 		interpX := scale*(float64(y)+0.5) + float64(minx)
-		start := int(interpX) - filterLength/2 + 1
+		start[y] = int(interpX) - filterLength/2 + 1
 		for i := 0; i < filterLength; i++ {
-			in := (interpX - float64(start) - float64(i)) * filterFactor
+			in := (interpX - float64(start[y]) - float64(i)) * filterFactor
 			coeffs[y*filterLength+i] = int16(kernel(in) * 256)
 		}
 	}
 
-	return coeffs, filterLength
+	return coeffs, start, filterLength
 }
 
 // range [-65536,65536]
-func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, int) {
+func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, []int, int) {
 	filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1))
 	filterFactor := math.Min(1./(blur*scale), 1)
 
 	coeffs := make([]int32, dy*filterLength)
+	start := make([]int, dy)
 	for y := 0; y < dy; y++ {
 		interpX := scale*(float64(y)+0.5) + float64(minx)
-		start := int(interpX) - filterLength/2 + 1
+		start[y] = int(interpX) - filterLength/2 + 1
 		for i := 0; i < filterLength; i++ {
-			in := (interpX - float64(start) - float64(i)) * filterFactor
+			in := (interpX - float64(start[y]) - float64(i)) * filterFactor
 			coeffs[y*filterLength+i] = int32(kernel(in) * 65536)
 		}
 	}
 
-	return coeffs, filterLength
+	return coeffs, start, filterLength
 }

+ 24 - 24
resize.go

@@ -95,25 +95,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
 		result := image.NewRGBA(image.Rect(0, 0, int(width), int(height)))
 
 		// horizontal filter, results in transposed temporary image
-		coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
+		coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(temp, i, cpus).(*image.RGBA)
 			go func() {
 				defer wg.Done()
-				resizeRGBA(input, slice, scaleX, coeffs, filterLength)
+				resizeRGBA(input, slice, scaleX, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
 
 		// horizontal filter on transposed image, result is not transposed
-		coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
+		coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(result, i, cpus).(*image.RGBA)
 			go func() {
 				defer wg.Done()
-				resizeRGBA(temp, slice, scaleY, coeffs, filterLength)
+				resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
@@ -127,25 +127,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
 		result := image.NewRGBA(image.Rect(0, 0, int(width), int(height)))
 
 		// horizontal filter, results in transposed temporary image
-		coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
+		coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(temp, i, cpus).(*image.RGBA)
 			go func() {
 				defer wg.Done()
-				resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, filterLength)
+				resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
 
 		// horizontal filter on transposed image, result is not transposed
-		coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
+		coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(result, i, cpus).(*image.RGBA)
 			go func() {
 				defer wg.Done()
-				resizeRGBA(temp, slice, scaleY, coeffs, filterLength)
+				resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
@@ -156,25 +156,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
 		result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height)))
 
 		// horizontal filter, results in transposed temporary image
-		coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
+		coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(temp, i, cpus).(*image.RGBA64)
 			go func() {
 				defer wg.Done()
-				resizeRGBA64(input, slice, scaleX, coeffs, filterLength)
+				resizeRGBA64(input, slice, scaleX, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
 
 		// horizontal filter on transposed image, result is not transposed
-		coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
+		coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(result, i, cpus).(*image.RGBA64)
 			go func() {
 				defer wg.Done()
-				resizeGeneric(temp, slice, scaleY, coeffs, filterLength)
+				resizeGeneric(temp, slice, scaleY, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
@@ -185,25 +185,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
 		result := image.NewGray(image.Rect(0, 0, int(width), int(height)))
 
 		// horizontal filter, results in transposed temporary image
-		coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
+		coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(temp, i, cpus).(*image.Gray)
 			go func() {
 				defer wg.Done()
-				resizeGray(input, slice, scaleX, coeffs, filterLength)
+				resizeGray(input, slice, scaleX, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
 
 		// horizontal filter on transposed image, result is not transposed
-		coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
+		coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(result, i, cpus).(*image.Gray)
 			go func() {
 				defer wg.Done()
-				resizeGray(temp, slice, scaleY, coeffs, filterLength)
+				resizeGray(temp, slice, scaleY, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
@@ -214,25 +214,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
 		result := image.NewGray16(image.Rect(0, 0, int(width), int(height)))
 
 		// horizontal filter, results in transposed temporary image
-		coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
+		coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(temp, i, cpus).(*image.Gray16)
 			go func() {
 				defer wg.Done()
-				resizeGray16(input, slice, scaleX, coeffs, filterLength)
+				resizeGray16(input, slice, scaleX, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
 
 		// horizontal filter on transposed image, result is not transposed
-		coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
+		coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(result, i, cpus).(*image.Gray16)
 			go func() {
 				defer wg.Done()
-				resizeGray16(temp, slice, scaleY, coeffs, filterLength)
+				resizeGray16(temp, slice, scaleY, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
@@ -243,25 +243,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
 		result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height)))
 
 		// horizontal filter, results in transposed temporary image
-		coeffs, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel)
+		coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(temp, i, cpus).(*image.RGBA64)
 			go func() {
 				defer wg.Done()
-				resizeGeneric(img, slice, scaleX, coeffs, filterLength)
+				resizeGeneric(img, slice, scaleX, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()
 
 		// horizontal filter on transposed image, result is not transposed
-		coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
+		coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel)
 		wg.Add(cpus)
 		for i := 0; i < cpus; i++ {
 			slice := makeSlice(result, i, cpus).(*image.RGBA64)
 			go func() {
 				defer wg.Done()
-				resizeRGBA64(temp, slice, scaleY, coeffs, filterLength)
+				resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
 			}()
 		}
 		wg.Wait()