Added missing call to the previous layer's getMap in the SIMD mapZoom implementations (AVX2 and SSE4.2)

This commit is contained in:
Cubitect 2018-04-25 00:36:45 +01:00
parent 21599a1761
commit bcb58c935f
3 changed files with 18 additions and 12 deletions

View File

@ -162,11 +162,14 @@ void mapIsland(Layer *l, int * __restrict out, int areaX, int areaZ, int areaWid
}
}
#ifdef __AVX2__
#if defined USE_SIMD && defined __AVX2__
void mapZoom(Layer *l, int* __restrict out, int areaX, int areaZ, int areaWidth, int areaHeight) {
int pWidth = (areaWidth>>1)+2, pHeight = (areaHeight>>1)+1;
l->p->getMap(l->p, out, areaX>>1, areaZ>>1, pWidth, pHeight+1);
__m256i (*selectRand)(__m256i* cs, int ws, __m256i a1, __m256i a2, __m256i a3, __m256i a4) = (l->p->getMap == mapIsland) ? select8Random4 : select8ModeOrRandom;
int newWidth = areaWidth+10&0xFFFFFFFE;//modified to ignore ends
int newWidth = (areaWidth+10)&0xFFFFFFFE;//modified to ignore ends
int x, z;
__m256i cs, a, b, a1, b1, toBuf1, toBuf2, aSuf;
__m256i mask1 = _mm256_setr_epi32(0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0x0), mask2 = _mm256_setr_epi32(0x0, 0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0x0, 0xFFFFFFFF, 0x0, 0xFFFFFFFF);
@ -174,7 +177,7 @@ void mapZoom(Layer *l, int* __restrict out, int areaX, int areaZ, int areaWidth,
int pX = areaX&0xFFFFFFFE;
__m256i xs = _mm256_set_epi32(pX+14, pX+12, pX+10, pX+8, pX+6, pX+4, pX+2, pX), zs;
__m256i v2 = _mm256_set1_epi32(2), v16 = _mm256_set1_epi32(16);
int* buf = malloc((newWidth+1)*(areaHeight+2|1)*sizeof(*buf));
int* buf = malloc((newWidth+1)*((areaHeight+2)|1)*sizeof(*buf));
int* idx = buf;
int* outIdx = out;
//z first!
@ -217,9 +220,12 @@ void mapZoom(Layer *l, int* __restrict out, int areaX, int areaZ, int areaWidth,
free(buf);
}
#elif defined __SSE4_2__
#elif defined USE_SIMD && defined __SSE4_2__
void mapZoom(Layer *l, int* __restrict out, int areaX, int areaZ, int areaWidth, int areaHeight) {
int pWidth = (areaWidth>>1)+2, pHeight = (areaHeight>>1)+1;
l->p->getMap(l->p, out, areaX>>1, areaZ>>1, pWidth, pHeight+1);
__m128i (*selectRand)(__m128i* cs, int ws, __m128i a1, __m128i a2, __m128i a3, __m128i a4) = (l->p->getMap == mapIsland) ? select4Random4 : select4ModeOrRandom;
int newWidth = areaWidth+6&0xFFFFFFFE;//modified to ignore ends
int x, z;

View File

@ -3,17 +3,17 @@
#include <stdlib.h>
#ifdef __AVX2__
#if defined USE_SIMD && __AVX2__
#include <emmintrin.h>
#include <smmintrin.h>
#include <immintrin.h>
#warning "Using AVX2 extensions."
#elif defined __SSE4_2__
#elif defined USE_SIMD && defined __SSE4_2__
#include <emmintrin.h>
#include <smmintrin.h>
#warning "Using SSE4.2 extensions."
#else
#warning "Using no SIMD extensions."
//#warning "Using no SIMD extensions."
#endif
#define STRUCT(S) typedef struct S S; struct S
@ -185,7 +185,7 @@ static inline void setBaseSeed(Layer *layer, long seed)
layer->chunkSeed = 0;
}
#ifdef __AVX2__
#if defined USE_SIMD && __AVX2__
static inline __m256i set8ChunkSeeds(int ws, __m256i xs, __m256i zs) {
__m256i out = _mm256_set1_epi32(ws);
__m256i mul = _mm256_set1_epi32(1284865837);
@ -256,7 +256,7 @@ static inline __m256i select8ModeOrRandom(__m256i* cs, int ws, __m256i a1, __m25
)
);
}
#elif defined __SSE4_2__
#elif defined USE_SIMD && defined __SSE4_2__
static inline __m128i set4ChunkSeeds(int ws, __m128i xs, __m128i zs) {
__m128i out = _mm_set1_epi32(ws);
__m128i mul = _mm_set1_epi32(1284865837);

View File

@ -1,6 +1,6 @@
CC = gcc
CFLAGS = -O3 -Wall -fwrapv -march=native
LDFLAGS = -lm -pthread
override CFLAGS += -O3 -Wall -fwrapv -march=native
.PHONY : all clean
@ -8,13 +8,13 @@ all: find_quadhuts find_compactbiomes clean
find_compactbiomes: find_compactbiomes.o layers.o generator.o finders.o
$(CC) -o $@ $^ $(LDFLAGS)
find_compactbiomes.o: find_compactbiomes.c
$(CC) -c $(CFLAGS) $<
find_quadhuts: find_quadhuts.o layers.o generator.o finders.o
$(CC) -o $@ $^ $(LDFLAGS)
find_quadhuts.o: find_quadhuts.c
$(CC) -c $(CFLAGS) $<