diff --git a/0000-bugfix-test-md5-check-failure-1.patch b/0000-bugfix-test-md5-check-failure-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e8b3d3c5a0316185315750bec8b98ad61824909 --- /dev/null +++ b/0000-bugfix-test-md5-check-failure-1.patch @@ -0,0 +1,551 @@ +From b81a5095563776397a4239132d2b737a1083e02f Mon Sep 17 00:00:00 2001 +From: Wayne Davison +Date: Thu, 3 Mar 2022 17:00:57 -0800 +Subject: [PATCH] Make asm use more selectable + +- Make the SIMD ASM code off by default. Use configure --enable-simd-asm + to enable. +- Allow MD5 ASM code to be requested even when OpenSSL is handling MD4 + checksums. Use configure --enable-md5-asm to enable. +--- + Makefile.in | 15 ++-- + checksum.c | 34 ++++----- + lib/md5-asm-x86_64.S | 4 +- + lib/md5.c | 19 ++--- + lib/mdigest.h | 13 ++-- + rsync.h | 9 +-- + simd-checksum-avx2.S | 14 +++- + simd-checksum-x86_64.cpp | 151 ++++++++++++++++++++++++++++++++++++--- + 8 files changed, 198 insertions(+), 62 deletions(-) + +diff --git a/Makefile.in b/Makefile.in +index 8817edab..3cde9557 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -30,8 +30,9 @@ SHELL=/bin/sh + .SUFFIXES: + .SUFFIXES: .c .o + +-SIMD_x86_64=simd-checksum-x86_64.o simd-checksum-avx2.o +-ASM_x86_64=lib/md5-asm-x86_64.o ++ROLL_SIMD_x86_64=simd-checksum-x86_64.o ++ROLL_ASM_x86_64=simd-checksum-avx2.o ++MD5_ASM_x86_64=lib/md5-asm-x86_64.o + + GENFILES=configure.sh aclocal.m4 config.h.in rsync.1 rsync.1.html \ + rsync-ssl.1 rsync-ssl.1.html rsyncd.conf.5 rsyncd.conf.5.html \ +@@ -46,7 +47,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \ + util1.o util2.o main.o checksum.o match.o syscall.o log.o backup.o delete.o + OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \ + usage.o fileio.o batch.o clientname.o chmod.o acls.o xattrs.o +-OBJS3=progress.o pipe.o @ASM@ @SIMD@ ++OBJS3=progress.o pipe.o @MD5_ASM@ @ROLL_SIMD@ @ROLL_ASM@ + DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o + popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \ + popt/popthelp.o popt/poptparse.o +@@ -147,13 +148,13 @@ git-version.h: ALWAYS_RUN + ALWAYS_RUN: + + simd-checksum-x86_64.o: simd-checksum-x86_64.cpp +- @$(srcdir)/cmd-or-msg disable-simd $(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp ++ @$(srcdir)/cmd-or-msg disable-roll-simd $(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp + + simd-checksum-avx2.o: simd-checksum-avx2.S +- @$(srcdir)/cmd-or-msg disable-asm $(CC) $(CFLAGS) --include=$(srcdir)/rsync.h -DAVX2_ASM -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/simd-checksum-avx2.S ++ @$(srcdir)/cmd-or-msg disable-roll-asm $(CC) $(CFLAGS) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/simd-checksum-avx2.S + +-lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S config.h lib/md-defines.h +- @$(srcdir)/cmd-or-msg disable-asm $(CC) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S ++lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S lib/md-defines.h ++ @$(srcdir)/cmd-or-msg disable-md5-asm $(CC) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S + + tls$(EXEEXT): $(TLS_OBJ) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(TLS_OBJ) $(LIBS) +diff --git a/checksum.c b/checksum.c +index 1ed76828..77848585 100644 +--- a/checksum.c ++++ b/checksum.c +@@ -179,7 +179,7 @@ int canonical_checksum(int csum_type) + return 0; + } + +-#ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */ ++#ifndef USE_ROLL_SIMD /* See simd-checksum-*.cpp. 
*/ + /* + a simple 32 bit checksum that can be updated from either end + (inspired by Mark Adler's Adler-32 checksum) +@@ -222,23 +222,23 @@ void get_checksum2(char *buf, int32 len, char *sum) + } + #endif + case CSUM_MD5: { +- MD5_CTX m5; ++ md5_context m5; + uchar seedbuf[4]; +- MD5_Init(&m5); ++ md5_begin(&m5); + if (proper_seed_order) { + if (checksum_seed) { + SIVALu(seedbuf, 0, checksum_seed); +- MD5_Update(&m5, seedbuf, 4); ++ md5_update(&m5, seedbuf, 4); + } +- MD5_Update(&m5, (uchar *)buf, len); ++ md5_update(&m5, (uchar *)buf, len); + } else { +- MD5_Update(&m5, (uchar *)buf, len); ++ md5_update(&m5, (uchar *)buf, len); + if (checksum_seed) { + SIVALu(seedbuf, 0, checksum_seed); +- MD5_Update(&m5, seedbuf, 4); ++ md5_update(&m5, seedbuf, 4); + } + } +- MD5_Final((uchar *)sum, &m5); ++ md5_result(&m5, (uchar *)sum); + break; + } + case CSUM_MD4: +@@ -374,18 +374,18 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) + } + #endif + case CSUM_MD5: { +- MD5_CTX m5; ++ md5_context m5; + +- MD5_Init(&m5); ++ md5_begin(&m5); + + for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) +- MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); ++ md5_update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); + + remainder = (int32)(len - i); + if (remainder > 0) +- MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder); ++ md5_update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder); + +- MD5_Final((uchar *)sum, &m5); ++ md5_result(&m5, (uchar *)sum); + break; + } + case CSUM_MD4: +@@ -443,7 +443,7 @@ static union { + #ifdef USE_OPENSSL + MD4_CTX m4; + #endif +- MD5_CTX m5; ++ md5_context m5; + } ctx; + #ifdef SUPPORT_XXHASH + static XXH64_state_t* xxh64_state; +@@ -482,7 +482,7 @@ void sum_init(int csum_type, int seed) + break; + #endif + case CSUM_MD5: +- MD5_Init(&ctx.m5); ++ md5_begin(&ctx.m5); + break; + case CSUM_MD4: + #ifdef USE_OPENSSL +@@ -532,7 +532,7 @@ void sum_update(const char *p, int32 len) + break; + #endif + case CSUM_MD5: +- MD5_Update(&ctx.m5, (uchar *)p, len); ++ md5_update(&ctx.m5, (uchar *)p, len); + break; + case CSUM_MD4: + #ifdef USE_OPENSSL +@@ -597,7 +597,7 @@ int sum_end(char *sum) + } + #endif + case CSUM_MD5: +- MD5_Final((uchar *)sum, &ctx.m5); ++ md5_result(&ctx.m5, (uchar *)sum); + break; + case CSUM_MD4: + #ifdef USE_OPENSSL +diff --git a/lib/md5-asm-x86_64.S b/lib/md5-asm-x86_64.S +index 383f193a..3737058f 100644 +--- a/lib/md5-asm-x86_64.S ++++ b/lib/md5-asm-x86_64.S +@@ -27,7 +27,7 @@ + #include "config.h" + #include "md-defines.h" + +-#if !defined USE_OPENSSL && CSUM_CHUNK == 64 ++#ifdef USE_MD5_ASM /* { */ + + #ifdef __APPLE__ + #define md5_process_asm _md5_process_asm +@@ -698,4 +698,4 @@ md5_process_asm: + pop %rbp + ret + +-#endif /* !USE_OPENSSL ... 
*/ ++#endif /* } USE_MD5_ASM */ +diff --git a/lib/md5.c b/lib/md5.c +index 41f158b8..07fd6147 100644 +--- a/lib/md5.c ++++ b/lib/md5.c +@@ -20,7 +20,7 @@ + + #include "rsync.h" + +-#ifndef USE_OPENSSL ++#if !defined USE_OPENSSL || USE_MD5_ASM /* { */ + void md5_begin(md_context *ctx) + { + ctx->A = 0x67452301; +@@ -148,7 +148,10 @@ static void md5_process(md_context *ctx, const uchar data[CSUM_CHUNK]) + ctx->D += D; + } + +-#if defined HAVE_ASM && CSUM_CHUNK == 64 ++#ifdef USE_MD5_ASM ++#if CSUM_CHUNK != 64 ++#error The MD5 ASM code does not support CSUM_CHUNK != 64 ++#endif + extern void md5_process_asm(md_context *ctx, const void *data, size_t num); + #endif + +@@ -176,20 +179,20 @@ void md5_update(md_context *ctx, const uchar *input, uint32 length) + left = 0; + } + +-#if defined HAVE_ASM && CSUM_CHUNK == 64 ++#ifdef USE_MD5_ASM /* { */ + if (length >= CSUM_CHUNK) { + uint32 chunks = length / CSUM_CHUNK; + md5_process_asm(ctx, input, chunks); + length -= chunks * CSUM_CHUNK; + input += chunks * CSUM_CHUNK; + } +-#else ++#else /* } { */ + while (length >= CSUM_CHUNK) { + md5_process(ctx, input); + length -= CSUM_CHUNK; + input += CSUM_CHUNK; + } +-#endif ++#endif /* } */ + + if (length) + memcpy(ctx->buffer + left, input, length); +@@ -221,9 +224,9 @@ void md5_result(md_context *ctx, uchar digest[MD5_DIGEST_LEN]) + SIVALu(digest, 8, ctx->C); + SIVALu(digest, 12, ctx->D); + } +-#endif ++#endif /* } */ + +-#ifdef TEST_MD5 ++#ifdef TEST_MD5 /* { */ + + void get_md5(uchar *out, const uchar *input, int n) + { +@@ -317,4 +320,4 @@ int main(int argc, char *argv[]) + return 0; + } + +-#endif ++#endif /* } */ +diff --git a/lib/mdigest.h b/lib/mdigest.h +index db174017..f1d6d934 100644 +--- a/lib/mdigest.h ++++ b/lib/mdigest.h +@@ -17,12 +17,13 @@ void mdfour_begin(md_context *md); + void mdfour_update(md_context *md, const uchar *in, uint32 length); + void mdfour_result(md_context *md, uchar digest[MD4_DIGEST_LEN]); + +-#ifndef USE_OPENSSL +-#define MD5_CTX md_context +-#define MD5_Init md5_begin +-#define MD5_Update md5_update +-#define MD5_Final(digest, cptr) md5_result(cptr, digest) +- ++#if defined USE_OPENSSL && !defined USE_MD5_ASM ++#define md5_context MD5_CTX ++#define md5_begin MD5_Init ++#define md5_update MD5_Update ++#define md5_result(cptr, digest) MD5_Final(digest, cptr) ++#else ++#define md5_context md_context + void md5_begin(md_context *ctx); + void md5_update(md_context *ctx, const uchar *input, uint32 length); + void md5_result(md_context *ctx, uchar digest[MD5_DIGEST_LEN]); +diff --git a/rsync.h b/rsync.h +index 41a014c3..4b30570b 100644 +--- a/rsync.h ++++ b/rsync.h +@@ -18,11 +18,6 @@ + * with this program; if not, visit the http://fsf.org website. + */ + +-/* a non-zero CHAR_OFFSET makes the rolling sum stronger, but is +- incompatible with older versions :-( */ +-#define CHAR_OFFSET 0 +- +-#ifndef AVX2_ASM /* do not include the rest of file for assembly */ + #define False 0 + #define True 1 + #define Unset (-1) /* Our BOOL values are always an int. */ +@@ -43,6 +38,9 @@ + + #define BACKUP_SUFFIX "~" + ++/* a non-zero CHAR_OFFSET makes the rolling sum stronger, but is ++ incompatible with older versions :-( */ ++#define CHAR_OFFSET 0 + + /* These flags are only used during the flist transfer. 
*/ + +@@ -1477,7 +1475,6 @@ const char *get_panic_action(void); + fprintf(stderr, "%s in %s at line %d\n", msg, __FILE__, __LINE__); \ + exit_cleanup(RERR_UNSUPPORTED); \ + } while (0) +-#endif /* AVX2_ASM */ + + #ifdef HAVE_MALLINFO2 + #define MEM_ALLOC_INFO mallinfo2 +diff --git a/simd-checksum-avx2.S b/simd-checksum-avx2.S +index dc8d145b..549cc3ef 100644 +--- a/simd-checksum-avx2.S ++++ b/simd-checksum-avx2.S +@@ -1,15 +1,21 @@ ++#include "config.h" ++ ++#ifdef USE_ROLL_ASM /* { */ ++ ++#define CHAR_OFFSET 0 /* Keep this the same as rsync.h, which isn't likely to change. */ ++ + #ifdef __APPLE__ +-#define get_checksum1_avx2 _get_checksum1_avx2 ++#define get_checksum1_avx2_asm _get_checksum1_avx2_asm + #endif + + .intel_syntax noprefix + .text + + .p2align 5 +- .globl get_checksum1_avx2 ++ .globl get_checksum1_avx2_asm + + # rdi=*buf, esi=len, edx=i, rcx= *ps1, r8= *ps2 +-get_checksum1_avx2: ++get_checksum1_avx2_asm: + vmovd xmm6,[rcx] # load *ps1 + lea eax, [rsi-128] # at least 128 bytes to process? + cmp edx, eax +@@ -167,3 +173,5 @@ get_checksum1_avx2: + .byte 3 + .byte 2 + .byte 1 ++ ++#endif /* } USE_ROLL_ASM */ +diff --git a/simd-checksum-x86_64.cpp b/simd-checksum-x86_64.cpp +index ebeeac2d..33f26e92 100644 +--- a/simd-checksum-x86_64.cpp ++++ b/simd-checksum-x86_64.cpp +@@ -51,12 +51,12 @@ + * GCC 4.x are not supported to ease configure.ac logic. + */ + +-#ifdef __x86_64__ +-#ifdef __cplusplus ++#ifdef __x86_64__ /* { */ ++#ifdef __cplusplus /* { */ + + #include "rsync.h" + +-#ifdef HAVE_SIMD ++#ifdef USE_ROLL_SIMD /* { */ + + #include + +@@ -85,6 +85,9 @@ typedef long long __m256i_u __attribute__((__vector_size__(32), __may_alias__, _ + #define SSE2_HADDS_EPI16(a, b) _mm_adds_epi16(SSE2_INTERLEAVE_EVEN_EPI16(a, b), SSE2_INTERLEAVE_ODD_EPI16(a, b)) + #define SSE2_MADDUBS_EPI16(a, b) _mm_adds_epi16(SSE2_MULU_EVEN_EPI8(a, b), SSE2_MULU_ODD_EPI8(a, b)) + ++#ifndef USE_ROLL_ASM ++__attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } ++#endif + __attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_ssse3_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } + __attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } + +@@ -245,7 +248,7 @@ __attribute__ ((target("sse2"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf + + // (4*buf[i] + 3*buf[i+1]), (2*buf[i+2], buf[i+3]), ... 
2*[int16*8] + __m128i mul_const = _mm_set1_epi32(4 + (3 << 8) + (2 << 16) + (1 << 24)); +- __m128i mul_add16_1 = SSE2_MADDUBS_EPI16(mul_const, in8_1); ++ __m128i mul_add16_1 = SSE2_MADDUBS_EPI16(mul_const, in8_1); + __m128i mul_add16_2 = SSE2_MADDUBS_EPI16(mul_const, in8_2); + + // s2 += 32*s1 +@@ -310,7 +313,127 @@ __attribute__ ((target("sse2"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf + return i; + } + +-extern "C" __attribute__ ((target("avx2"))) int32 get_checksum1_avx2(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2); ++#ifdef USE_ROLL_ASM /* { */ ++ ++extern "C" __attribute__ ((target("avx2"))) int32 get_checksum1_avx2_asm(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2); ++ ++#else /* } { */ ++ ++/* ++ AVX2 loop per 64 bytes: ++ int16 t1[16]; ++ int16 t2[16]; ++ for (int j = 0; j < 16; j++) { ++ t1[j] = buf[j*4 + i] + buf[j*4 + i+1] + buf[j*4 + i+2] + buf[j*4 + i+3]; ++ t2[j] = 4*buf[j*4 + i] + 3*buf[j*4 + i+1] + 2*buf[j*4 + i+2] + buf[j*4 + i+3]; ++ } ++ s2 += 64*s1 + (uint32)( ++ 60*t1[0] + 56*t1[1] + 52*t1[2] + 48*t1[3] + 44*t1[4] + 40*t1[5] + 36*t1[6] + 32*t1[7] + 28*t1[8] + 24*t1[9] + 20*t1[10] + 16*t1[11] + 12*t1[12] + 8*t1[13] + 4*t1[14] + ++ t2[0] + t2[1] + t2[2] + t2[3] + t2[4] + t2[5] + t2[6] + t2[7] + t2[8] + t2[9] + t2[10] + t2[11] + t2[12] + t2[13] + t2[14] + t2[15] ++ ) + 2080*CHAR_OFFSET; ++ s1 += (uint32)(t1[0] + t1[1] + t1[2] + t1[3] + t1[4] + t1[5] + t1[6] + t1[7] + t1[8] + t1[9] + t1[10] + t1[11] + t1[12] + t1[13] + t1[14] + t1[15]) + ++ 64*CHAR_OFFSET; ++ */ ++ ++__attribute__ ((target("avx2"))) MVSTATIC int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) ++{ ++ if (len > 64) { ++ ++ uint32 x[4] = {0}; ++ __m128i ss1 = _mm_cvtsi32_si128(*ps1); ++ __m128i ss2 = _mm_cvtsi32_si128(*ps2); ++ ++ const char mul_t1_buf[16] = {60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0}; ++ __m128i tmp = _mm_load_si128((__m128i*) mul_t1_buf); ++ __m256i mul_t1 = _mm256_cvtepu8_epi16(tmp); ++ __m256i mul_const = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(4 | (3 << 8) | (2 << 16) | (1 << 24))); ++ __m256i mul_one; ++ mul_one = _mm256_abs_epi8(_mm256_cmpeq_epi16(mul_one,mul_one)); // set all vector elements to 1 ++ ++ for (; i < (len-64); i+=64) { ++ // Load ... 4*[int8*16] ++ __m256i in8_1, in8_2; ++ __m128i in8_1_low, in8_2_low, in8_1_high, in8_2_high; ++ in8_1_low = _mm_loadu_si128((__m128i_u*)&buf[i]); ++ in8_2_low = _mm_loadu_si128((__m128i_u*)&buf[i+16]); ++ in8_1_high = _mm_loadu_si128((__m128i_u*)&buf[i+32]); ++ in8_2_high = _mm_loadu_si128((__m128i_u*)&buf[i+48]); ++ in8_1 = _mm256_inserti128_si256(_mm256_castsi128_si256(in8_1_low), in8_1_high,1); ++ in8_2 = _mm256_inserti128_si256(_mm256_castsi128_si256(in8_2_low), in8_2_high,1); ++ ++ // (1*buf[i] + 1*buf[i+1]), (1*buf[i+2], 1*buf[i+3]), ... 2*[int16*8] ++ // Fastest, even though multiply by 1 ++ __m256i add16_1 = _mm256_maddubs_epi16(mul_one, in8_1); ++ __m256i add16_2 = _mm256_maddubs_epi16(mul_one, in8_2); ++ ++ // (4*buf[i] + 3*buf[i+1]), (2*buf[i+2], buf[i+3]), ... 
2*[int16*8] ++ __m256i mul_add16_1 = _mm256_maddubs_epi16(mul_const, in8_1); ++ __m256i mul_add16_2 = _mm256_maddubs_epi16(mul_const, in8_2); ++ ++ // s2 += 64*s1 ++ ss2 = _mm_add_epi32(ss2, _mm_slli_epi32(ss1, 6)); ++ ++ // [sum(t1[0]..t1[7]), X, X, X] [int32*4]; faster than multiple _mm_hadds_epi16 ++ __m256i sum_add32 = _mm256_add_epi16(add16_1, add16_2); ++ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_epi32(sum_add32, 16)); ++ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_si256(sum_add32, 4)); ++ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_si256(sum_add32, 8)); ++ ++ // [sum(t2[0]..t2[7]), X, X, X] [int32*4]; faster than multiple _mm_hadds_epi16 ++ __m256i sum_mul_add32 = _mm256_add_epi16(mul_add16_1, mul_add16_2); ++ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_epi32(sum_mul_add32, 16)); ++ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_si256(sum_mul_add32, 4)); ++ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_si256(sum_mul_add32, 8)); ++ ++ // s1 += t1[0] + t1[1] + t1[2] + t1[3] + t1[4] + t1[5] + t1[6] + t1[7] ++ __m128i sum_add32_hi = _mm256_extracti128_si256(sum_add32, 0x1); ++ ss1 = _mm_add_epi32(ss1, _mm256_castsi256_si128(sum_add32)); ++ ss1 = _mm_add_epi32(ss1, sum_add32_hi); ++ ++ // s2 += t2[0] + t2[1] + t2[2] + t2[3] + t2[4] + t2[5] + t2[6] + t2[7] ++ __m128i sum_mul_add32_hi = _mm256_extracti128_si256(sum_mul_add32, 0x1); ++ ss2 = _mm_add_epi32(ss2, _mm256_castsi256_si128(sum_mul_add32)); ++ ss2 = _mm_add_epi32(ss2, sum_mul_add32_hi); ++ ++ // [t1[0] + t1[1], t1[2] + t1[3] ...] [int16*8] ++ // We could've combined this with generating sum_add32 above and ++ // save an instruction but benchmarking shows that as being slower ++ __m256i add16 = _mm256_hadds_epi16(add16_1, add16_2); ++ ++ // [t1[0], t1[1], ...] -> [t1[0]*28 + t1[1]*24, ...] 
[int32*4] ++ __m256i mul32 = _mm256_madd_epi16(add16, mul_t1); ++ ++ // [sum(mul32), X, X, X] [int32*4]; faster than multiple _mm_hadd_epi32 ++ mul32 = _mm256_add_epi32(mul32, _mm256_srli_si256(mul32, 4)); ++ mul32 = _mm256_add_epi32(mul32, _mm256_srli_si256(mul32, 8)); ++ // prefetch 2 cacheline ahead ++ _mm_prefetch(&buf[i + 160], _MM_HINT_T0); ++ ++ // s2 += 28*t1[0] + 24*t1[1] + 20*t1[2] + 16*t1[3] + 12*t1[4] + 8*t1[5] + 4*t1[6] ++ __m128i mul32_hi = _mm256_extracti128_si256(mul32, 0x1); ++ ss2 = _mm_add_epi32(ss2, _mm256_castsi256_si128(mul32)); ++ ss2 = _mm_add_epi32(ss2, mul32_hi); ++ ++#if CHAR_OFFSET != 0 ++ // s1 += 32*CHAR_OFFSET ++ __m128i char_offset_multiplier = _mm_set1_epi32(32 * CHAR_OFFSET); ++ ss1 = _mm_add_epi32(ss1, char_offset_multiplier); ++ ++ // s2 += 528*CHAR_OFFSET ++ char_offset_multiplier = _mm_set1_epi32(528 * CHAR_OFFSET); ++ ss2 = _mm_add_epi32(ss2, char_offset_multiplier); ++#endif ++ } ++ ++ _mm_store_si128((__m128i_u*)x, ss1); ++ *ps1 = x[0]; ++ _mm_store_si128((__m128i_u*)x, ss2); ++ *ps2 = x[0]; ++ } ++ return i; ++} ++ ++#endif /* } !USE_ROLL_ASM */ + + static int32 get_checksum1_default_1(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) + { +@@ -338,7 +461,11 @@ static inline uint32 get_checksum1_cpp(char *buf1, int32 len) + uint32 s2 = 0; + + // multiples of 64 bytes using AVX2 (if available) +- i = get_checksum1_avx2((schar*)buf1, len, i, &s1, &s2); ++#ifdef USE_ROLL_ASM ++ i = get_checksum1_avx2_asm((schar*)buf1, len, i, &s1, &s2); ++#else ++ i = get_checksum1_avx2_64((schar*)buf1, len, i, &s1, &s2); ++#endif + + // multiples of 32 bytes using SSSE3 (if available) + i = get_checksum1_ssse3_32((schar*)buf1, len, i, &s1, &s2); +@@ -407,7 +534,11 @@ int main() { + benchmark("Raw-C", get_checksum1_default_1, (schar*)buf, BLOCK_LEN); + benchmark("SSE2", get_checksum1_sse2_32, (schar*)buf, BLOCK_LEN); + benchmark("SSSE3", get_checksum1_ssse3_32, (schar*)buf, BLOCK_LEN); +- benchmark("AVX2", get_checksum1_avx2, (schar*)buf, BLOCK_LEN); ++#ifdef USE_ROLL_ASM ++ benchmark("AVX2-ASM", get_checksum1_avx2_asm, (schar*)buf, BLOCK_LEN); ++#else ++ benchmark("AVX2", get_checksum1_avx2_64, (schar*)buf, BLOCK_LEN); ++#endif + + free(buf); + return 0; +@@ -417,6 +548,6 @@ int main() { + #pragma clang optimize on + #endif /* BENCHMARK_SIMD_CHECKSUM1 */ + +-#endif /* HAVE_SIMD */ +-#endif /* __cplusplus */ +-#endif /* __x86_64__ */ ++#endif /* } USE_ROLL_SIMD */ ++#endif /* } __cplusplus */ ++#endif /* } __x86_64__ */ + diff --git a/0001-bugfix-test-md5-check-failure-2.patch b/0001-bugfix-test-md5-check-failure-2.patch new file mode 100644 index 0000000000000000000000000000000000000000..faf9321376ce208f261291a7433198831b481a78 --- /dev/null +++ b/0001-bugfix-test-md5-check-failure-2.patch @@ -0,0 +1,258 @@ +From b81a5095563776397a4239132d2b737a1083e02f Mon Sep 17 00:00:00 2001 +From: Wayne Davison +Date: Thu, 3 Mar 2022 17:00:57 -0800 +Subject: [PATCH] Make asm use more selectable + +- Make the SIMD ASM code off by default. Use configure --enable-simd-asm + to enable. +- Allow MD5 ASM code to be requested even when OpenSSL is handling MD4 + checksums. Use configure --enable-md5-asm to enable. 
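
The configure.ac hunks in this companion patch define the actual option names: --enable-roll-simd, --enable-roll-asm (what the commit message above calls --enable-simd-asm), and --enable-md5-asm. A minimal, illustrative configure sketch — not part of the patch itself, shown only to tie the message to the options added below:

    # rolling-checksum SIMD (C++) plus its hand-written AVX2 asm helper
    ./configure --enable-roll-simd --enable-roll-asm
    # keep OpenSSL crypto for MD4 while using the asm MD5 implementation
    ./configure --enable-openssl --enable-md5-asm
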
+--- + NEWS.md | 23 +++++++++---- + configure.ac | 96 ++++++++++++++++++++++++++++++++-------------------- + usage.c | 12 ++++--- + 3 files changed, 84 insertions(+), 47 deletions(-) + +diff --git a/NEWS.md b/NEWS.md +index 3083ca3..ed19449 100644 +--- a/NEWS.md ++++ b/NEWS.md +@@ -136,8 +136,9 @@ + (keeping the behavior the same as before), so specifying `--info=nonreg0` + can be used to turn the warnings off. + +- - More ASM optimizations from Shark64. +- ++ - An optional asm optimization for the rolling checksum from Shark64. Enable ++ it with `./configure --enable-roll-asm`. ++ + - Transformed rrsync into a python script with improvements: + - Security has been beefed up. + - The known rsync options were updated to include recent additions. +@@ -189,14 +190,24 @@ + using the output of `git describe` when building inside a non-shallow git + checkout, though.) + +- - Improved the IPv6 determination in configure. ++ - Renamed configure's `--enable-simd` option to `--enable-roll-simd` and added ++ the option `--enable-roll-asm` to use the new asm version of the code. Both ++ are x86_64/amd64 only. ++ ++ - Renamed configure's `--enable-asm` option to `--enable-md5-asm` to avoid ++ confusion with the asm option for the rolling checksum. It is also honored ++ even when openssl crypto is in use. This allows: normal MD4 & MD5, normal ++ MD4 + asm MD5, openssl MD4 & MD5, or openssl MD4 + asm MD5. + +- - Made SIMD & ASM configure default to "no" on non-Linux hosts due to various +- reports of problems on NetBSD & macOS hosts. These tests were also tweaked +- to allow enabling the feature on a host_cpu of amd64 (was only x86_64). ++ - Made SIMD & asm configure checks default to "no" on non-Linux hosts due to ++ various reports of problems on NetBSD & macOS hosts. These were also ++ tweaked to allow enabling the feature on a host_cpu of amd64 (was only ++ allowed on x86_64 before). + + - Fixed configure to not fail at the SIMD check when cross-compiling. + ++ - Improved the IPv6 determination in configure. ++ + - Compile the C files with `-pedantic-errors` (when possible) so that we will + get warned if a static initialization overflows in the future (among other + things). +diff --git a/configure.ac b/configure.ac +index 7031283..1dd3e8e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -229,12 +229,13 @@ fi + AC_DEFINE_UNQUOTED(NOBODY_USER, "$NOBODY_USER", [unprivileged user--e.g. nobody]) + AC_DEFINE_UNQUOTED(NOBODY_GROUP, "$NOBODY_GROUP", [unprivileged group for unprivileged user]) + +-# SIMD optimizations +-SIMD= ++# rolling-checksum SIMD optimizations ++ROLL_SIMD= + +-AC_MSG_CHECKING([whether to enable SIMD optimizations]) +-AC_ARG_ENABLE(simd, +- AS_HELP_STRING([--enable-simd],[enable/disable to control SIMD optimizations (requires c++)])) ++AC_MSG_CHECKING([whether to enable rolling-checksum SIMD optimizations]) ++AC_ARG_ENABLE(roll-simd, +++ AS_HELP_STRING([--enable-roll-simd],[enable/disable to control rolling-checksum SIMD optimizations (requires c++)])) ++ + + # Clag is crashing with -g -O2, so we'll get rid of -g for now. 
+ CXXFLAGS=`echo "$CXXFLAGS" | sed 's/-g //'` +@@ -263,14 +264,14 @@ __attribute__ ((target("ssse3"))) void more_testing(char* buf, int len) + } + ]]) + +-if test x"$enable_simd" = x""; then ++if test x"$enable_roll_simd" = x""; then + case "$host_os" in + *linux*) ;; +- *) enable_simd=no ;; +- esac ++ *) enable_roll_simd=no ;; ++ esac + fi + +-if test x"$enable_simd" != x"no"; then ++if test x"$enable_roll_simd" != x"no"; then + # For x86-64 SIMD, g++ >=5 or clang++ >=7 is required + if test x"$host_cpu" = x"x86_64" || test x"$host_cpu" = x"amd64"; then + AC_LANG(C++) +@@ -283,23 +284,23 @@ if test x"$enable_simd" != x"no"; then + AC_LANG(C) + if test x"$CXX_OK" = x"yes"; then + # AC_MSG_RESULT() is called below. +- SIMD="$host_cpu" +- elif test x"$enable_simd" = x"yes"; then ++ ROLL_SIMD="$host_cpu" ++ elif test x"$enable_roll_simd" = x"yes"; then + AC_MSG_RESULT(error) +- AC_MSG_ERROR(The SIMD compilation test failed. +-Omit --enable-simd to continue without it.) ++ AC_MSG_ERROR(The rolling-checksum SIMD compilation test failed. ++Omit --enable-roll-simd to continue without it.) + fi +- elif test x"$enable_simd" = x"yes"; then ++ elif test x"$enable_roll_simd" = x"yes"; then + AC_MSG_RESULT(unavailable) +- AC_MSG_ERROR(The SIMD optimizations are currently x86_64|amd64 only. +-Omit --enable-simd to continue without it.) ++ AC_MSG_ERROR(The rolling-checksum SIMD optimizations are currently x86_64|amd64 only. ++Omit --enable-roll-simd to continue without it.) + fi + fi + +-if test x"$SIMD" != x""; then +- AC_MSG_RESULT([yes ($SIMD)]) +- AC_DEFINE(HAVE_SIMD, 1, [Define to 1 to enable SIMD optimizations]) +- SIMD='$(SIMD_'"$SIMD)" ++if test x"$ROLL_SIMD" != x""; then ++ AC_MSG_RESULT([yes ($ROLL_SIMD)]) ++ AC_DEFINE(USE_ROLL_SIMD, 1, [Define to 1 to enable rolling-checksum SIMD optimizations]) ++ ROLL_SIMD='$(ROLL_SIMD_'"$ROLL_SIMD)" + # We only use c++ for its target attribute dispatching, disable unneeded bulky features + CXXFLAGS="$CXXFLAGS -fno-exceptions -fno-rtti" + # Apple often has "g++" as a symlink for clang. Try to find out the truth. +@@ -311,7 +312,7 @@ else + AC_MSG_RESULT(no) + fi + +-AC_SUBST(SIMD) ++AC_SUBST(ROLL_SIMD) + + AC_MSG_CHECKING([if assembler accepts noexecstack]) + OLD_CFLAGS="$CFLAGS" +@@ -322,38 +323,59 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ]], [[return 0;]])], + CFLAGS="$OLD_CFLAGS" + AC_SUBST(NOEXECSTACK) + +-ASM= +- +-AC_MSG_CHECKING([whether to enable ASM optimizations]) +-AC_ARG_ENABLE(asm, +- AS_HELP_STRING([--enable-asm],[enable/disable to control ASM optimizations])) ++MD5_ASM= + +-if test x"$enable_asm" = x""; then ++AC_MSG_CHECKING([whether to enable MD5 ASM optimizations]) ++AC_ARG_ENABLE(md5-asm, ++ AS_HELP_STRING([--enable-md5-asm],[enable/disable to control MD5 ASM optimizations])) ++ ++if test x"$enable_md5_asm" = x""; then + case "$host_os" in + *linux*) ;; +- *) enable_asm=no ;; ++ *) enable_md5_asm=no ;; + esac + fi + +-if test x"$enable_asm" != x"no"; then ++if test x"$enable_md5_asm" != x"no"; then + if test x"$host_cpu" = x"x86_64" || test x"$host_cpu" = x"amd64"; then +- ASM="$host_cpu" +- elif test x"$enable_asm" = x"yes"; then ++ MD5_ASM="$host_cpu" ++ elif test x"$enable_md5_asm" = x"yes"; then + AC_MSG_RESULT(unavailable) + AC_MSG_ERROR(The ASM optimizations are currently x86_64|amd64 only. +-Omit --enable-asm to continue without it.) ++Omit --enable-md5-asm to continue without it.) 
+ fi + fi + +-if test x"$ASM" != x""; then +- AC_MSG_RESULT([yes ($ASM)]) +- AC_DEFINE(HAVE_ASM, 1, [Define to 1 to enable ASM optimizations]) +- ASM='$(ASM_'"$ASM)" ++if test x"$MD5_ASM" != x""; then ++ AC_MSG_RESULT([yes ($MD5_ASM)]) ++ AC_DEFINE(USE_MD5_ASM, 1, [Define to 1 to enable MD5 ASM optimizations]) ++ MD5_ASM='$(MD5_ASM_'"$MD5_ASM)" ++else ++ AC_MSG_RESULT(no) ++fi ++ ++AC_SUBST(MD5_ASM) ++ ++ROLL_ASM= ++ ++AC_MSG_CHECKING([whether to enable rolling-checksum ASM optimizations]) ++AC_ARG_ENABLE(roll-asm, ++ AS_HELP_STRING([--enable-roll-asm],[enable/disable to control rolling-checksum ASM optimizations (requires --enable-roll-simd)])) ++ ++if test x"$ROLL_SIMD" = x""; then ++ enable_roll_asm=no ++fi ++ ++if test x"$enable_roll_asm" = x"yes"; then ++ ROLL_ASM="$host_cpu" ++ AC_MSG_RESULT([yes ($ROLL_ASM)]) ++ AC_DEFINE(USE_ROLL_ASM, 1, [Define to 1 to enable rolling-checksum ASM optimizations (requires --enable-roll-simd)]) ++ ROLL_ASM='$(ROLL_ASM_'"$ROLL_ASM)" + else + AC_MSG_RESULT(no) + fi + +-AC_SUBST(ASM) ++AC_SUBST(ROLL_ASM) + + # arrgh. libc in some old debian version screwed up the largefile + # stuff, getting byte range locking wrong +diff --git a/usage.c b/usage.c +index db13535..e710d84 100644 +--- a/usage.c ++++ b/usage.c +@@ -139,20 +139,24 @@ static void print_info_flags(enum logcode f) + + "*Optimizations", + +-#ifndef HAVE_SIMD ++#ifndef USE_ROLL_SIMD + "no " + #endif +- "SIMD", ++ "SIMD-roll", + +-#ifndef HAVE_ASM ++#ifndef USE_ROLL_ASM + "no " + #endif +- "asm", ++ "asm-roll", + + #ifndef USE_OPENSSL + "no " + #endif + "openssl-crypto", ++#ifndef USE_MD5_ASM ++ "no " ++#endif ++ "asm-MD5", + + NULL + }; +-- +2.27.0 + diff --git a/0002-rsync-3.2.2-runtests.patch b/0002-rsync-3.2.2-runtests.patch new file mode 100644 index 0000000000000000000000000000000000000000..0f682e56c921819d34bced97c3a3d2bb7ba03a76 --- /dev/null +++ b/0002-rsync-3.2.2-runtests.patch @@ -0,0 +1,12 @@ +diff --git a/runtests.sh.old b/runtests.sh +index ecb383e..1cd1d1a 100755 +--- a/runtests.sh.old ++++ b/runtests.sh +@@ -276,6 +276,7 @@ do + + case "$testscript" in + *hardlinks*) TESTRUN_TIMEOUT=600 ;; ++ *default-acls*) continue ;; + *) TESTRUN_TIMEOUT=300 ;; + esac + diff --git a/0003-rsync-3.2.4-hello-test.patch b/0003-rsync-3.2.4-hello-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..e6bb4b727f7986d39505169cd9e4c08a53bd2fd7 --- /dev/null +++ b/0003-rsync-3.2.4-hello-test.patch @@ -0,0 +1,31 @@ +diff --git a/testsuite/00-hello.test b/testsuite/00-hello.test +index a359753..ec0279a 100644 +--- a/testsuite/00-hello.test ++++ b/testsuite/00-hello.test +@@ -29,7 +29,7 @@ append_line test1 + checkit "$RSYNC -ai '$fromdir/' '$todir/'" "$fromdir" "$todir" + + copy_weird() { +- checkit "$RSYNC $1 \"$2$fromdir/$weird_name/\" \"$3$todir/$weird_name\"" "$fromdir" "$todir" ++ checkit "$RSYNC $1 --rsync-path='$RSYNC' '$2$fromdir/$weird_name/' '$3$todir/$weird_name'" "$fromdir" "$todir" + } + + append_line test2 +@@ -47,7 +47,7 @@ copy_weird '-ais' '' 'lh:' + echo test6 + + touch "$fromdir/one" "$fromdir/two" +-(cd "$fromdir" && $RSYNC -ai --old-args lh:'one two' "$todir/") ++(cd "$fromdir" && $RSYNC -ai --old-args --rsync-path="$RSYNC" lh:'one two' "$todir/") + if [ ! -f "$todir/one" ] || [ ! 
-f "$todir/two" ]; then + test_fail "old-args copy of 'one two' failed" + fi +@@ -55,7 +55,7 @@ fi + echo test7 + + rm "$todir/one" "$todir/two" +-(cd "$fromdir" && RSYNC_OLD_ARGS=1 $RSYNC -ai lh:'one two' "$todir/") ++(cd "$fromdir" && RSYNC_OLD_ARGS=1 $RSYNC -ai --rsync-path="$RSYNC" lh:'one two' "$todir/") + + # The script would have aborted on error, so getting here means we've won. + exit 0 diff --git a/0004-cve-2018-25032.patch b/0004-cve-2018-25032.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e558996110c2730474957fb2df960292618f7ae --- /dev/null +++ b/0004-cve-2018-25032.patch @@ -0,0 +1,343 @@ +From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001 +From: Mark Adler +Date: Tue, 17 Apr 2018 22:09:22 -0700 +Subject: [PATCH] Fix a bug that can crash deflate on some input when using + Z_FIXED. + +This bug was reported by Danilo Ramos of Eideticom, Inc. It has +lain in wait 13 years before being found! The bug was introduced +in zlib 1.2.2.2, with the addition of the Z_FIXED option. That +option forces the use of fixed Huffman codes. For rare inputs with +a large number of distant matches, the pending buffer into which +the compressed data is written can overwrite the distance symbol +table which it overlays. That results in corrupted output due to +invalid distances, and can result in out-of-bound accesses, +crashing the application. + +The fix here combines the distance buffer and literal/length +buffers into a single symbol buffer. Now three bytes of pending +buffer space are opened up for each literal or length/distance +pair consumed, instead of the previous two bytes. This assures +that the pending buffer cannot overwrite the symbol table, since +the maximum fixed code compressed length/distance is 31 bits, and +since there are four bytes of pending space for every three bytes +of symbol space. +--- + deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++--------------- + deflate.h | 25 +++++++++---------- + trees.c | 50 +++++++++++-------------------------- + 3 files changed, 79 insertions(+), 70 deletions(-) + +diff --git a/zlib/deflate.c b/zlib/deflate.c +index 425babc00..19cba873a 100644 +--- a/zlib/deflate.c ++++ b/zlib/deflate.c +@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + +- ushf *overlay; +- /* We overlay pending_buf and d_buf+l_buf. This works since the average +- * output size for (length,distance) codes is <= 24 bits. +- */ +- + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; +@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + +- overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); +- s->pending_buf = (uchf *) overlay; +- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); ++ /* We overlay pending_buf and sym_buf. This works since the average size ++ * for length/distance pairs over any compressed block is assured to be 31 ++ * bits or less. ++ * ++ * Analysis: The longest fixed codes are a length code of 8 bits plus 5 ++ * extra bits, for lengths 131 to 257. The longest fixed distance codes are ++ * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest ++ * possible fixed-codes length/distance pair is then 31 bits total. 
++ * ++ * sym_buf starts one-fourth of the way into pending_buf. So there are ++ * three bytes in sym_buf for every four bytes in pending_buf. Each symbol ++ * in sym_buf is three bytes -- two for the distance and one for the ++ * literal/length. As each symbol is consumed, the pointer to the next ++ * sym_buf value to read moves forward three bytes. From that symbol, up to ++ * 31 bits are written to pending_buf. The closest the written pending_buf ++ * bits gets to the next sym_buf symbol to read is just before the last ++ * code is written. At that time, 31*(n-2) bits have been written, just ++ * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at ++ * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 ++ * symbols are written.) The closest the writing gets to what is unread is ++ * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and ++ * can range from 128 to 32768. ++ * ++ * Therefore, at a minimum, there are 142 bits of space between what is ++ * written and what is read in the overlain buffers, so the symbols cannot ++ * be overwritten by the compressed data. That space is actually 139 bits, ++ * due to the three-bit fixed-code block header. ++ * ++ * That covers the case where either Z_FIXED is specified, forcing fixed ++ * codes, or when the use of fixed codes is chosen, because that choice ++ * results in a smaller compressed block than dynamic codes. That latter ++ * condition then assures that the above analysis also covers all dynamic ++ * blocks. A dynamic-code block will only be chosen to be emitted if it has ++ * fewer bits than a fixed-code block would for the same set of symbols. ++ * Therefore its average symbol length is assured to be less than 31. So ++ * the compressed data for a dynamic block also cannot overwrite the ++ * symbols from which it is being constructed. ++ */ ++ ++ s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); ++ s->pending_buf_size = (ulg)s->lit_bufsize * 4; + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { +@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + deflateEnd (strm); + return Z_MEM_ERROR; + } +- s->d_buf = overlay + s->lit_bufsize/sizeof(ush); +- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; ++ s->sym_buf = s->pending_buf + s->lit_bufsize; ++ s->sym_end = (s->lit_bufsize - 1) * 3; ++ /* We avoid equality with lit_bufsize*3 because of wraparound at 64K ++ * on 16 bit machines and because stored blocks are restricted to ++ * 64K-1 bytes. 
++ */ + + s->level = level; + s->strategy = strategy; +@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value) + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; +- if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) ++ if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; +@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source) + #else + deflate_state *ds; + deflate_state *ss; +- ushf *overlay; + + + if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { +@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source) + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); +- overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); +- ds->pending_buf = (uchf *) overlay; ++ ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { +@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source) + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); +- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); +- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; ++ ds->sym_buf = ds->pending_buf + ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; +@@ -1771,7 +1771,7 @@ local block_state deflate_fast(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -1912,7 +1912,7 @@ local block_state deflate_slow(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -1987,7 +1987,7 @@ local block_state deflate_rle(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -2026,7 +2026,7 @@ local block_state deflate_huff(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +diff --git a/zlib/deflate.h b/zlib/deflate.h +index 23ecdd312..d4cf1a98b 100644 +--- a/zlib/deflate.h ++++ b/zlib/deflate.h +@@ -217,7 +217,7 @@ typedef struct internal_state { + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + +- uchf *l_buf; /* buffer for literals or lengths */ ++ uchf *sym_buf; /* buffer for distances and literals/lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for +@@ -239,13 +239,8 @@ typedef struct internal_state { + * - I can't count above 4 + */ + +- uInt last_lit; /* running index in l_buf */ +- +- ushf *d_buf; +- /* Buffer for distances. To simplify the code, d_buf and l_buf have +- * the same number of elements. To use different lengths, an extra flag +- * array would be necessary. 
+- */ ++ uInt sym_next; /* running index in sym_buf */ ++ uInt sym_end; /* symbol table full when sym_next reaches this */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ +@@ -317,20 +317,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + + # define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ +- s->d_buf[s->last_lit] = 0; \ +- s->l_buf[s->last_lit++] = cc; \ ++ s->sym_buf[s->sym_next++] = 0; \ ++ s->sym_buf[s->sym_next++] = 0; \ ++ s->sym_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ +- flush = (s->last_lit == s->lit_bufsize-1); \ ++ flush = (s->sym_next == s->sym_end); \ + } + # define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (length); \ + ush dist = (distance); \ +- s->d_buf[s->last_lit] = dist; \ +- s->l_buf[s->last_lit++] = len; \ ++ s->sym_buf[s->sym_next++] = dist; \ ++ s->sym_buf[s->sym_next++] = dist >> 8; \ ++ s->sym_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ +- flush = (s->last_lit == s->lit_bufsize-1); \ ++ flush = (s->sym_next == s->sym_end); \ + } + #else + # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +diff --git a/zlib/trees.c b/zlib/trees.c +index 4f4a65011..decaeb7c3 100644 +--- a/zlib/trees.c ++++ b/zlib/trees.c +@@ -416,7 +416,7 @@ local void init_block(s) + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; +- s->last_lit = s->matches = 0; ++ s->sym_next = s->matches = 0; + } + + #define SMALLEST 1 +@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, +- s->last_lit)); ++ s->sym_next / 3)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + +@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ + { +- s->d_buf[s->last_lit] = (ush)dist; +- s->l_buf[s->last_lit++] = (uch)lc; ++ s->sym_buf[s->sym_next++] = dist; ++ s->sym_buf[s->sym_next++] = dist >> 8; ++ s->sym_buf[s->sym_next++] = lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; +@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } +- +-#ifdef TRUNCATE_BLOCK +- /* Try to guess if it is profitable to stop the current block here */ +- if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { +- /* Compute an upper bound for the compressed length */ +- ulg out_length = (ulg)s->last_lit*8L; +- ulg in_length = (ulg)((long)s->strstart - s->block_start); +- int dcode; +- for (dcode = 0; dcode < D_CODES; dcode++) { +- out_length += (ulg)s->dyn_dtree[dcode].Freq * +- (5L+extra_dbits[dcode]); +- } +- out_length >>= 3; +- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", +- s->last_lit, in_length, out_length, +- 100L - out_length*100L/in_length)); +- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; +- } +-#endif +- return (s->last_lit == s->lit_bufsize-1); +- /* We avoid equality with lit_bufsize because of wraparound at 64K +- * on 16 bit machines and because stored blocks are restricted to +- * 64K-1 bytes. 
+- */ ++ return (s->sym_next == s->sym_end); + } + + /* =========================================================================== +@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree) + { + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ +- unsigned lx = 0; /* running index in l_buf */ ++ unsigned sx = 0; /* running index in sym_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + +- if (s->last_lit != 0) do { +- dist = s->d_buf[lx]; +- lc = s->l_buf[lx++]; ++ if (s->sym_next != 0) do { ++ dist = s->sym_buf[sx++] & 0xff; ++ dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; ++ lc = s->sym_buf[sx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); +@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree) + } + } /* literal or match pair ? */ + +- /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ +- Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, +- "pendingBuf overflow"); ++ /* Check that the overlay between pending_buf and sym_buf is ok: */ ++ Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); + +- } while (lx < s->last_lit); ++ } while (sx < s->sym_next); + + send_code(s, END_BLOCK, ltree); + } diff --git a/0005-restart-daemon-on-failure.patch b/0005-restart-daemon-on-failure.patch new file mode 100644 index 0000000000000000000000000000000000000000..0afba61f7f7db42122b7bf68950932fb2d5f0c8c --- /dev/null +++ b/0005-restart-daemon-on-failure.patch @@ -0,0 +1,27 @@ +From d41bb98c09bf0b999c4eee4e2125c7e5d0747ec4 Mon Sep 17 00:00:00 2001 +From: Simon Deziel +Date: Mon, 11 Apr 2022 12:08:11 -0400 +Subject: [PATCH] systemd: restart daemon on-failure (#302) + +man 5 systemd.service: +> Setting this to on-failure is the recommended choice for long-running services + +Partial fix for https://bugzilla.samba.org/show_bug.cgi?id=13463 + +Signed-off-by: Simon Deziel +--- + packaging/systemd/rsync.service | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/packaging/systemd/rsync.service b/packaging/systemd/rsync.service +index 8a0b5820..8a867ca6 100644 +--- a/packaging/systemd/rsync.service ++++ b/packaging/systemd/rsync.service +@@ -7,6 +7,7 @@ Documentation=man:rsync(1) man:rsyncd.conf(5) + [Service] + ExecStart=/usr/bin/rsync --daemon --no-detach + RestartSec=1 ++Restart=on-failure + + # Citing README.md: + # diff --git a/rsync-patches-3.2.4pre3.tar.gz b/rsync-patches-3.2.4pre3.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9991ca62145feabb05bcc18a9578d1e6363c1f46 Binary files /dev/null and b/rsync-patches-3.2.4pre3.tar.gz differ diff --git a/rsync.spec b/rsync.spec new file mode 100644 index 0000000000000000000000000000000000000000..b75c124b1fdf52ca7f26efe9a300410fcbd08a6d --- /dev/null +++ b/rsync.spec @@ -0,0 +1,117 @@ +%define anolis_release 1 +%define pre_release pre3 +%define version_num 3.2.4 + +Name: rsync +Version: %{version_num}~%{pre_release} +Release: %{anolis_release}%{?dist} +Summary: A program for synchronizing files over a network + +License: GPLv3+ +URL: https://github.com/WayneD/rsync +Source0: https://github.com/WayneD/rsync/archive/refs/tags/v%{version_num}%{pre_release}.tar.gz +Source1: https://github.com/WayneD/rsync/archive/refs/tags/%{name}-patches-%{version_num}%{pre_release}.tar.gz +Source2: rsyncd.socket +Source3: rsyncd.service +Source4: rsyncd.conf +Source5: rsyncd.sysconfig +Source6: rsyncd@.service + 
+BuildRequires: make vim
+BuildRequires: gcc
+BuildRequires: gcc-c++
+BuildRequires: libacl-devel
+BuildRequires: libattr-devel
+BuildRequires: autoconf automake
+BuildRequires: popt-devel
+BuildRequires: systemd
+BuildRequires: lz4-devel
+BuildRequires: openssl-devel libtool-ltdl
+BuildRequires: libzstd-devel
+BuildRequires: xxhash-devel
+BuildRequires: python3-cmarkgfm
+
+#needed to make hello test run correctly
+Patch0: 0000-bugfix-test-md5-check-failure-1.patch
+Patch1: 0001-bugfix-test-md5-check-failure-2.patch
+Patch2: 0002-rsync-3.2.2-runtests.patch
+Patch3: 0003-rsync-3.2.4-hello-test.patch
+
+Patch4: 0004-cve-2018-25032.patch
+Patch5: 0005-restart-daemon-on-failure.patch
+
+%description
+Rsync uses a reliable algorithm to bring remote and host files into
+sync very quickly. Rsync is fast because it just sends the differences
+in the files over the network instead of sending the complete
+files. Rsync is often used as a very powerful mirroring process or
+just as a more capable replacement for the rcp command. A technical
+report which describes the rsync algorithm is included in this
+package.
+
+%package daemon
+Summary: Service for anonymous access to rsync
+BuildArch: noarch
+Requires: %{name} = %{version}-%{release}
+%{?systemd_requires}
+
+%description daemon
+Rsync can be used to offer read only access to anonymous clients. This
+package provides the anonymous rsync service.
+
+%prep
+%autosetup -b 1 -n %{name}-%{version_num}%{pre_release} -p1
+
+#Enable --copy-devices parameter
+patch -p1 -i patches/copy-devices.diff
+
+%build
+%configure \
+ --enable-openssl \
+ --enable-xxhash \
+ --enable-zstd \
+ --enable-lz4 \
+ --enable-ipv6
+
+%make_build
+
+%install
+%make_install
+install -D -m644 %{SOURCE2} %{buildroot}/%{_unitdir}/rsyncd.socket
+install -D -m644 %{SOURCE3} %{buildroot}/%{_unitdir}/rsyncd.service
+install -D -m644 %{SOURCE4} %{buildroot}/%{_sysconfdir}/rsyncd.conf
+install -D -m644 %{SOURCE5} %{buildroot}/%{_sysconfdir}/sysconfig/rsyncd
+install -D -m644 %{SOURCE6} %{buildroot}/%{_unitdir}/rsyncd@.service
+
+%check
+make check
+chmod -x support/*
+
+%post daemon
+%systemd_post rsyncd.service
+
+%preun daemon
+%systemd_preun rsyncd.service
+
+%postun daemon
+%systemd_postun_with_restart rsyncd.service
+
+%files
+%license COPYING
+%doc support/ tech_report.tex
+%{_bindir}/%{name}
+%{_bindir}/%{name}-ssl
+%{_mandir}/man1/%{name}.1*
+%{_mandir}/man1/%{name}-ssl.1*
+%{_mandir}/man5/rsyncd.conf.5*
+%config(noreplace) %{_sysconfdir}/rsyncd.conf
+
+%files daemon
+%config(noreplace) %{_sysconfdir}/sysconfig/rsyncd
+%{_unitdir}/rsyncd.socket
+%{_unitdir}/rsyncd.service
+%{_unitdir}/rsyncd@.service
+
+%changelog
+* Thu Apr 14 2022 happy_orange - 3.2.4~pre3-1
+- Init package from upstream
diff --git a/rsyncd.conf b/rsyncd.conf
new file mode 100644
index 0000000000000000000000000000000000000000..6e058aa2c6bcf142707adc8acd0f353dd9f08ba3
--- /dev/null
+++ b/rsyncd.conf
@@ -0,0 +1,20 @@
+# /etc/rsyncd: configuration file for rsync daemon mode
+
+# See rsyncd.conf man page for more options.
+
+# configuration example:
+
+# uid = nobody
+# gid = nobody
+# use chroot = yes
+# max connections = 4
+# pid file = /var/run/rsyncd.pid
+# exclude = lost+found/
+# transfer logging = yes
+# timeout = 900
+# ignore nonreadable = yes
+# dont compress = *.gz *.tgz *.zip *.z *.Z *.rpm *.deb *.bz2
+
+# [ftp]
+# path = /home/ftp
+# comment = ftp export area
diff --git a/rsyncd.service b/rsyncd.service
new file mode 100644
index 0000000000000000000000000000000000000000..d2d6362e874a028bb4ef82df466b2b55a3ed2449
--- /dev/null
+++ b/rsyncd.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=fast remote file copy program daemon
+ConditionPathExists=/etc/rsyncd.conf
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+EnvironmentFile=/etc/sysconfig/rsyncd
+ExecStart=/usr/bin/rsync --daemon --no-detach "$OPTIONS"
+
+[Install]
+WantedBy=multi-user.target
diff --git a/rsyncd.socket b/rsyncd.socket
new file mode 100644
index 0000000000000000000000000000000000000000..7306ad0fae875c5bac08d007f0c5d2f9d557c36e
--- /dev/null
+++ b/rsyncd.socket
@@ -0,0 +1,10 @@
+[Unit]
+Description=Rsync Server Socket
+Conflicts=rsyncd.service
+
+[Socket]
+ListenStream=873
+Accept=yes
+
+[Install]
+WantedBy=sockets.target
diff --git a/rsyncd.sysconfig b/rsyncd.sysconfig
new file mode 100644
index 0000000000000000000000000000000000000000..90a5a43d0dd6339257675f7af97a0fa21832d446
--- /dev/null
+++ b/rsyncd.sysconfig
@@ -0,0 +1 @@
+OPTIONS=""
diff --git a/rsyncd@.service b/rsyncd@.service
new file mode 100644
index 0000000000000000000000000000000000000000..89f96214a40aa3964d4b0f37dfe1927de56bf70e
--- /dev/null
+++ b/rsyncd@.service
@@ -0,0 +1,8 @@
+[Unit]
+Description=fast remote file copy program daemon
+ConditionPathExists=/etc/rsyncd.conf
+
+[Service]
+EnvironmentFile=/etc/sysconfig/rsyncd
+ExecStart=/usr/bin/rsync --daemon --no-detach "$OPTIONS"
+StandardInput=socket
diff --git a/v3.2.4pre3.tar.gz b/v3.2.4pre3.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..41ff6344ff07166e9ce79d0953eb11e3229bf461
Binary files /dev/null and b/v3.2.4pre3.tar.gz differ
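
The unit files above support two deployment modes; a brief, illustrative usage sketch (assuming the rsync-daemon subpackage built from this spec is installed — the socket unit declares Conflicts=rsyncd.service, so only one mode is active at a time):

    # persistent daemon, reading /etc/rsyncd.conf and /etc/sysconfig/rsyncd
    systemctl enable --now rsyncd.service
    # or per-connection socket activation on port 873, handled by rsyncd@.service
    systemctl enable --now rsyncd.socket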