diff --git a/0000-bugfix-test-md5-check-failure-1.patch b/0000-bugfix-test-md5-check-failure-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e8b3d3c5a0316185315750bec8b98ad61824909 --- /dev/null +++ b/0000-bugfix-test-md5-check-failure-1.patch @@ -0,0 +1,551 @@ +From b81a5095563776397a4239132d2b737a1083e02f Mon Sep 17 00:00:00 2001 +From: Wayne Davison +Date: Thu, 3 Mar 2022 17:00:57 -0800 +Subject: [PATCH] Make asm use more selectable + +- Make the SIMD ASM code off by default. Use configure --enable-simd-asm + to enable. +- Allow MD5 ASM code to be requested even when OpenSSL is handling MD4 + checksums. Use configure --enable-md5-asm to enable. +--- + Makefile.in | 15 ++-- + checksum.c | 34 ++++----- + lib/md5-asm-x86_64.S | 4 +- + lib/md5.c | 19 ++--- + lib/mdigest.h | 13 ++-- + rsync.h | 9 +-- + simd-checksum-avx2.S | 14 +++- + simd-checksum-x86_64.cpp | 151 ++++++++++++++++++++++++++++++++++++--- + 8 files changed, 198 insertions(+), 62 deletions(-) + +diff --git a/Makefile.in b/Makefile.in +index 8817edab..3cde9557 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -30,8 +30,9 @@ SHELL=/bin/sh + .SUFFIXES: + .SUFFIXES: .c .o + +-SIMD_x86_64=simd-checksum-x86_64.o simd-checksum-avx2.o +-ASM_x86_64=lib/md5-asm-x86_64.o ++ROLL_SIMD_x86_64=simd-checksum-x86_64.o ++ROLL_ASM_x86_64=simd-checksum-avx2.o ++MD5_ASM_x86_64=lib/md5-asm-x86_64.o + + GENFILES=configure.sh aclocal.m4 config.h.in rsync.1 rsync.1.html \ + rsync-ssl.1 rsync-ssl.1.html rsyncd.conf.5 rsyncd.conf.5.html \ +@@ -46,7 +47,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \ + util1.o util2.o main.o checksum.o match.o syscall.o log.o backup.o delete.o + OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \ + usage.o fileio.o batch.o clientname.o chmod.o acls.o xattrs.o +-OBJS3=progress.o pipe.o @ASM@ @SIMD@ ++OBJS3=progress.o pipe.o @MD5_ASM@ @ROLL_SIMD@ @ROLL_ASM@ + DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o + popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \ + popt/popthelp.o popt/poptparse.o +@@ -147,13 +148,13 @@ git-version.h: ALWAYS_RUN + ALWAYS_RUN: + + simd-checksum-x86_64.o: simd-checksum-x86_64.cpp +- @$(srcdir)/cmd-or-msg disable-simd $(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp ++ @$(srcdir)/cmd-or-msg disable-roll-simd $(CXX) -I. $(CXXFLAGS) $(CPPFLAGS) -c -o $@ $(srcdir)/simd-checksum-x86_64.cpp + + simd-checksum-avx2.o: simd-checksum-avx2.S +- @$(srcdir)/cmd-or-msg disable-asm $(CC) $(CFLAGS) --include=$(srcdir)/rsync.h -DAVX2_ASM -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/simd-checksum-avx2.S ++ @$(srcdir)/cmd-or-msg disable-roll-asm $(CC) $(CFLAGS) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/simd-checksum-avx2.S + +-lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S config.h lib/md-defines.h +- @$(srcdir)/cmd-or-msg disable-asm $(CC) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S ++lib/md5-asm-x86_64.o: lib/md5-asm-x86_64.S lib/md-defines.h ++ @$(srcdir)/cmd-or-msg disable-md5-asm $(CC) -I. @NOEXECSTACK@ -c -o $@ $(srcdir)/lib/md5-asm-x86_64.S + + tls$(EXEEXT): $(TLS_OBJ) + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(TLS_OBJ) $(LIBS) +diff --git a/checksum.c b/checksum.c +index 1ed76828..77848585 100644 +--- a/checksum.c ++++ b/checksum.c +@@ -179,7 +179,7 @@ int canonical_checksum(int csum_type) + return 0; + } + +-#ifndef HAVE_SIMD /* See simd-checksum-*.cpp. */ ++#ifndef USE_ROLL_SIMD /* See simd-checksum-*.cpp. 
*/ + /* + a simple 32 bit checksum that can be updated from either end + (inspired by Mark Adler's Adler-32 checksum) +@@ -222,23 +222,23 @@ void get_checksum2(char *buf, int32 len, char *sum) + } + #endif + case CSUM_MD5: { +- MD5_CTX m5; ++ md5_context m5; + uchar seedbuf[4]; +- MD5_Init(&m5); ++ md5_begin(&m5); + if (proper_seed_order) { + if (checksum_seed) { + SIVALu(seedbuf, 0, checksum_seed); +- MD5_Update(&m5, seedbuf, 4); ++ md5_update(&m5, seedbuf, 4); + } +- MD5_Update(&m5, (uchar *)buf, len); ++ md5_update(&m5, (uchar *)buf, len); + } else { +- MD5_Update(&m5, (uchar *)buf, len); ++ md5_update(&m5, (uchar *)buf, len); + if (checksum_seed) { + SIVALu(seedbuf, 0, checksum_seed); +- MD5_Update(&m5, seedbuf, 4); ++ md5_update(&m5, seedbuf, 4); + } + } +- MD5_Final((uchar *)sum, &m5); ++ md5_result(&m5, (uchar *)sum); + break; + } + case CSUM_MD4: +@@ -374,18 +374,18 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) + } + #endif + case CSUM_MD5: { +- MD5_CTX m5; ++ md5_context m5; + +- MD5_Init(&m5); ++ md5_begin(&m5); + + for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) +- MD5_Update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); ++ md5_update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); + + remainder = (int32)(len - i); + if (remainder > 0) +- MD5_Update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder); ++ md5_update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder); + +- MD5_Final((uchar *)sum, &m5); ++ md5_result(&m5, (uchar *)sum); + break; + } + case CSUM_MD4: +@@ -443,7 +443,7 @@ static union { + #ifdef USE_OPENSSL + MD4_CTX m4; + #endif +- MD5_CTX m5; ++ md5_context m5; + } ctx; + #ifdef SUPPORT_XXHASH + static XXH64_state_t* xxh64_state; +@@ -482,7 +482,7 @@ void sum_init(int csum_type, int seed) + break; + #endif + case CSUM_MD5: +- MD5_Init(&ctx.m5); ++ md5_begin(&ctx.m5); + break; + case CSUM_MD4: + #ifdef USE_OPENSSL +@@ -532,7 +532,7 @@ void sum_update(const char *p, int32 len) + break; + #endif + case CSUM_MD5: +- MD5_Update(&ctx.m5, (uchar *)p, len); ++ md5_update(&ctx.m5, (uchar *)p, len); + break; + case CSUM_MD4: + #ifdef USE_OPENSSL +@@ -597,7 +597,7 @@ int sum_end(char *sum) + } + #endif + case CSUM_MD5: +- MD5_Final((uchar *)sum, &ctx.m5); ++ md5_result(&ctx.m5, (uchar *)sum); + break; + case CSUM_MD4: + #ifdef USE_OPENSSL +diff --git a/lib/md5-asm-x86_64.S b/lib/md5-asm-x86_64.S +index 383f193a..3737058f 100644 +--- a/lib/md5-asm-x86_64.S ++++ b/lib/md5-asm-x86_64.S +@@ -27,7 +27,7 @@ + #include "config.h" + #include "md-defines.h" + +-#if !defined USE_OPENSSL && CSUM_CHUNK == 64 ++#ifdef USE_MD5_ASM /* { */ + + #ifdef __APPLE__ + #define md5_process_asm _md5_process_asm +@@ -698,4 +698,4 @@ md5_process_asm: + pop %rbp + ret + +-#endif /* !USE_OPENSSL ... 
*/ ++#endif /* } USE_MD5_ASM */ +diff --git a/lib/md5.c b/lib/md5.c +index 41f158b8..07fd6147 100644 +--- a/lib/md5.c ++++ b/lib/md5.c +@@ -20,7 +20,7 @@ + + #include "rsync.h" + +-#ifndef USE_OPENSSL ++#if !defined USE_OPENSSL || USE_MD5_ASM /* { */ + void md5_begin(md_context *ctx) + { + ctx->A = 0x67452301; +@@ -148,7 +148,10 @@ static void md5_process(md_context *ctx, const uchar data[CSUM_CHUNK]) + ctx->D += D; + } + +-#if defined HAVE_ASM && CSUM_CHUNK == 64 ++#ifdef USE_MD5_ASM ++#if CSUM_CHUNK != 64 ++#error The MD5 ASM code does not support CSUM_CHUNK != 64 ++#endif + extern void md5_process_asm(md_context *ctx, const void *data, size_t num); + #endif + +@@ -176,20 +179,20 @@ void md5_update(md_context *ctx, const uchar *input, uint32 length) + left = 0; + } + +-#if defined HAVE_ASM && CSUM_CHUNK == 64 ++#ifdef USE_MD5_ASM /* { */ + if (length >= CSUM_CHUNK) { + uint32 chunks = length / CSUM_CHUNK; + md5_process_asm(ctx, input, chunks); + length -= chunks * CSUM_CHUNK; + input += chunks * CSUM_CHUNK; + } +-#else ++#else /* } { */ + while (length >= CSUM_CHUNK) { + md5_process(ctx, input); + length -= CSUM_CHUNK; + input += CSUM_CHUNK; + } +-#endif ++#endif /* } */ + + if (length) + memcpy(ctx->buffer + left, input, length); +@@ -221,9 +224,9 @@ void md5_result(md_context *ctx, uchar digest[MD5_DIGEST_LEN]) + SIVALu(digest, 8, ctx->C); + SIVALu(digest, 12, ctx->D); + } +-#endif ++#endif /* } */ + +-#ifdef TEST_MD5 ++#ifdef TEST_MD5 /* { */ + + void get_md5(uchar *out, const uchar *input, int n) + { +@@ -317,4 +320,4 @@ int main(int argc, char *argv[]) + return 0; + } + +-#endif ++#endif /* } */ +diff --git a/lib/mdigest.h b/lib/mdigest.h +index db174017..f1d6d934 100644 +--- a/lib/mdigest.h ++++ b/lib/mdigest.h +@@ -17,12 +17,13 @@ void mdfour_begin(md_context *md); + void mdfour_update(md_context *md, const uchar *in, uint32 length); + void mdfour_result(md_context *md, uchar digest[MD4_DIGEST_LEN]); + +-#ifndef USE_OPENSSL +-#define MD5_CTX md_context +-#define MD5_Init md5_begin +-#define MD5_Update md5_update +-#define MD5_Final(digest, cptr) md5_result(cptr, digest) +- ++#if defined USE_OPENSSL && !defined USE_MD5_ASM ++#define md5_context MD5_CTX ++#define md5_begin MD5_Init ++#define md5_update MD5_Update ++#define md5_result(cptr, digest) MD5_Final(digest, cptr) ++#else ++#define md5_context md_context + void md5_begin(md_context *ctx); + void md5_update(md_context *ctx, const uchar *input, uint32 length); + void md5_result(md_context *ctx, uchar digest[MD5_DIGEST_LEN]); +diff --git a/rsync.h b/rsync.h +index 41a014c3..4b30570b 100644 +--- a/rsync.h ++++ b/rsync.h +@@ -18,11 +18,6 @@ + * with this program; if not, visit the http://fsf.org website. + */ + +-/* a non-zero CHAR_OFFSET makes the rolling sum stronger, but is +- incompatible with older versions :-( */ +-#define CHAR_OFFSET 0 +- +-#ifndef AVX2_ASM /* do not include the rest of file for assembly */ + #define False 0 + #define True 1 + #define Unset (-1) /* Our BOOL values are always an int. */ +@@ -43,6 +38,9 @@ + + #define BACKUP_SUFFIX "~" + ++/* a non-zero CHAR_OFFSET makes the rolling sum stronger, but is ++ incompatible with older versions :-( */ ++#define CHAR_OFFSET 0 + + /* These flags are only used during the flist transfer. 
*/ + +@@ -1477,7 +1475,6 @@ const char *get_panic_action(void); + fprintf(stderr, "%s in %s at line %d\n", msg, __FILE__, __LINE__); \ + exit_cleanup(RERR_UNSUPPORTED); \ + } while (0) +-#endif /* AVX2_ASM */ + + #ifdef HAVE_MALLINFO2 + #define MEM_ALLOC_INFO mallinfo2 +diff --git a/simd-checksum-avx2.S b/simd-checksum-avx2.S +index dc8d145b..549cc3ef 100644 +--- a/simd-checksum-avx2.S ++++ b/simd-checksum-avx2.S +@@ -1,15 +1,21 @@ ++#include "config.h" ++ ++#ifdef USE_ROLL_ASM /* { */ ++ ++#define CHAR_OFFSET 0 /* Keep this the same as rsync.h, which isn't likely to change. */ ++ + #ifdef __APPLE__ +-#define get_checksum1_avx2 _get_checksum1_avx2 ++#define get_checksum1_avx2_asm _get_checksum1_avx2_asm + #endif + + .intel_syntax noprefix + .text + + .p2align 5 +- .globl get_checksum1_avx2 ++ .globl get_checksum1_avx2_asm + + # rdi=*buf, esi=len, edx=i, rcx= *ps1, r8= *ps2 +-get_checksum1_avx2: ++get_checksum1_avx2_asm: + vmovd xmm6,[rcx] # load *ps1 + lea eax, [rsi-128] # at least 128 bytes to process? + cmp edx, eax +@@ -167,3 +173,5 @@ get_checksum1_avx2: + .byte 3 + .byte 2 + .byte 1 ++ ++#endif /* } USE_ROLL_ASM */ +diff --git a/simd-checksum-x86_64.cpp b/simd-checksum-x86_64.cpp +index ebeeac2d..33f26e92 100644 +--- a/simd-checksum-x86_64.cpp ++++ b/simd-checksum-x86_64.cpp +@@ -51,12 +51,12 @@ + * GCC 4.x are not supported to ease configure.ac logic. + */ + +-#ifdef __x86_64__ +-#ifdef __cplusplus ++#ifdef __x86_64__ /* { */ ++#ifdef __cplusplus /* { */ + + #include "rsync.h" + +-#ifdef HAVE_SIMD ++#ifdef USE_ROLL_SIMD /* { */ + + #include + +@@ -85,6 +85,9 @@ typedef long long __m256i_u __attribute__((__vector_size__(32), __may_alias__, _ + #define SSE2_HADDS_EPI16(a, b) _mm_adds_epi16(SSE2_INTERLEAVE_EVEN_EPI16(a, b), SSE2_INTERLEAVE_ODD_EPI16(a, b)) + #define SSE2_MADDUBS_EPI16(a, b) _mm_adds_epi16(SSE2_MULU_EVEN_EPI8(a, b), SSE2_MULU_ODD_EPI8(a, b)) + ++#ifndef USE_ROLL_ASM ++__attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } ++#endif + __attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_ssse3_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } + __attribute__ ((target("default"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) { return i; } + +@@ -245,7 +248,7 @@ __attribute__ ((target("sse2"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf + + // (4*buf[i] + 3*buf[i+1]), (2*buf[i+2], buf[i+3]), ... 
2*[int16*8] + __m128i mul_const = _mm_set1_epi32(4 + (3 << 8) + (2 << 16) + (1 << 24)); +- __m128i mul_add16_1 = SSE2_MADDUBS_EPI16(mul_const, in8_1); ++ __m128i mul_add16_1 = SSE2_MADDUBS_EPI16(mul_const, in8_1); + __m128i mul_add16_2 = SSE2_MADDUBS_EPI16(mul_const, in8_2); + + // s2 += 32*s1 +@@ -310,7 +313,127 @@ __attribute__ ((target("sse2"))) MVSTATIC int32 get_checksum1_sse2_32(schar* buf + return i; + } + +-extern "C" __attribute__ ((target("avx2"))) int32 get_checksum1_avx2(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2); ++#ifdef USE_ROLL_ASM /* { */ ++ ++extern "C" __attribute__ ((target("avx2"))) int32 get_checksum1_avx2_asm(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2); ++ ++#else /* } { */ ++ ++/* ++ AVX2 loop per 64 bytes: ++ int16 t1[16]; ++ int16 t2[16]; ++ for (int j = 0; j < 16; j++) { ++ t1[j] = buf[j*4 + i] + buf[j*4 + i+1] + buf[j*4 + i+2] + buf[j*4 + i+3]; ++ t2[j] = 4*buf[j*4 + i] + 3*buf[j*4 + i+1] + 2*buf[j*4 + i+2] + buf[j*4 + i+3]; ++ } ++ s2 += 64*s1 + (uint32)( ++ 60*t1[0] + 56*t1[1] + 52*t1[2] + 48*t1[3] + 44*t1[4] + 40*t1[5] + 36*t1[6] + 32*t1[7] + 28*t1[8] + 24*t1[9] + 20*t1[10] + 16*t1[11] + 12*t1[12] + 8*t1[13] + 4*t1[14] + ++ t2[0] + t2[1] + t2[2] + t2[3] + t2[4] + t2[5] + t2[6] + t2[7] + t2[8] + t2[9] + t2[10] + t2[11] + t2[12] + t2[13] + t2[14] + t2[15] ++ ) + 2080*CHAR_OFFSET; ++ s1 += (uint32)(t1[0] + t1[1] + t1[2] + t1[3] + t1[4] + t1[5] + t1[6] + t1[7] + t1[8] + t1[9] + t1[10] + t1[11] + t1[12] + t1[13] + t1[14] + t1[15]) + ++ 64*CHAR_OFFSET; ++ */ ++ ++__attribute__ ((target("avx2"))) MVSTATIC int32 get_checksum1_avx2_64(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) ++{ ++ if (len > 64) { ++ ++ uint32 x[4] = {0}; ++ __m128i ss1 = _mm_cvtsi32_si128(*ps1); ++ __m128i ss2 = _mm_cvtsi32_si128(*ps2); ++ ++ const char mul_t1_buf[16] = {60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0}; ++ __m128i tmp = _mm_load_si128((__m128i*) mul_t1_buf); ++ __m256i mul_t1 = _mm256_cvtepu8_epi16(tmp); ++ __m256i mul_const = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(4 | (3 << 8) | (2 << 16) | (1 << 24))); ++ __m256i mul_one; ++ mul_one = _mm256_abs_epi8(_mm256_cmpeq_epi16(mul_one,mul_one)); // set all vector elements to 1 ++ ++ for (; i < (len-64); i+=64) { ++ // Load ... 4*[int8*16] ++ __m256i in8_1, in8_2; ++ __m128i in8_1_low, in8_2_low, in8_1_high, in8_2_high; ++ in8_1_low = _mm_loadu_si128((__m128i_u*)&buf[i]); ++ in8_2_low = _mm_loadu_si128((__m128i_u*)&buf[i+16]); ++ in8_1_high = _mm_loadu_si128((__m128i_u*)&buf[i+32]); ++ in8_2_high = _mm_loadu_si128((__m128i_u*)&buf[i+48]); ++ in8_1 = _mm256_inserti128_si256(_mm256_castsi128_si256(in8_1_low), in8_1_high,1); ++ in8_2 = _mm256_inserti128_si256(_mm256_castsi128_si256(in8_2_low), in8_2_high,1); ++ ++ // (1*buf[i] + 1*buf[i+1]), (1*buf[i+2], 1*buf[i+3]), ... 2*[int16*8] ++ // Fastest, even though multiply by 1 ++ __m256i add16_1 = _mm256_maddubs_epi16(mul_one, in8_1); ++ __m256i add16_2 = _mm256_maddubs_epi16(mul_one, in8_2); ++ ++ // (4*buf[i] + 3*buf[i+1]), (2*buf[i+2], buf[i+3]), ... 
2*[int16*8] ++ __m256i mul_add16_1 = _mm256_maddubs_epi16(mul_const, in8_1); ++ __m256i mul_add16_2 = _mm256_maddubs_epi16(mul_const, in8_2); ++ ++ // s2 += 64*s1 ++ ss2 = _mm_add_epi32(ss2, _mm_slli_epi32(ss1, 6)); ++ ++ // [sum(t1[0]..t1[7]), X, X, X] [int32*4]; faster than multiple _mm_hadds_epi16 ++ __m256i sum_add32 = _mm256_add_epi16(add16_1, add16_2); ++ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_epi32(sum_add32, 16)); ++ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_si256(sum_add32, 4)); ++ sum_add32 = _mm256_add_epi16(sum_add32, _mm256_srli_si256(sum_add32, 8)); ++ ++ // [sum(t2[0]..t2[7]), X, X, X] [int32*4]; faster than multiple _mm_hadds_epi16 ++ __m256i sum_mul_add32 = _mm256_add_epi16(mul_add16_1, mul_add16_2); ++ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_epi32(sum_mul_add32, 16)); ++ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_si256(sum_mul_add32, 4)); ++ sum_mul_add32 = _mm256_add_epi16(sum_mul_add32, _mm256_srli_si256(sum_mul_add32, 8)); ++ ++ // s1 += t1[0] + t1[1] + t1[2] + t1[3] + t1[4] + t1[5] + t1[6] + t1[7] ++ __m128i sum_add32_hi = _mm256_extracti128_si256(sum_add32, 0x1); ++ ss1 = _mm_add_epi32(ss1, _mm256_castsi256_si128(sum_add32)); ++ ss1 = _mm_add_epi32(ss1, sum_add32_hi); ++ ++ // s2 += t2[0] + t2[1] + t2[2] + t2[3] + t2[4] + t2[5] + t2[6] + t2[7] ++ __m128i sum_mul_add32_hi = _mm256_extracti128_si256(sum_mul_add32, 0x1); ++ ss2 = _mm_add_epi32(ss2, _mm256_castsi256_si128(sum_mul_add32)); ++ ss2 = _mm_add_epi32(ss2, sum_mul_add32_hi); ++ ++ // [t1[0] + t1[1], t1[2] + t1[3] ...] [int16*8] ++ // We could've combined this with generating sum_add32 above and ++ // save an instruction but benchmarking shows that as being slower ++ __m256i add16 = _mm256_hadds_epi16(add16_1, add16_2); ++ ++ // [t1[0], t1[1], ...] -> [t1[0]*28 + t1[1]*24, ...] 
[int32*4] ++ __m256i mul32 = _mm256_madd_epi16(add16, mul_t1); ++ ++ // [sum(mul32), X, X, X] [int32*4]; faster than multiple _mm_hadd_epi32 ++ mul32 = _mm256_add_epi32(mul32, _mm256_srli_si256(mul32, 4)); ++ mul32 = _mm256_add_epi32(mul32, _mm256_srli_si256(mul32, 8)); ++ // prefetch 2 cacheline ahead ++ _mm_prefetch(&buf[i + 160], _MM_HINT_T0); ++ ++ // s2 += 28*t1[0] + 24*t1[1] + 20*t1[2] + 16*t1[3] + 12*t1[4] + 8*t1[5] + 4*t1[6] ++ __m128i mul32_hi = _mm256_extracti128_si256(mul32, 0x1); ++ ss2 = _mm_add_epi32(ss2, _mm256_castsi256_si128(mul32)); ++ ss2 = _mm_add_epi32(ss2, mul32_hi); ++ ++#if CHAR_OFFSET != 0 ++ // s1 += 32*CHAR_OFFSET ++ __m128i char_offset_multiplier = _mm_set1_epi32(32 * CHAR_OFFSET); ++ ss1 = _mm_add_epi32(ss1, char_offset_multiplier); ++ ++ // s2 += 528*CHAR_OFFSET ++ char_offset_multiplier = _mm_set1_epi32(528 * CHAR_OFFSET); ++ ss2 = _mm_add_epi32(ss2, char_offset_multiplier); ++#endif ++ } ++ ++ _mm_store_si128((__m128i_u*)x, ss1); ++ *ps1 = x[0]; ++ _mm_store_si128((__m128i_u*)x, ss2); ++ *ps2 = x[0]; ++ } ++ return i; ++} ++ ++#endif /* } !USE_ROLL_ASM */ + + static int32 get_checksum1_default_1(schar* buf, int32 len, int32 i, uint32* ps1, uint32* ps2) + { +@@ -338,7 +461,11 @@ static inline uint32 get_checksum1_cpp(char *buf1, int32 len) + uint32 s2 = 0; + + // multiples of 64 bytes using AVX2 (if available) +- i = get_checksum1_avx2((schar*)buf1, len, i, &s1, &s2); ++#ifdef USE_ROLL_ASM ++ i = get_checksum1_avx2_asm((schar*)buf1, len, i, &s1, &s2); ++#else ++ i = get_checksum1_avx2_64((schar*)buf1, len, i, &s1, &s2); ++#endif + + // multiples of 32 bytes using SSSE3 (if available) + i = get_checksum1_ssse3_32((schar*)buf1, len, i, &s1, &s2); +@@ -407,7 +534,11 @@ int main() { + benchmark("Raw-C", get_checksum1_default_1, (schar*)buf, BLOCK_LEN); + benchmark("SSE2", get_checksum1_sse2_32, (schar*)buf, BLOCK_LEN); + benchmark("SSSE3", get_checksum1_ssse3_32, (schar*)buf, BLOCK_LEN); +- benchmark("AVX2", get_checksum1_avx2, (schar*)buf, BLOCK_LEN); ++#ifdef USE_ROLL_ASM ++ benchmark("AVX2-ASM", get_checksum1_avx2_asm, (schar*)buf, BLOCK_LEN); ++#else ++ benchmark("AVX2", get_checksum1_avx2_64, (schar*)buf, BLOCK_LEN); ++#endif + + free(buf); + return 0; +@@ -417,6 +548,6 @@ int main() { + #pragma clang optimize on + #endif /* BENCHMARK_SIMD_CHECKSUM1 */ + +-#endif /* HAVE_SIMD */ +-#endif /* __cplusplus */ +-#endif /* __x86_64__ */ ++#endif /* } USE_ROLL_SIMD */ ++#endif /* } __cplusplus */ ++#endif /* } __x86_64__ */ + diff --git a/0001-bugfix-test-md5-check-failure-2.patch b/0001-bugfix-test-md5-check-failure-2.patch new file mode 100644 index 0000000000000000000000000000000000000000..faf9321376ce208f261291a7433198831b481a78 --- /dev/null +++ b/0001-bugfix-test-md5-check-failure-2.patch @@ -0,0 +1,258 @@ +From b81a5095563776397a4239132d2b737a1083e02f Mon Sep 17 00:00:00 2001 +From: Wayne Davison +Date: Thu, 3 Mar 2022 17:00:57 -0800 +Subject: [PATCH] Make asm use more selectable + +- Make the SIMD ASM code off by default. Use configure --enable-simd-asm + to enable. +- Allow MD5 ASM code to be requested even when OpenSSL is handling MD4 + checksums. Use configure --enable-md5-asm to enable. 
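
The configure.ac hunks in this companion patch define the actual option names: --enable-roll-simd, --enable-roll-asm (what the commit message above calls --enable-simd-asm), and --enable-md5-asm. A minimal, illustrative configure sketch — not part of the patch itself, shown only to tie the message to the options added below:

    # rolling-checksum SIMD (C++) plus its hand-written AVX2 asm helper
    ./configure --enable-roll-simd --enable-roll-asm
    # keep OpenSSL crypto for MD4 while using the asm MD5 implementation
    ./configure --enable-openssl --enable-md5-asm
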
+--- + NEWS.md | 23 +++++++++---- + configure.ac | 96 ++++++++++++++++++++++++++++++++-------------------- + usage.c | 12 ++++--- + 3 files changed, 84 insertions(+), 47 deletions(-) + +diff --git a/NEWS.md b/NEWS.md +index 3083ca3..ed19449 100644 +--- a/NEWS.md ++++ b/NEWS.md +@@ -136,8 +136,9 @@ + (keeping the behavior the same as before), so specifying `--info=nonreg0` + can be used to turn the warnings off. + +- - More ASM optimizations from Shark64. +- ++ - An optional asm optimization for the rolling checksum from Shark64. Enable ++ it with `./configure --enable-roll-asm`. ++ + - Transformed rrsync into a python script with improvements: + - Security has been beefed up. + - The known rsync options were updated to include recent additions. +@@ -189,14 +190,24 @@ + using the output of `git describe` when building inside a non-shallow git + checkout, though.) + +- - Improved the IPv6 determination in configure. ++ - Renamed configure's `--enable-simd` option to `--enable-roll-simd` and added ++ the option `--enable-roll-asm` to use the new asm version of the code. Both ++ are x86_64/amd64 only. ++ ++ - Renamed configure's `--enable-asm` option to `--enable-md5-asm` to avoid ++ confusion with the asm option for the rolling checksum. It is also honored ++ even when openssl crypto is in use. This allows: normal MD4 & MD5, normal ++ MD4 + asm MD5, openssl MD4 & MD5, or openssl MD4 + asm MD5. + +- - Made SIMD & ASM configure default to "no" on non-Linux hosts due to various +- reports of problems on NetBSD & macOS hosts. These tests were also tweaked +- to allow enabling the feature on a host_cpu of amd64 (was only x86_64). ++ - Made SIMD & asm configure checks default to "no" on non-Linux hosts due to ++ various reports of problems on NetBSD & macOS hosts. These were also ++ tweaked to allow enabling the feature on a host_cpu of amd64 (was only ++ allowed on x86_64 before). + + - Fixed configure to not fail at the SIMD check when cross-compiling. + ++ - Improved the IPv6 determination in configure. ++ + - Compile the C files with `-pedantic-errors` (when possible) so that we will + get warned if a static initialization overflows in the future (among other + things). +diff --git a/configure.ac b/configure.ac +index 7031283..1dd3e8e 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -229,12 +229,13 @@ fi + AC_DEFINE_UNQUOTED(NOBODY_USER, "$NOBODY_USER", [unprivileged user--e.g. nobody]) + AC_DEFINE_UNQUOTED(NOBODY_GROUP, "$NOBODY_GROUP", [unprivileged group for unprivileged user]) + +-# SIMD optimizations +-SIMD= ++# rolling-checksum SIMD optimizations ++ROLL_SIMD= + +-AC_MSG_CHECKING([whether to enable SIMD optimizations]) +-AC_ARG_ENABLE(simd, +- AS_HELP_STRING([--enable-simd],[enable/disable to control SIMD optimizations (requires c++)])) ++AC_MSG_CHECKING([whether to enable rolling-checksum SIMD optimizations]) ++AC_ARG_ENABLE(roll-simd, +++ AS_HELP_STRING([--enable-roll-simd],[enable/disable to control rolling-checksum SIMD optimizations (requires c++)])) ++ + + # Clag is crashing with -g -O2, so we'll get rid of -g for now. 
+ CXXFLAGS=`echo "$CXXFLAGS" | sed 's/-g //'` +@@ -263,14 +264,14 @@ __attribute__ ((target("ssse3"))) void more_testing(char* buf, int len) + } + ]]) + +-if test x"$enable_simd" = x""; then ++if test x"$enable_roll_simd" = x""; then + case "$host_os" in + *linux*) ;; +- *) enable_simd=no ;; +- esac ++ *) enable_roll_simd=no ;; ++ esac + fi + +-if test x"$enable_simd" != x"no"; then ++if test x"$enable_roll_simd" != x"no"; then + # For x86-64 SIMD, g++ >=5 or clang++ >=7 is required + if test x"$host_cpu" = x"x86_64" || test x"$host_cpu" = x"amd64"; then + AC_LANG(C++) +@@ -283,23 +284,23 @@ if test x"$enable_simd" != x"no"; then + AC_LANG(C) + if test x"$CXX_OK" = x"yes"; then + # AC_MSG_RESULT() is called below. +- SIMD="$host_cpu" +- elif test x"$enable_simd" = x"yes"; then ++ ROLL_SIMD="$host_cpu" ++ elif test x"$enable_roll_simd" = x"yes"; then + AC_MSG_RESULT(error) +- AC_MSG_ERROR(The SIMD compilation test failed. +-Omit --enable-simd to continue without it.) ++ AC_MSG_ERROR(The rolling-checksum SIMD compilation test failed. ++Omit --enable-roll-simd to continue without it.) + fi +- elif test x"$enable_simd" = x"yes"; then ++ elif test x"$enable_roll_simd" = x"yes"; then + AC_MSG_RESULT(unavailable) +- AC_MSG_ERROR(The SIMD optimizations are currently x86_64|amd64 only. +-Omit --enable-simd to continue without it.) ++ AC_MSG_ERROR(The rolling-checksum SIMD optimizations are currently x86_64|amd64 only. ++Omit --enable-roll-simd to continue without it.) + fi + fi + +-if test x"$SIMD" != x""; then +- AC_MSG_RESULT([yes ($SIMD)]) +- AC_DEFINE(HAVE_SIMD, 1, [Define to 1 to enable SIMD optimizations]) +- SIMD='$(SIMD_'"$SIMD)" ++if test x"$ROLL_SIMD" != x""; then ++ AC_MSG_RESULT([yes ($ROLL_SIMD)]) ++ AC_DEFINE(USE_ROLL_SIMD, 1, [Define to 1 to enable rolling-checksum SIMD optimizations]) ++ ROLL_SIMD='$(ROLL_SIMD_'"$ROLL_SIMD)" + # We only use c++ for its target attribute dispatching, disable unneeded bulky features + CXXFLAGS="$CXXFLAGS -fno-exceptions -fno-rtti" + # Apple often has "g++" as a symlink for clang. Try to find out the truth. +@@ -311,7 +312,7 @@ else + AC_MSG_RESULT(no) + fi + +-AC_SUBST(SIMD) ++AC_SUBST(ROLL_SIMD) + + AC_MSG_CHECKING([if assembler accepts noexecstack]) + OLD_CFLAGS="$CFLAGS" +@@ -322,38 +323,59 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ ]], [[return 0;]])], + CFLAGS="$OLD_CFLAGS" + AC_SUBST(NOEXECSTACK) + +-ASM= +- +-AC_MSG_CHECKING([whether to enable ASM optimizations]) +-AC_ARG_ENABLE(asm, +- AS_HELP_STRING([--enable-asm],[enable/disable to control ASM optimizations])) ++MD5_ASM= + +-if test x"$enable_asm" = x""; then ++AC_MSG_CHECKING([whether to enable MD5 ASM optimizations]) ++AC_ARG_ENABLE(md5-asm, ++ AS_HELP_STRING([--enable-md5-asm],[enable/disable to control MD5 ASM optimizations])) ++ ++if test x"$enable_md5_asm" = x""; then + case "$host_os" in + *linux*) ;; +- *) enable_asm=no ;; ++ *) enable_md5_asm=no ;; + esac + fi + +-if test x"$enable_asm" != x"no"; then ++if test x"$enable_md5_asm" != x"no"; then + if test x"$host_cpu" = x"x86_64" || test x"$host_cpu" = x"amd64"; then +- ASM="$host_cpu" +- elif test x"$enable_asm" = x"yes"; then ++ MD5_ASM="$host_cpu" ++ elif test x"$enable_md5_asm" = x"yes"; then + AC_MSG_RESULT(unavailable) + AC_MSG_ERROR(The ASM optimizations are currently x86_64|amd64 only. +-Omit --enable-asm to continue without it.) ++Omit --enable-md5-asm to continue without it.) 
+ fi + fi + +-if test x"$ASM" != x""; then +- AC_MSG_RESULT([yes ($ASM)]) +- AC_DEFINE(HAVE_ASM, 1, [Define to 1 to enable ASM optimizations]) +- ASM='$(ASM_'"$ASM)" ++if test x"$MD5_ASM" != x""; then ++ AC_MSG_RESULT([yes ($MD5_ASM)]) ++ AC_DEFINE(USE_MD5_ASM, 1, [Define to 1 to enable MD5 ASM optimizations]) ++ MD5_ASM='$(MD5_ASM_'"$MD5_ASM)" ++else ++ AC_MSG_RESULT(no) ++fi ++ ++AC_SUBST(MD5_ASM) ++ ++ROLL_ASM= ++ ++AC_MSG_CHECKING([whether to enable rolling-checksum ASM optimizations]) ++AC_ARG_ENABLE(roll-asm, ++ AS_HELP_STRING([--enable-roll-asm],[enable/disable to control rolling-checksum ASM optimizations (requires --enable-roll-simd)])) ++ ++if test x"$ROLL_SIMD" = x""; then ++ enable_roll_asm=no ++fi ++ ++if test x"$enable_roll_asm" = x"yes"; then ++ ROLL_ASM="$host_cpu" ++ AC_MSG_RESULT([yes ($ROLL_ASM)]) ++ AC_DEFINE(USE_ROLL_ASM, 1, [Define to 1 to enable rolling-checksum ASM optimizations (requires --enable-roll-simd)]) ++ ROLL_ASM='$(ROLL_ASM_'"$ROLL_ASM)" + else + AC_MSG_RESULT(no) + fi + +-AC_SUBST(ASM) ++AC_SUBST(ROLL_ASM) + + # arrgh. libc in some old debian version screwed up the largefile + # stuff, getting byte range locking wrong +diff --git a/usage.c b/usage.c +index db13535..e710d84 100644 +--- a/usage.c ++++ b/usage.c +@@ -139,20 +139,24 @@ static void print_info_flags(enum logcode f) + + "*Optimizations", + +-#ifndef HAVE_SIMD ++#ifndef USE_ROLL_SIMD + "no " + #endif +- "SIMD", ++ "SIMD-roll", + +-#ifndef HAVE_ASM ++#ifndef USE_ROLL_ASM + "no " + #endif +- "asm", ++ "asm-roll", + + #ifndef USE_OPENSSL + "no " + #endif + "openssl-crypto", ++#ifndef USE_MD5_ASM ++ "no " ++#endif ++ "asm-MD5", + + NULL + }; +-- +2.27.0 + diff --git a/0002-rsync-3.2.2-runtests.patch b/0002-rsync-3.2.2-runtests.patch new file mode 100644 index 0000000000000000000000000000000000000000..0f682e56c921819d34bced97c3a3d2bb7ba03a76 --- /dev/null +++ b/0002-rsync-3.2.2-runtests.patch @@ -0,0 +1,12 @@ +diff --git a/runtests.sh.old b/runtests.sh +index ecb383e..1cd1d1a 100755 +--- a/runtests.sh.old ++++ b/runtests.sh +@@ -276,6 +276,7 @@ do + + case "$testscript" in + *hardlinks*) TESTRUN_TIMEOUT=600 ;; ++ *default-acls*) continue ;; + *) TESTRUN_TIMEOUT=300 ;; + esac + diff --git a/0003-rsync-3.2.4-hello-test.patch b/0003-rsync-3.2.4-hello-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..e6bb4b727f7986d39505169cd9e4c08a53bd2fd7 --- /dev/null +++ b/0003-rsync-3.2.4-hello-test.patch @@ -0,0 +1,31 @@ +diff --git a/testsuite/00-hello.test b/testsuite/00-hello.test +index a359753..ec0279a 100644 +--- a/testsuite/00-hello.test ++++ b/testsuite/00-hello.test +@@ -29,7 +29,7 @@ append_line test1 + checkit "$RSYNC -ai '$fromdir/' '$todir/'" "$fromdir" "$todir" + + copy_weird() { +- checkit "$RSYNC $1 \"$2$fromdir/$weird_name/\" \"$3$todir/$weird_name\"" "$fromdir" "$todir" ++ checkit "$RSYNC $1 --rsync-path='$RSYNC' '$2$fromdir/$weird_name/' '$3$todir/$weird_name'" "$fromdir" "$todir" + } + + append_line test2 +@@ -47,7 +47,7 @@ copy_weird '-ais' '' 'lh:' + echo test6 + + touch "$fromdir/one" "$fromdir/two" +-(cd "$fromdir" && $RSYNC -ai --old-args lh:'one two' "$todir/") ++(cd "$fromdir" && $RSYNC -ai --old-args --rsync-path="$RSYNC" lh:'one two' "$todir/") + if [ ! -f "$todir/one" ] || [ ! 
-f "$todir/two" ]; then + test_fail "old-args copy of 'one two' failed" + fi +@@ -55,7 +55,7 @@ fi + echo test7 + + rm "$todir/one" "$todir/two" +-(cd "$fromdir" && RSYNC_OLD_ARGS=1 $RSYNC -ai lh:'one two' "$todir/") ++(cd "$fromdir" && RSYNC_OLD_ARGS=1 $RSYNC -ai --rsync-path="$RSYNC" lh:'one two' "$todir/") + + # The script would have aborted on error, so getting here means we've won. + exit 0 diff --git a/0004-cve-2018-25032.patch b/0004-cve-2018-25032.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e558996110c2730474957fb2df960292618f7ae --- /dev/null +++ b/0004-cve-2018-25032.patch @@ -0,0 +1,343 @@ +From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001 +From: Mark Adler +Date: Tue, 17 Apr 2018 22:09:22 -0700 +Subject: [PATCH] Fix a bug that can crash deflate on some input when using + Z_FIXED. + +This bug was reported by Danilo Ramos of Eideticom, Inc. It has +lain in wait 13 years before being found! The bug was introduced +in zlib 1.2.2.2, with the addition of the Z_FIXED option. That +option forces the use of fixed Huffman codes. For rare inputs with +a large number of distant matches, the pending buffer into which +the compressed data is written can overwrite the distance symbol +table which it overlays. That results in corrupted output due to +invalid distances, and can result in out-of-bound accesses, +crashing the application. + +The fix here combines the distance buffer and literal/length +buffers into a single symbol buffer. Now three bytes of pending +buffer space are opened up for each literal or length/distance +pair consumed, instead of the previous two bytes. This assures +that the pending buffer cannot overwrite the symbol table, since +the maximum fixed code compressed length/distance is 31 bits, and +since there are four bytes of pending space for every three bytes +of symbol space. +--- + deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++--------------- + deflate.h | 25 +++++++++---------- + trees.c | 50 +++++++++++-------------------------- + 3 files changed, 79 insertions(+), 70 deletions(-) + +diff --git a/zlib/deflate.c b/zlib/deflate.c +index 425babc00..19cba873a 100644 +--- a/zlib/deflate.c ++++ b/zlib/deflate.c +@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + +- ushf *overlay; +- /* We overlay pending_buf and d_buf+l_buf. This works since the average +- * output size for (length,distance) codes is <= 24 bits. +- */ +- + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; +@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + +- overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); +- s->pending_buf = (uchf *) overlay; +- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); ++ /* We overlay pending_buf and sym_buf. This works since the average size ++ * for length/distance pairs over any compressed block is assured to be 31 ++ * bits or less. ++ * ++ * Analysis: The longest fixed codes are a length code of 8 bits plus 5 ++ * extra bits, for lengths 131 to 257. The longest fixed distance codes are ++ * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest ++ * possible fixed-codes length/distance pair is then 31 bits total. 
++ * ++ * sym_buf starts one-fourth of the way into pending_buf. So there are ++ * three bytes in sym_buf for every four bytes in pending_buf. Each symbol ++ * in sym_buf is three bytes -- two for the distance and one for the ++ * literal/length. As each symbol is consumed, the pointer to the next ++ * sym_buf value to read moves forward three bytes. From that symbol, up to ++ * 31 bits are written to pending_buf. The closest the written pending_buf ++ * bits gets to the next sym_buf symbol to read is just before the last ++ * code is written. At that time, 31*(n-2) bits have been written, just ++ * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at ++ * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 ++ * symbols are written.) The closest the writing gets to what is unread is ++ * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and ++ * can range from 128 to 32768. ++ * ++ * Therefore, at a minimum, there are 142 bits of space between what is ++ * written and what is read in the overlain buffers, so the symbols cannot ++ * be overwritten by the compressed data. That space is actually 139 bits, ++ * due to the three-bit fixed-code block header. ++ * ++ * That covers the case where either Z_FIXED is specified, forcing fixed ++ * codes, or when the use of fixed codes is chosen, because that choice ++ * results in a smaller compressed block than dynamic codes. That latter ++ * condition then assures that the above analysis also covers all dynamic ++ * blocks. A dynamic-code block will only be chosen to be emitted if it has ++ * fewer bits than a fixed-code block would for the same set of symbols. ++ * Therefore its average symbol length is assured to be less than 31. So ++ * the compressed data for a dynamic block also cannot overwrite the ++ * symbols from which it is being constructed. ++ */ ++ ++ s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); ++ s->pending_buf_size = (ulg)s->lit_bufsize * 4; + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { +@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + deflateEnd (strm); + return Z_MEM_ERROR; + } +- s->d_buf = overlay + s->lit_bufsize/sizeof(ush); +- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; ++ s->sym_buf = s->pending_buf + s->lit_bufsize; ++ s->sym_end = (s->lit_bufsize - 1) * 3; ++ /* We avoid equality with lit_bufsize*3 because of wraparound at 64K ++ * on 16 bit machines and because stored blocks are restricted to ++ * 64K-1 bytes. 
++ */ + + s->level = level; + s->strategy = strategy; +@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value) + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + s = strm->state; +- if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) ++ if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; +@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source) + #else + deflate_state *ds; + deflate_state *ss; +- ushf *overlay; + + + if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) { +@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source) + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); +- overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); +- ds->pending_buf = (uchf *) overlay; ++ ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { +@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source) + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); +- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); +- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; ++ ds->sym_buf = ds->pending_buf + ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; +@@ -1771,7 +1771,7 @@ local block_state deflate_fast(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -1912,7 +1912,7 @@ local block_state deflate_slow(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -1987,7 +1987,7 @@ local block_state deflate_rle(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -2026,7 +2026,7 @@ local block_state deflate_huff(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +diff --git a/zlib/deflate.h b/zlib/deflate.h +index 23ecdd312..d4cf1a98b 100644 +--- a/zlib/deflate.h ++++ b/zlib/deflate.h +@@ -217,7 +217,7 @@ typedef struct internal_state { + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + +- uchf *l_buf; /* buffer for literals or lengths */ ++ uchf *sym_buf; /* buffer for distances and literals/lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for +@@ -239,13 +239,8 @@ typedef struct internal_state { + * - I can't count above 4 + */ + +- uInt last_lit; /* running index in l_buf */ +- +- ushf *d_buf; +- /* Buffer for distances. To simplify the code, d_buf and l_buf have +- * the same number of elements. To use different lengths, an extra flag +- * array would be necessary. 
+- */ ++ uInt sym_next; /* running index in sym_buf */ ++ uInt sym_end; /* symbol table full when sym_next reaches this */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ +@@ -317,20 +317,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + + # define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ +- s->d_buf[s->last_lit] = 0; \ +- s->l_buf[s->last_lit++] = cc; \ ++ s->sym_buf[s->sym_next++] = 0; \ ++ s->sym_buf[s->sym_next++] = 0; \ ++ s->sym_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ +- flush = (s->last_lit == s->lit_bufsize-1); \ ++ flush = (s->sym_next == s->sym_end); \ + } + # define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (length); \ + ush dist = (distance); \ +- s->d_buf[s->last_lit] = dist; \ +- s->l_buf[s->last_lit++] = len; \ ++ s->sym_buf[s->sym_next++] = dist; \ ++ s->sym_buf[s->sym_next++] = dist >> 8; \ ++ s->sym_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ +- flush = (s->last_lit == s->lit_bufsize-1); \ ++ flush = (s->sym_next == s->sym_end); \ + } + #else + # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +diff --git a/zlib/trees.c b/zlib/trees.c +index 4f4a65011..decaeb7c3 100644 +--- a/zlib/trees.c ++++ b/zlib/trees.c +@@ -416,7 +416,7 @@ local void init_block(s) + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; +- s->last_lit = s->matches = 0; ++ s->sym_next = s->matches = 0; + } + + #define SMALLEST 1 +@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, +- s->last_lit)); ++ s->sym_next / 3)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + +@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ + { +- s->d_buf[s->last_lit] = (ush)dist; +- s->l_buf[s->last_lit++] = (uch)lc; ++ s->sym_buf[s->sym_next++] = dist; ++ s->sym_buf[s->sym_next++] = dist >> 8; ++ s->sym_buf[s->sym_next++] = lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; +@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } +- +-#ifdef TRUNCATE_BLOCK +- /* Try to guess if it is profitable to stop the current block here */ +- if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { +- /* Compute an upper bound for the compressed length */ +- ulg out_length = (ulg)s->last_lit*8L; +- ulg in_length = (ulg)((long)s->strstart - s->block_start); +- int dcode; +- for (dcode = 0; dcode < D_CODES; dcode++) { +- out_length += (ulg)s->dyn_dtree[dcode].Freq * +- (5L+extra_dbits[dcode]); +- } +- out_length >>= 3; +- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", +- s->last_lit, in_length, out_length, +- 100L - out_length*100L/in_length)); +- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; +- } +-#endif +- return (s->last_lit == s->lit_bufsize-1); +- /* We avoid equality with lit_bufsize because of wraparound at 64K +- * on 16 bit machines and because stored blocks are restricted to +- * 64K-1 bytes. 
+- */ ++ return (s->sym_next == s->sym_end); + } + + /* =========================================================================== +@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree) + { + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ +- unsigned lx = 0; /* running index in l_buf */ ++ unsigned sx = 0; /* running index in sym_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + +- if (s->last_lit != 0) do { +- dist = s->d_buf[lx]; +- lc = s->l_buf[lx++]; ++ if (s->sym_next != 0) do { ++ dist = s->sym_buf[sx++] & 0xff; ++ dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; ++ lc = s->sym_buf[sx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); +@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree) + } + } /* literal or match pair ? */ + +- /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ +- Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, +- "pendingBuf overflow"); ++ /* Check that the overlay between pending_buf and sym_buf is ok: */ ++ Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); + +- } while (lx < s->last_lit); ++ } while (sx < s->sym_next); + + send_code(s, END_BLOCK, ltree); + } diff --git a/0005-restart-daemon-on-failure.patch b/0005-restart-daemon-on-failure.patch new file mode 100644 index 0000000000000000000000000000000000000000..0afba61f7f7db42122b7bf68950932fb2d5f0c8c --- /dev/null +++ b/0005-restart-daemon-on-failure.patch @@ -0,0 +1,27 @@ +From d41bb98c09bf0b999c4eee4e2125c7e5d0747ec4 Mon Sep 17 00:00:00 2001 +From: Simon Deziel +Date: Mon, 11 Apr 2022 12:08:11 -0400 +Subject: [PATCH] systemd: restart daemon on-failure (#302) + +man 5 systemd.service: +> Setting this to on-failure is the recommended choice for long-running services + +Partial fix for https://bugzilla.samba.org/show_bug.cgi?id=13463 + +Signed-off-by: Simon Deziel +--- + packaging/systemd/rsync.service | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/packaging/systemd/rsync.service b/packaging/systemd/rsync.service +index 8a0b5820..8a867ca6 100644 +--- a/packaging/systemd/rsync.service ++++ b/packaging/systemd/rsync.service +@@ -7,6 +7,7 @@ Documentation=man:rsync(1) man:rsyncd.conf(5) + [Service] + ExecStart=/usr/bin/rsync --daemon --no-detach + RestartSec=1 ++Restart=on-failure + + # Citing README.md: + # diff --git a/rsync-patches-3.2.4pre3.tar.gz b/rsync-patches-3.2.4pre3.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9991ca62145feabb05bcc18a9578d1e6363c1f46 Binary files /dev/null and b/rsync-patches-3.2.4pre3.tar.gz differ diff --git a/rsync.spec b/rsync.spec new file mode 100644 index 0000000000000000000000000000000000000000..b75c124b1fdf52ca7f26efe9a300410fcbd08a6d --- /dev/null +++ b/rsync.spec @@ -0,0 +1,117 @@ +%define anolis_release 1 +%define pre_release pre3 +%define version_num 3.2.4 + +Name: rsync +Version: %{version_num}~%{pre_release} +Release: %{anolis_release}%{?dist} +Summary: A program for synchronizing files over a network + +License: GPLv3+ +URL: https://github.com/WayneD/rsync +Source0: https://github.com/WayneD/rsync/archive/refs/tags/v%{version_num}%{pre_release}.tar.gz +Source1: https://github.com/WayneD/rsync/archive/refs/tags/%{name}-patches-%{version_num}%{pre_release}.tar.gz +Source2: rsyncd.socket +Source3: rsyncd.service +Source4: rsyncd.conf +Source5: rsyncd.sysconfig +Source6: rsyncd@.service + 
+BuildRequires: make vim
+BuildRequires: gcc
+BuildRequires: gcc-c++
+BuildRequires: libacl-devel
+BuildRequires: libattr-devel
+BuildRequires: autoconf automake
+BuildRequires: popt-devel
+BuildRequires: systemd
+BuildRequires: lz4-devel
+BuildRequires: openssl-devel libtool-ltdl
+BuildRequires: libzstd-devel
+BuildRequires: xxhash-devel
+BuildRequires: python3-cmarkgfm
+
+#needed to make hello test run correctly
+Patch0: 0000-bugfix-test-md5-check-failure-1.patch
+Patch1: 0001-bugfix-test-md5-check-failure-2.patch
+Patch2: 0002-rsync-3.2.2-runtests.patch
+Patch3: 0003-rsync-3.2.4-hello-test.patch
+
+Patch4: 0004-cve-2018-25032.patch
+Patch5: 0005-restart-daemon-on-failure.patch
+
+%description
+Rsync uses a reliable algorithm to bring remote and host files into
+sync very quickly. Rsync is fast because it just sends the differences
+in the files over the network instead of sending the complete
+files. Rsync is often used as a very powerful mirroring process or
+just as a more capable replacement for the rcp command. A technical
+report which describes the rsync algorithm is included in this
+package.
+
+%package daemon
+Summary: Service for anonymous access to rsync
+BuildArch: noarch
+Requires: %{name} = %{version}-%{release}
+%{?systemd_requires}
+
+%description daemon
+Rsync can be used to offer read only access to anonymous clients. This
+package provides the anonymous rsync service.
+
+%prep
+%autosetup -b 1 -n %{name}-%{version_num}%{pre_release} -p1
+
+#Enable --copy-devices parameter
+patch -p1 -i patches/copy-devices.diff
+
+%build
+%configure \
+ --enable-openssl \
+ --enable-xxhash \
+ --enable-zstd \
+ --enable-lz4 \
+ --enable-ipv6
+
+%make_build
+
+%install
+%make_install
+install -D -m644 %{SOURCE2} %{buildroot}/%{_unitdir}/rsyncd.socket
+install -D -m644 %{SOURCE3} %{buildroot}/%{_unitdir}/rsyncd.service
+install -D -m644 %{SOURCE4} %{buildroot}/%{_sysconfdir}/rsyncd.conf
+install -D -m644 %{SOURCE5} %{buildroot}/%{_sysconfdir}/sysconfig/rsyncd
+install -D -m644 %{SOURCE6} %{buildroot}/%{_unitdir}/rsyncd@.service
+
+%check
+make check
+chmod -x support/*
+
+%post daemon
+%systemd_post rsyncd.service
+
+%preun daemon
+%systemd_preun rsyncd.service
+
+%postun daemon
+%systemd_postun_with_restart rsyncd.service
+
+%files
+%license COPYING
+%doc support/ tech_report.tex
+%{_bindir}/%{name}
+%{_bindir}/%{name}-ssl
+%{_mandir}/man1/%{name}.1*
+%{_mandir}/man1/%{name}-ssl.1*
+%{_mandir}/man5/rsyncd.conf.5*
+%config(noreplace) %{_sysconfdir}/rsyncd.conf
+
+%files daemon
+%config(noreplace) %{_sysconfdir}/sysconfig/rsyncd
+%{_unitdir}/rsyncd.socket
+%{_unitdir}/rsyncd.service
+%{_unitdir}/rsyncd@.service
+
+%changelog
+* Thu Apr 14 2022 happy_orange - 3.2.4~pre3-1
+- Init package from upstream
diff --git a/rsyncd.conf b/rsyncd.conf
new file mode 100644
index 0000000000000000000000000000000000000000..6e058aa2c6bcf142707adc8acd0f353dd9f08ba3
--- /dev/null
+++ b/rsyncd.conf
@@ -0,0 +1,20 @@
+# /etc/rsyncd: configuration file for rsync daemon mode
+
+# See rsyncd.conf man page for more options.
+
+# configuration example:
+
+# uid = nobody
+# gid = nobody
+# use chroot = yes
+# max connections = 4
+# pid file = /var/run/rsyncd.pid
+# exclude = lost+found/
+# transfer logging = yes
+# timeout = 900
+# ignore nonreadable = yes
+# dont compress = *.gz *.tgz *.zip *.z *.Z *.rpm *.deb *.bz2
+
+# [ftp]
+# path = /home/ftp
+# comment = ftp export area
diff --git a/rsyncd.service b/rsyncd.service
new file mode 100644
index 0000000000000000000000000000000000000000..d2d6362e874a028bb4ef82df466b2b55a3ed2449
--- /dev/null
+++ b/rsyncd.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=fast remote file copy program daemon
+ConditionPathExists=/etc/rsyncd.conf
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+EnvironmentFile=/etc/sysconfig/rsyncd
+ExecStart=/usr/bin/rsync --daemon --no-detach "$OPTIONS"
+
+[Install]
+WantedBy=multi-user.target
diff --git a/rsyncd.socket b/rsyncd.socket
new file mode 100644
index 0000000000000000000000000000000000000000..7306ad0fae875c5bac08d007f0c5d2f9d557c36e
--- /dev/null
+++ b/rsyncd.socket
@@ -0,0 +1,10 @@
+[Unit]
+Description=Rsync Server Socket
+Conflicts=rsyncd.service
+
+[Socket]
+ListenStream=873
+Accept=yes
+
+[Install]
+WantedBy=sockets.target
diff --git a/rsyncd.sysconfig b/rsyncd.sysconfig
new file mode 100644
index 0000000000000000000000000000000000000000..90a5a43d0dd6339257675f7af97a0fa21832d446
--- /dev/null
+++ b/rsyncd.sysconfig
@@ -0,0 +1 @@
+OPTIONS=""
diff --git a/rsyncd@.service b/rsyncd@.service
new file mode 100644
index 0000000000000000000000000000000000000000..89f96214a40aa3964d4b0f37dfe1927de56bf70e
--- /dev/null
+++ b/rsyncd@.service
@@ -0,0 +1,8 @@
+[Unit]
+Description=fast remote file copy program daemon
+ConditionPathExists=/etc/rsyncd.conf
+
+[Service]
+EnvironmentFile=/etc/sysconfig/rsyncd
+ExecStart=/usr/bin/rsync --daemon --no-detach "$OPTIONS"
+StandardInput=socket
diff --git a/v3.2.4pre3.tar.gz b/v3.2.4pre3.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..41ff6344ff07166e9ce79d0953eb11e3229bf461
Binary files /dev/null and b/v3.2.4pre3.tar.gz differ
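
The unit files above support two deployment modes; a brief, illustrative usage sketch (assuming the rsync-daemon subpackage built from this spec is installed — the socket unit declares Conflicts=rsyncd.service, so only one mode is active at a time):

    # persistent daemon, reading /etc/rsyncd.conf and /etc/sysconfig/rsyncd
    systemctl enable --now rsyncd.service
    # or per-connection socket activation on port 873, handled by rsyncd@.service
    systemctl enable --now rsyncd.socket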